// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright
//     notice, this list of conditions and the following disclaimer in the
//     documentation and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may
//     be used to endorse or promote products derived from this software
//     without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

#include <sys/mman.h>

#include <cfloat>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>

#include "test-runner.h"
#include "test-utils.h"
#include "aarch64/test-utils-aarch64.h"

#include "aarch64/cpu-aarch64.h"
#include "aarch64/debugger-aarch64.h"
#include "aarch64/disasm-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"
#include "aarch64/simulator-aarch64.h"

namespace vixl {
namespace aarch64 {

// Test infrastructure.
//
// Tests are functions which accept no parameters and have no return values.
// The testing code should not perform an explicit return once completed. For
// example, to test the mov immediate instruction a very simple test would be:
//
//   TEST(mov_x0_one) {
//     SETUP();
//
//     START();
//     __ mov(x0, Operand(1));
//     END();
//
//     RUN();
//
//     ASSERT_EQUAL_64(1, x0);
//
//     TEARDOWN();
//   }
//
// Within a START ... END block all registers but sp can be modified. sp has
// to be explicitly saved/restored. The END() macro replaces the function
// return so it may appear multiple times in a test if the test has multiple
// exit points.
//
// Once the test has been run all integer and floating point registers as
// well as flags are accessible through a RegisterDump instance, see
// utils-aarch64.cc for more info on RegisterDump.
//
// We provide some helper asserts to handle common cases:
//
//   ASSERT_EQUAL_32(int32_t, int32_t)
//   ASSERT_EQUAL_FP32(float, float)
//   ASSERT_EQUAL_32(int32_t, W register)
//   ASSERT_EQUAL_FP32(float, S register)
//   ASSERT_EQUAL_64(int64_t, int64_t)
//   ASSERT_EQUAL_FP64(double, double)
//   ASSERT_EQUAL_64(int64_t, X register)
//   ASSERT_EQUAL_64(X register, X register)
//   ASSERT_EQUAL_FP64(double, D register)
//
// e.g. ASSERT_EQUAL_FP64(0.5, d30);
//
// If more advanced computation is required before the assert then access the
// RegisterDump named core directly:
//
//   ASSERT_EQUAL_64(0x1234, core->reg_x0() & 0xffff);


#define __ masm.
#define TEST(name) TEST_(AARCH64_ASM_##name)

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
// Run tests with the simulator.

#define SETUP()        \
  MacroAssembler masm; \
  SETUP_COMMON()

#define SETUP_CUSTOM(size, pic)                                       \
  byte* buf = new byte[size + CodeBuffer::kDefaultCapacity];          \
  MacroAssembler masm(buf, size + CodeBuffer::kDefaultCapacity, pic); \
  SETUP_COMMON()

#define SETUP_COMMON()                                            \
  masm.SetGenerateSimulatorCode(true);                            \
  Decoder simulator_decoder;                                      \
  Simulator* simulator = Test::run_debugger()                     \
                             ? new Debugger(&simulator_decoder)   \
                             : new Simulator(&simulator_decoder); \
  simulator->SetColouredTrace(Test::coloured_trace());            \
  simulator->SetInstructionStats(Test::instruction_stats());      \
  Disassembler disasm;                                            \
  Decoder disassembler_decoder;                                   \
  disassembler_decoder.AppendVisitor(&disasm);                    \
  RegisterDump core;                                              \
  ptrdiff_t offset_after_infrastructure_start;                    \
  ptrdiff_t offset_before_infrastructure_end

#define START()                                                               \
  masm.Reset();                                                               \
  simulator->ResetState();                                                    \
  __ PushCalleeSavedRegisters();                                              \
  {                                                                           \
    int trace_parameters = 0;                                                 \
    if (Test::trace_reg()) trace_parameters |= LOG_STATE;                     \
    if (Test::trace_write()) trace_parameters |= LOG_WRITE;                   \
    if (Test::trace_sim()) trace_parameters |= LOG_DISASM;                    \
    if (Test::trace_branch()) trace_parameters |= LOG_BRANCH;                 \
    if (trace_parameters != 0) {                                              \
      __ Trace(static_cast<TraceParameters>(trace_parameters), TRACE_ENABLE); \
    }                                                                         \
  }                                                                           \
  if (Test::instruction_stats()) {                                            \
    __ EnableInstrumentation();                                               \
  }                                                                           \
  offset_after_infrastructure_start = masm.GetCursorOffset();                 \
  /* Avoid unused-variable warnings in case a test never calls RUN(). */      \
  USE(offset_after_infrastructure_start)

#define END()                                                            \
  offset_before_infrastructure_end = masm.GetCursorOffset();             \
  /* Avoid unused-variable warnings in case a test never calls RUN(). */ \
  USE(offset_before_infrastructure_end);                                 \
  if (Test::instruction_stats()) {                                       \
    __ DisableInstrumentation();                                         \
  }                                                                      \
  __ Trace(LOG_ALL, TRACE_DISABLE);                                      \
  core.Dump(&masm);                                                      \
  __ PopCalleeSavedRegisters();                                          \
  __ Ret();                                                              \
  masm.FinalizeCode()

#define RUN()    \
  DISASSEMBLE(); \
  simulator->RunFrom(masm.GetBuffer()->GetStartAddress<Instruction*>())

#define RUN_CUSTOM() RUN()

#define TEARDOWN() TEARDOWN_COMMON()

#define TEARDOWN_CUSTOM() \
  delete[] buf;           \
  TEARDOWN_COMMON()

#define TEARDOWN_COMMON() delete simulator;

#else  // ifdef VIXL_INCLUDE_SIMULATOR_AARCH64.
// Run the test on real hardware or models.
#define SETUP()        \
  MacroAssembler masm; \
  SETUP_COMMON()

#define SETUP_CUSTOM(size, pic)                                         \
  byte* buffer =                                                        \
      reinterpret_cast<byte*>(mmap(NULL,                                \
                                   size + CodeBuffer::kDefaultCapacity, \
                                   PROT_READ | PROT_WRITE,              \
                                   MAP_PRIVATE | MAP_ANONYMOUS,         \
                                   -1,                                  \
                                   0));                                 \
  size_t buffer_size = size + CodeBuffer::kDefaultCapacity;             \
  MacroAssembler masm(buffer, buffer_size, pic);                        \
  SETUP_COMMON()

#define SETUP_COMMON()                          \
  Disassembler disasm;                          \
  Decoder disassembler_decoder;                 \
  disassembler_decoder.AppendVisitor(&disasm);  \
  masm.SetGenerateSimulatorCode(false);         \
  RegisterDump core;                            \
  CPU::SetUp();                                 \
  ptrdiff_t offset_after_infrastructure_start;  \
  ptrdiff_t offset_before_infrastructure_end

#define START()                                                          \
  masm.Reset();                                                          \
  __ PushCalleeSavedRegisters();                                         \
  offset_after_infrastructure_start = masm.GetCursorOffset();            \
  /* Avoid unused-variable warnings in case a test never calls RUN(). */ \
  USE(offset_after_infrastructure_start)

#define END()                                                            \
  offset_before_infrastructure_end = masm.GetCursorOffset();             \
  /* Avoid unused-variable warnings in case a test never calls RUN(). */ \
  USE(offset_before_infrastructure_end);                                 \
  core.Dump(&masm);                                                      \
  __ PopCalleeSavedRegisters();                                          \
  __ Ret();                                                              \
  masm.FinalizeCode()

// Execute the generated code from the memory area.
#define RUN()                                               \
  DISASSEMBLE();                                            \
  masm.GetBuffer()->SetExecutable();                        \
  ExecuteMemory(masm.GetBuffer()->GetStartAddress<byte*>(), \
                masm.GetSizeOfCodeGenerated());             \
  masm.GetBuffer()->SetWritable()

// The generated code was written directly into `buffer`, execute it directly.
#define RUN_CUSTOM()                                    \
  DISASSEMBLE();                                        \
  mprotect(buffer, buffer_size, PROT_READ | PROT_EXEC); \
  ExecuteMemory(buffer, buffer_size);                   \
  mprotect(buffer, buffer_size, PROT_READ | PROT_WRITE)

#define TEARDOWN()

#define TEARDOWN_CUSTOM()

#endif  // ifdef VIXL_INCLUDE_SIMULATOR_AARCH64.

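// The *_CUSTOM variants above support tests that need to control the size or
// placement of the code buffer (for example the adrp tests below, which need
// code on known page boundaries). A sketch of the intended pattern, modelled
// on TEST(adrp); `example` is an illustrative name:
//
//   TEST(example) {
//     SETUP_CUSTOM(2 * kPageSize, PageOffsetDependentCode);
//     START();
//     ...
//     END();
//     RUN_CUSTOM();
//     ...  // ASSERT_EQUAL_* checks.
//     TEARDOWN_CUSTOM();
//   }
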
" \ 250 "Use --disassemble to see it.\n"); \ 251 } \ 252 Instruction* instruction = \ 253 masm.GetBuffer()->GetOffsetAddress<Instruction*>(start_offset); \ 254 Instruction* end = \ 255 masm.GetBuffer()->GetOffsetAddress<Instruction*>(end_offset); \ 256 while (instruction != end) { \ 257 disassembler_decoder.Decode(instruction); \ 258 uint32_t encoding; \ 259 memcpy(&encoding, instruction, sizeof(encoding)); \ 260 uint64_t address = reinterpret_cast<uintptr_t>(instruction); \ 261 printf(" %016" PRIx64 ":\t%08" PRIx32 "\t%s\n", \ 262 address, \ 263 encoding, \ 264 disasm.GetOutput()); \ 265 instruction += kInstructionSize; \ 266 } \ 267 } 268 269 #define ASSERT_EQUAL_NZCV(expected) \ 270 VIXL_CHECK(EqualNzcv(expected, core.flags_nzcv())) 271 272 #define ASSERT_EQUAL_REGISTERS(expected) \ 273 VIXL_CHECK(EqualRegisters(&expected, &core)) 274 275 #define ASSERT_EQUAL_32(expected, result) \ 276 VIXL_CHECK(Equal32(static_cast<uint32_t>(expected), &core, result)) 277 278 #define ASSERT_EQUAL_FP32(expected, result) \ 279 VIXL_CHECK(EqualFP32(expected, &core, result)) 280 281 #define ASSERT_EQUAL_64(expected, result) \ 282 VIXL_CHECK(Equal64(expected, &core, result)) 283 284 #define ASSERT_EQUAL_FP64(expected, result) \ 285 VIXL_CHECK(EqualFP64(expected, &core, result)) 286 287 #define ASSERT_EQUAL_128(expected_h, expected_l, result) \ 288 VIXL_CHECK(Equal128(expected_h, expected_l, &core, result)) 289 290 #define ASSERT_LITERAL_POOL_SIZE(expected) \ 291 VIXL_CHECK((expected + kInstructionSize) == (masm.GetLiteralPoolSize())) 292 293 294 TEST(preshift_immediates) { 295 SETUP(); 296 297 START(); 298 // Test operations involving immediates that could be generated using a 299 // pre-shifted encodable immediate followed by a post-shift applied to 300 // the arithmetic or logical operation. 301 302 // Save sp. 303 __ Mov(x29, sp); 304 305 // Set the registers to known values. 306 __ Mov(x0, 0x1000); 307 __ Mov(sp, 0x1004); 308 309 // Arithmetic ops. 310 __ Add(x1, x0, 0x1f7de); 311 __ Add(w2, w0, 0xffffff1); 312 __ Adds(x3, x0, 0x18001); 313 __ Adds(w4, w0, 0xffffff1); 314 __ Sub(x5, x0, 0x1f7de); 315 __ Sub(w6, w0, 0xffffff1); 316 __ Subs(x7, x0, 0x18001); 317 __ Subs(w8, w0, 0xffffff1); 318 319 // Logical ops. 320 __ And(x9, x0, 0x1f7de); 321 __ Orr(w10, w0, 0xffffff1); 322 __ Eor(x11, x0, 0x18001); 323 324 // Ops using the stack pointer. 325 __ Add(sp, sp, 0x18001); 326 __ Mov(x12, sp); 327 __ Mov(sp, 0x1004); 328 329 __ Add(sp, sp, 0x1f7de); 330 __ Mov(x13, sp); 331 __ Mov(sp, 0x1004); 332 333 __ Adds(x14, sp, 0x1f7de); 334 335 __ Orr(sp, x0, 0x1f7de); 336 __ Mov(x15, sp); 337 338 // Restore sp. 339 __ Mov(sp, x29); 340 END(); 341 342 RUN(); 343 344 ASSERT_EQUAL_64(0x1000, x0); 345 ASSERT_EQUAL_64(0x207de, x1); 346 ASSERT_EQUAL_64(0x10000ff1, x2); 347 ASSERT_EQUAL_64(0x19001, x3); 348 ASSERT_EQUAL_64(0x10000ff1, x4); 349 ASSERT_EQUAL_64(0xfffffffffffe1822, x5); 350 ASSERT_EQUAL_64(0xf000100f, x6); 351 ASSERT_EQUAL_64(0xfffffffffffe8fff, x7); 352 ASSERT_EQUAL_64(0xf000100f, x8); 353 ASSERT_EQUAL_64(0x1000, x9); 354 ASSERT_EQUAL_64(0xffffff1, x10); 355 ASSERT_EQUAL_64(0x19001, x11); 356 ASSERT_EQUAL_64(0x19005, x12); 357 ASSERT_EQUAL_64(0x207e2, x13); 358 ASSERT_EQUAL_64(0x207e2, x14); 359 ASSERT_EQUAL_64(0x1f7de, x15); 360 361 TEARDOWN(); 362 } 363 364 365 TEST(stack_ops) { 366 SETUP(); 367 368 START(); 369 // save sp. 370 __ Mov(x29, sp); 371 372 // Set the sp to a known value. 373 __ Mov(sp, 0x1004); 374 __ Mov(x0, sp); 375 376 // Add immediate to the sp, and move the result to a normal register. 
TEST(stack_ops) {
  SETUP();

  START();
  // Save sp.
  __ Mov(x29, sp);

  // Set sp to a known value.
  __ Mov(sp, 0x1004);
  __ Mov(x0, sp);

  // Add an immediate to sp, and move the result to a normal register.
  __ Add(sp, sp, 0x50);
  __ Mov(x1, sp);

  // Add an extended register operand to sp, and move the result to a normal
  // register.
  __ Mov(x17, 0xfff);
  __ Add(sp, sp, Operand(x17, SXTB));
  __ Mov(x2, sp);

  // Set sp using a logical instruction, and move the result to a normal
  // register.
  __ Orr(sp, xzr, 0x1fff);
  __ Mov(x3, sp);

  // Write wsp using a logical instruction.
  __ Orr(wsp, wzr, 0xfffffff8);
  __ Mov(x4, sp);

  // Write sp, and read back wsp.
  __ Orr(sp, xzr, 0xfffffff8);
  __ Mov(w5, wsp);

  // Restore sp.
  __ Mov(sp, x29);
  END();

  RUN();

  ASSERT_EQUAL_64(0x1004, x0);
  ASSERT_EQUAL_64(0x1054, x1);
  ASSERT_EQUAL_64(0x1053, x2);
  ASSERT_EQUAL_64(0x1fff, x3);
  ASSERT_EQUAL_64(0xfffffff8, x4);
  ASSERT_EQUAL_64(0xfffffff8, x5);

  TEARDOWN();
}


TEST(mvn) {
  SETUP();

  START();
  __ Mvn(w0, 0xfff);
  __ Mvn(x1, 0xfff);
  __ Mvn(w2, Operand(w0, LSL, 1));
  __ Mvn(x3, Operand(x1, LSL, 2));
  __ Mvn(w4, Operand(w0, LSR, 3));
  __ Mvn(x5, Operand(x1, LSR, 4));
  __ Mvn(w6, Operand(w0, ASR, 11));
  __ Mvn(x7, Operand(x1, ASR, 12));
  __ Mvn(w8, Operand(w0, ROR, 13));
  __ Mvn(x9, Operand(x1, ROR, 14));
  __ Mvn(w10, Operand(w2, UXTB));
  __ Mvn(x11, Operand(x2, SXTB, 1));
  __ Mvn(w12, Operand(w2, UXTH, 2));
  __ Mvn(x13, Operand(x2, SXTH, 3));
  __ Mvn(x14, Operand(w2, UXTW, 4));
  __ Mvn(x15, Operand(w2, SXTW, 4));
  END();

  RUN();

  ASSERT_EQUAL_64(0xfffff000, x0);
  ASSERT_EQUAL_64(0xfffffffffffff000, x1);
  ASSERT_EQUAL_64(0x00001fff, x2);
  ASSERT_EQUAL_64(0x0000000000003fff, x3);
  ASSERT_EQUAL_64(0xe00001ff, x4);
  ASSERT_EQUAL_64(0xf0000000000000ff, x5);
  ASSERT_EQUAL_64(0x00000001, x6);
  ASSERT_EQUAL_64(0x0000000000000000, x7);
  ASSERT_EQUAL_64(0x7ff80000, x8);
  ASSERT_EQUAL_64(0x3ffc000000000000, x9);
  ASSERT_EQUAL_64(0xffffff00, x10);
  ASSERT_EQUAL_64(0x0000000000000001, x11);
  ASSERT_EQUAL_64(0xffff8003, x12);
  ASSERT_EQUAL_64(0xffffffffffff0007, x13);
  ASSERT_EQUAL_64(0xfffffffffffe000f, x14);
  ASSERT_EQUAL_64(0xfffffffffffe000f, x15);

  TEARDOWN();
}


TEST(mov_imm_w) {
  SETUP();

  START();
  __ Mov(w0, 0xffffffff);
  __ Mov(w1, 0xffff1234);
  __ Mov(w2, 0x1234ffff);
  __ Mov(w3, 0x00000000);
  __ Mov(w4, 0x00001234);
  __ Mov(w5, 0x12340000);
  __ Mov(w6, 0x12345678);
  __ Mov(w7, (int32_t)0x80000000);
  __ Mov(w8, (int32_t)0xffff0000);
  __ Mov(w9, kWMinInt);
  END();

  RUN();

  ASSERT_EQUAL_64(0xffffffff, x0);
  ASSERT_EQUAL_64(0xffff1234, x1);
  ASSERT_EQUAL_64(0x1234ffff, x2);
  ASSERT_EQUAL_64(0x00000000, x3);
  ASSERT_EQUAL_64(0x00001234, x4);
  ASSERT_EQUAL_64(0x12340000, x5);
  ASSERT_EQUAL_64(0x12345678, x6);
  ASSERT_EQUAL_64(0x80000000, x7);
  ASSERT_EQUAL_64(0xffff0000, x8);
  ASSERT_EQUAL_32(kWMinInt, w9);

  TEARDOWN();
}

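// A sketch of how these constants can be materialised (again an
// implementation detail of the MacroAssembler, shown for illustration): a
// value whose bitwise NOT fits in 16 bits needs only a single MOVN, so
// `Mov(w1, 0xffff1234)` can be:
//
//   movn w1, #0xedcb  // w1 = ~0x0000edcb = 0xffff1234
//
// while a value with two interesting 16-bit chunks such as 0x12345678 takes
// a MOVZ/MOVK pair (up to three MOVKs for an X register):
//
//   movz w6, #0x5678
//   movk w6, #0x1234, lsl #16
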
TEST(mov_imm_x) {
  SETUP();

  START();
  __ Mov(x0, 0xffffffffffffffff);
  __ Mov(x1, 0xffffffffffff1234);
  __ Mov(x2, 0xffffffff12345678);
  __ Mov(x3, 0xffff1234ffff5678);
  __ Mov(x4, 0x1234ffffffff5678);
  __ Mov(x5, 0x1234ffff5678ffff);
  __ Mov(x6, 0x12345678ffffffff);
  __ Mov(x7, 0x1234ffffffffffff);
  __ Mov(x8, 0x123456789abcffff);
  __ Mov(x9, 0x12345678ffff9abc);
  __ Mov(x10, 0x1234ffff56789abc);
  __ Mov(x11, 0xffff123456789abc);
  __ Mov(x12, 0x0000000000000000);
  __ Mov(x13, 0x0000000000001234);
  __ Mov(x14, 0x0000000012345678);
  __ Mov(x15, 0x0000123400005678);
  __ Mov(x18, 0x1234000000005678);
  __ Mov(x19, 0x1234000056780000);
  __ Mov(x20, 0x1234567800000000);
  __ Mov(x21, 0x1234000000000000);
  __ Mov(x22, 0x123456789abc0000);
  __ Mov(x23, 0x1234567800009abc);
  __ Mov(x24, 0x1234000056789abc);
  __ Mov(x25, 0x0000123456789abc);
  __ Mov(x26, 0x123456789abcdef0);
  __ Mov(x27, 0xffff000000000001);
  __ Mov(x28, 0x8000ffff00000000);
  END();

  RUN();

  ASSERT_EQUAL_64(0xffffffffffff1234, x1);
  ASSERT_EQUAL_64(0xffffffff12345678, x2);
  ASSERT_EQUAL_64(0xffff1234ffff5678, x3);
  ASSERT_EQUAL_64(0x1234ffffffff5678, x4);
  ASSERT_EQUAL_64(0x1234ffff5678ffff, x5);
  ASSERT_EQUAL_64(0x12345678ffffffff, x6);
  ASSERT_EQUAL_64(0x1234ffffffffffff, x7);
  ASSERT_EQUAL_64(0x123456789abcffff, x8);
  ASSERT_EQUAL_64(0x12345678ffff9abc, x9);
  ASSERT_EQUAL_64(0x1234ffff56789abc, x10);
  ASSERT_EQUAL_64(0xffff123456789abc, x11);
  ASSERT_EQUAL_64(0x0000000000000000, x12);
  ASSERT_EQUAL_64(0x0000000000001234, x13);
  ASSERT_EQUAL_64(0x0000000012345678, x14);
  ASSERT_EQUAL_64(0x0000123400005678, x15);
  ASSERT_EQUAL_64(0x1234000000005678, x18);
  ASSERT_EQUAL_64(0x1234000056780000, x19);
  ASSERT_EQUAL_64(0x1234567800000000, x20);
  ASSERT_EQUAL_64(0x1234000000000000, x21);
  ASSERT_EQUAL_64(0x123456789abc0000, x22);
  ASSERT_EQUAL_64(0x1234567800009abc, x23);
  ASSERT_EQUAL_64(0x1234000056789abc, x24);
  ASSERT_EQUAL_64(0x0000123456789abc, x25);
  ASSERT_EQUAL_64(0x123456789abcdef0, x26);
  ASSERT_EQUAL_64(0xffff000000000001, x27);
  ASSERT_EQUAL_64(0x8000ffff00000000, x28);

  TEARDOWN();
}

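// TEST(mov) below drops to the raw assembler for movz/movk/movn. Lower-case
// mnemonics bypass the MacroAssembler, so they must be wrapped in an
// ExactAssemblyScope declaring exactly how much code will be emitted, as in
// this excerpt from the test:
//
//   {
//     ExactAssemblyScope scope(&masm, 3 * kInstructionSize);
//     __ movz(x1, UINT64_C(0xabcd) << 16);
//     __ movk(x2, UINT64_C(0xabcd) << 32);
//     __ movn(x3, UINT64_C(0xabcd) << 48);
//   }
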
TEST(mov) {
  SETUP();

  START();
  __ Mov(x0, 0xffffffffffffffff);
  __ Mov(x1, 0xffffffffffffffff);
  __ Mov(x2, 0xffffffffffffffff);
  __ Mov(x3, 0xffffffffffffffff);

  __ Mov(x0, 0x0123456789abcdef);

  {
    ExactAssemblyScope scope(&masm, 3 * kInstructionSize);
    __ movz(x1, UINT64_C(0xabcd) << 16);
    __ movk(x2, UINT64_C(0xabcd) << 32);
    __ movn(x3, UINT64_C(0xabcd) << 48);
  }

  __ Mov(x4, 0x0123456789abcdef);
  __ Mov(x5, x4);

  __ Mov(w6, -1);

  // Test that moves back to the same register have the desired effect. This
  // is a no-op for X registers, and a truncation for W registers.
  __ Mov(x7, 0x0123456789abcdef);
  __ Mov(x7, x7);
  __ Mov(x8, 0x0123456789abcdef);
  __ Mov(w8, w8);
  __ Mov(x9, 0x0123456789abcdef);
  __ Mov(x9, Operand(x9));
  __ Mov(x10, 0x0123456789abcdef);
  __ Mov(w10, Operand(w10));

  __ Mov(w11, 0xfff);
  __ Mov(x12, 0xfff);
  __ Mov(w13, Operand(w11, LSL, 1));
  __ Mov(x14, Operand(x12, LSL, 2));
  __ Mov(w15, Operand(w11, LSR, 3));
  __ Mov(x18, Operand(x12, LSR, 4));
  __ Mov(w19, Operand(w11, ASR, 11));
  __ Mov(x20, Operand(x12, ASR, 12));
  __ Mov(w21, Operand(w11, ROR, 13));
  __ Mov(x22, Operand(x12, ROR, 14));
  __ Mov(w23, Operand(w13, UXTB));
  __ Mov(x24, Operand(x13, SXTB, 1));
  __ Mov(w25, Operand(w13, UXTH, 2));
  __ Mov(x26, Operand(x13, SXTH, 3));
  __ Mov(x27, Operand(w13, UXTW, 4));

  __ Mov(x28, 0x0123456789abcdef);
  __ Mov(w28, w28, kDiscardForSameWReg);
  END();

  RUN();

  ASSERT_EQUAL_64(0x0123456789abcdef, x0);
  ASSERT_EQUAL_64(0x00000000abcd0000, x1);
  ASSERT_EQUAL_64(0xffffabcdffffffff, x2);
  ASSERT_EQUAL_64(0x5432ffffffffffff, x3);
  ASSERT_EQUAL_64(x4, x5);
  ASSERT_EQUAL_32(-1, w6);
  ASSERT_EQUAL_64(0x0123456789abcdef, x7);
  ASSERT_EQUAL_32(0x89abcdef, w8);
  ASSERT_EQUAL_64(0x0123456789abcdef, x9);
  ASSERT_EQUAL_32(0x89abcdef, w10);
  ASSERT_EQUAL_64(0x00000fff, x11);
  ASSERT_EQUAL_64(0x0000000000000fff, x12);
  ASSERT_EQUAL_64(0x00001ffe, x13);
  ASSERT_EQUAL_64(0x0000000000003ffc, x14);
  ASSERT_EQUAL_64(0x000001ff, x15);
  ASSERT_EQUAL_64(0x00000000000000ff, x18);
  ASSERT_EQUAL_64(0x00000001, x19);
  ASSERT_EQUAL_64(0x0000000000000000, x20);
  ASSERT_EQUAL_64(0x7ff80000, x21);
  ASSERT_EQUAL_64(0x3ffc000000000000, x22);
  ASSERT_EQUAL_64(0x000000fe, x23);
  ASSERT_EQUAL_64(0xfffffffffffffffc, x24);
  ASSERT_EQUAL_64(0x00007ff8, x25);
  ASSERT_EQUAL_64(0x000000000000fff0, x26);
  ASSERT_EQUAL_64(0x000000000001ffe0, x27);
  ASSERT_EQUAL_64(0x0123456789abcdef, x28);

  TEARDOWN();
}


TEST(mov_negative) {
  SETUP();

  START();
  __ Mov(w11, 0xffffffff);
  __ Mov(x12, 0xffffffffffffffff);

  __ Mov(w13, Operand(w11, LSL, 1));
  __ Mov(w14, Operand(w11, LSR, 1));
  __ Mov(w15, Operand(w11, ASR, 1));
  __ Mov(w18, Operand(w11, ROR, 1));
  __ Mov(w19, Operand(w11, UXTB, 1));
  __ Mov(w20, Operand(w11, SXTB, 1));
  __ Mov(w21, Operand(w11, UXTH, 1));
  __ Mov(w22, Operand(w11, SXTH, 1));

  __ Mov(x23, Operand(x12, LSL, 1));
  __ Mov(x24, Operand(x12, LSR, 1));
  __ Mov(x25, Operand(x12, ASR, 1));
  __ Mov(x26, Operand(x12, ROR, 1));
  __ Mov(x27, Operand(x12, UXTH, 1));
  __ Mov(x28, Operand(x12, SXTH, 1));
  __ Mov(x29, Operand(x12, UXTW, 1));
  __ Mov(x30, Operand(x12, SXTW, 1));
  END();

  RUN();

  ASSERT_EQUAL_64(0xfffffffe, x13);
  ASSERT_EQUAL_64(0x7fffffff, x14);
  ASSERT_EQUAL_64(0xffffffff, x15);
  ASSERT_EQUAL_64(0xffffffff, x18);
  ASSERT_EQUAL_64(0x000001fe, x19);
  ASSERT_EQUAL_64(0xfffffffe, x20);
  ASSERT_EQUAL_64(0x0001fffe, x21);
  ASSERT_EQUAL_64(0xfffffffe, x22);

  ASSERT_EQUAL_64(0xfffffffffffffffe, x23);
  ASSERT_EQUAL_64(0x7fffffffffffffff, x24);
  ASSERT_EQUAL_64(0xffffffffffffffff, x25);
  ASSERT_EQUAL_64(0xffffffffffffffff, x26);
  ASSERT_EQUAL_64(0x000000000001fffe, x27);
  ASSERT_EQUAL_64(0xfffffffffffffffe, x28);
  ASSERT_EQUAL_64(0x00000001fffffffe, x29);
  ASSERT_EQUAL_64(0xfffffffffffffffe, x30);

  TEARDOWN();
}

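// A note on the extended-register Operand form used heavily below: an
// operand such as Operand(x13, SXTB, 1) first sign-extends the low byte of
// x13, then shifts the result left by 1. A worked example from TEST(mov)
// above: w13 holds 0x1ffe, whose low byte 0xfe sign-extends to -2, so
// Mov(x24, Operand(x13, SXTB, 1)) produces (-2 << 1) = -4, i.e.
// x24 == 0xfffffffffffffffc, as asserted.
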
TEST(orr) {
  SETUP();

  START();
  __ Mov(x0, 0xf0f0);
  __ Mov(x1, 0xf00000ff);

  __ Orr(x2, x0, Operand(x1));
  __ Orr(w3, w0, Operand(w1, LSL, 28));
  __ Orr(x4, x0, Operand(x1, LSL, 32));
  __ Orr(x5, x0, Operand(x1, LSR, 4));
  __ Orr(w6, w0, Operand(w1, ASR, 4));
  __ Orr(x7, x0, Operand(x1, ASR, 4));
  __ Orr(w8, w0, Operand(w1, ROR, 12));
  __ Orr(x9, x0, Operand(x1, ROR, 12));
  __ Orr(w10, w0, 0xf);
  __ Orr(x11, x0, 0xf0000000f0000000);
  END();

  RUN();

  ASSERT_EQUAL_64(0x00000000f000f0ff, x2);
  ASSERT_EQUAL_64(0xf000f0f0, x3);
  ASSERT_EQUAL_64(0xf00000ff0000f0f0, x4);
  ASSERT_EQUAL_64(0x000000000f00f0ff, x5);
  ASSERT_EQUAL_64(0xff00f0ff, x6);
  ASSERT_EQUAL_64(0x000000000f00f0ff, x7);
  ASSERT_EQUAL_64(0x0ffff0f0, x8);
  ASSERT_EQUAL_64(0x0ff00000000ff0f0, x9);
  ASSERT_EQUAL_64(0x0000f0ff, x10);
  ASSERT_EQUAL_64(0xf0000000f000f0f0, x11);

  TEARDOWN();
}


TEST(orr_extend) {
  SETUP();

  START();
  __ Mov(x0, 1);
  __ Mov(x1, 0x8000000080008080);
  __ Orr(w6, w0, Operand(w1, UXTB));
  __ Orr(x7, x0, Operand(x1, UXTH, 1));
  __ Orr(w8, w0, Operand(w1, UXTW, 2));
  __ Orr(x9, x0, Operand(x1, UXTX, 3));
  __ Orr(w10, w0, Operand(w1, SXTB));
  __ Orr(x11, x0, Operand(x1, SXTH, 1));
  __ Orr(x12, x0, Operand(x1, SXTW, 2));
  __ Orr(x13, x0, Operand(x1, SXTX, 3));
  END();

  RUN();

  ASSERT_EQUAL_64(0x00000081, x6);
  ASSERT_EQUAL_64(0x0000000000010101, x7);
  ASSERT_EQUAL_64(0x00020201, x8);
  ASSERT_EQUAL_64(0x0000000400040401, x9);
  ASSERT_EQUAL_64(0xffffff81, x10);
  ASSERT_EQUAL_64(0xffffffffffff0101, x11);
  ASSERT_EQUAL_64(0xfffffffe00020201, x12);
  ASSERT_EQUAL_64(0x0000000400040401, x13);

  TEARDOWN();
}


TEST(bitwise_wide_imm) {
  SETUP();

  START();
  __ Mov(x0, 0);
  __ Mov(x1, 0xf0f0f0f0f0f0f0f0);

  __ Orr(x10, x0, 0x1234567890abcdef);
  __ Orr(w11, w1, 0x90abcdef);

  __ Orr(w12, w0, kWMinInt);
  __ Eor(w13, w0, kWMinInt);
  END();

  RUN();

  ASSERT_EQUAL_64(0, x0);
  ASSERT_EQUAL_64(0xf0f0f0f0f0f0f0f0, x1);
  ASSERT_EQUAL_64(0x1234567890abcdef, x10);
  ASSERT_EQUAL_64(0x00000000f0fbfdff, x11);
  ASSERT_EQUAL_32(kWMinInt, w12);
  ASSERT_EQUAL_32(kWMinInt, w13);

  TEARDOWN();
}


TEST(orn) {
  SETUP();

  START();
  __ Mov(x0, 0xf0f0);
  __ Mov(x1, 0xf00000ff);

  __ Orn(x2, x0, Operand(x1));
  __ Orn(w3, w0, Operand(w1, LSL, 4));
  __ Orn(x4, x0, Operand(x1, LSL, 4));
  __ Orn(x5, x0, Operand(x1, LSR, 1));
  __ Orn(w6, w0, Operand(w1, ASR, 1));
  __ Orn(x7, x0, Operand(x1, ASR, 1));
  __ Orn(w8, w0, Operand(w1, ROR, 16));
  __ Orn(x9, x0, Operand(x1, ROR, 16));
  __ Orn(w10, w0, 0x0000ffff);
  __ Orn(x11, x0, 0x0000ffff0000ffff);
  END();

  RUN();

  ASSERT_EQUAL_64(0xffffffff0ffffff0, x2);
  ASSERT_EQUAL_64(0xfffff0ff, x3);
  ASSERT_EQUAL_64(0xfffffff0fffff0ff, x4);
  ASSERT_EQUAL_64(0xffffffff87fffff0, x5);
  ASSERT_EQUAL_64(0x07fffff0, x6);
  ASSERT_EQUAL_64(0xffffffff87fffff0, x7);
  ASSERT_EQUAL_64(0xff00ffff, x8);
  ASSERT_EQUAL_64(0xff00ffffffffffff, x9);
  ASSERT_EQUAL_64(0xfffff0f0, x10);
  ASSERT_EQUAL_64(0xffff0000fffff0f0, x11);

  TEARDOWN();
}

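// Orn computes rd = rn | ~operand. A worked example from TEST(orn) above:
// with x0 = 0xf0f0, Orn(w10, w0, 0x0000ffff) evaluates
// 0x0000f0f0 | ~0x0000ffff = 0x0000f0f0 | 0xffff0000 = 0xfffff0f0, matching
// the x10 assertion.
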
TEST(orn_extend) {
  SETUP();

  START();
  __ Mov(x0, 1);
  __ Mov(x1, 0x8000000080008081);
  __ Orn(w6, w0, Operand(w1, UXTB));
  __ Orn(x7, x0, Operand(x1, UXTH, 1));
  __ Orn(w8, w0, Operand(w1, UXTW, 2));
  __ Orn(x9, x0, Operand(x1, UXTX, 3));
  __ Orn(w10, w0, Operand(w1, SXTB));
  __ Orn(x11, x0, Operand(x1, SXTH, 1));
  __ Orn(x12, x0, Operand(x1, SXTW, 2));
  __ Orn(x13, x0, Operand(x1, SXTX, 3));
  END();

  RUN();

  ASSERT_EQUAL_64(0xffffff7f, x6);
  ASSERT_EQUAL_64(0xfffffffffffefefd, x7);
  ASSERT_EQUAL_64(0xfffdfdfb, x8);
  ASSERT_EQUAL_64(0xfffffffbfffbfbf7, x9);
  ASSERT_EQUAL_64(0x0000007f, x10);
  ASSERT_EQUAL_64(0x000000000000fefd, x11);
  ASSERT_EQUAL_64(0x00000001fffdfdfb, x12);
  ASSERT_EQUAL_64(0xfffffffbfffbfbf7, x13);

  TEARDOWN();
}


TEST(and_) {
  SETUP();

  START();
  __ Mov(x0, 0xfff0);
  __ Mov(x1, 0xf00000ff);

  __ And(x2, x0, Operand(x1));
  __ And(w3, w0, Operand(w1, LSL, 4));
  __ And(x4, x0, Operand(x1, LSL, 4));
  __ And(x5, x0, Operand(x1, LSR, 1));
  __ And(w6, w0, Operand(w1, ASR, 20));
  __ And(x7, x0, Operand(x1, ASR, 20));
  __ And(w8, w0, Operand(w1, ROR, 28));
  __ And(x9, x0, Operand(x1, ROR, 28));
  __ And(w10, w0, Operand(0xff00));
  __ And(x11, x0, Operand(0xff));
  END();

  RUN();

  ASSERT_EQUAL_64(0x000000f0, x2);
  ASSERT_EQUAL_64(0x00000ff0, x3);
  ASSERT_EQUAL_64(0x00000ff0, x4);
  ASSERT_EQUAL_64(0x00000070, x5);
  ASSERT_EQUAL_64(0x0000ff00, x6);
  ASSERT_EQUAL_64(0x00000f00, x7);
  ASSERT_EQUAL_64(0x00000ff0, x8);
  ASSERT_EQUAL_64(0x00000000, x9);
  ASSERT_EQUAL_64(0x0000ff00, x10);
  ASSERT_EQUAL_64(0x000000f0, x11);

  TEARDOWN();
}


TEST(and_extend) {
  SETUP();

  START();
  __ Mov(x0, 0xffffffffffffffff);
  __ Mov(x1, 0x8000000080008081);
  __ And(w6, w0, Operand(w1, UXTB));
  __ And(x7, x0, Operand(x1, UXTH, 1));
  __ And(w8, w0, Operand(w1, UXTW, 2));
  __ And(x9, x0, Operand(x1, UXTX, 3));
  __ And(w10, w0, Operand(w1, SXTB));
  __ And(x11, x0, Operand(x1, SXTH, 1));
  __ And(x12, x0, Operand(x1, SXTW, 2));
  __ And(x13, x0, Operand(x1, SXTX, 3));
  END();

  RUN();

  ASSERT_EQUAL_64(0x00000081, x6);
  ASSERT_EQUAL_64(0x0000000000010102, x7);
  ASSERT_EQUAL_64(0x00020204, x8);
  ASSERT_EQUAL_64(0x0000000400040408, x9);
  ASSERT_EQUAL_64(0xffffff81, x10);
  ASSERT_EQUAL_64(0xffffffffffff0102, x11);
  ASSERT_EQUAL_64(0xfffffffe00020204, x12);
  ASSERT_EQUAL_64(0x0000000400040408, x13);

  TEARDOWN();
}


TEST(ands) {
  SETUP();

  START();
  __ Mov(x1, 0xf00000ff);
  __ Ands(w0, w1, Operand(w1));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(NFlag);
  ASSERT_EQUAL_64(0xf00000ff, x0);

  START();
  __ Mov(x0, 0xfff0);
  __ Mov(x1, 0xf00000ff);
  __ Ands(w0, w0, Operand(w1, LSR, 4));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(ZFlag);
  ASSERT_EQUAL_64(0x00000000, x0);

  START();
  __ Mov(x0, 0x8000000000000000);
  __ Mov(x1, 0x00000001);
  __ Ands(x0, x0, Operand(x1, ROR, 1));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(NFlag);
  ASSERT_EQUAL_64(0x8000000000000000, x0);

  START();
  __ Mov(x0, 0xfff0);
  __ Ands(w0, w0, Operand(0xf));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(ZFlag);
  ASSERT_EQUAL_64(0x00000000, x0);

  START();
  __ Mov(x0, 0xff000000);
  __ Ands(w0, w0, Operand(0x80000000));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(NFlag);
  ASSERT_EQUAL_64(0x80000000, x0);

  TEARDOWN();
}

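// Ands performs the AND and also sets the NZCV flags. In the third block of
// TEST(ands) above, x1 = 1 rotated right by 1 becomes 0x8000000000000000;
// ANDing that with x0 = 0x8000000000000000 leaves only the sign bit set, so
// the result is negative and ASSERT_EQUAL_NZCV(NFlag) holds.
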
TEST(bic) {
  SETUP();

  START();
  __ Mov(x0, 0xfff0);
  __ Mov(x1, 0xf00000ff);

  __ Bic(x2, x0, Operand(x1));
  __ Bic(w3, w0, Operand(w1, LSL, 4));
  __ Bic(x4, x0, Operand(x1, LSL, 4));
  __ Bic(x5, x0, Operand(x1, LSR, 1));
  __ Bic(w6, w0, Operand(w1, ASR, 20));
  __ Bic(x7, x0, Operand(x1, ASR, 20));
  __ Bic(w8, w0, Operand(w1, ROR, 28));
  __ Bic(x9, x0, Operand(x1, ROR, 24));
  __ Bic(x10, x0, Operand(0x1f));
  __ Bic(x11, x0, Operand(0x100));

  // Test bic into sp when the constant cannot be encoded in the immediate
  // field.
  // Use x20 to preserve sp. We check for the result via x21 because the
  // test infrastructure requires that sp be restored to its original value.
  __ Mov(x20, sp);
  __ Mov(x0, 0xffffff);
  __ Bic(sp, x0, Operand(0xabcdef));
  __ Mov(x21, sp);
  __ Mov(sp, x20);
  END();

  RUN();

  ASSERT_EQUAL_64(0x0000ff00, x2);
  ASSERT_EQUAL_64(0x0000f000, x3);
  ASSERT_EQUAL_64(0x0000f000, x4);
  ASSERT_EQUAL_64(0x0000ff80, x5);
  ASSERT_EQUAL_64(0x000000f0, x6);
  ASSERT_EQUAL_64(0x0000f0f0, x7);
  ASSERT_EQUAL_64(0x0000f000, x8);
  ASSERT_EQUAL_64(0x0000ff00, x9);
  ASSERT_EQUAL_64(0x0000ffe0, x10);
  ASSERT_EQUAL_64(0x0000fef0, x11);

  ASSERT_EQUAL_64(0x543210, x21);

  TEARDOWN();
}


TEST(bic_extend) {
  SETUP();

  START();
  __ Mov(x0, 0xffffffffffffffff);
  __ Mov(x1, 0x8000000080008081);
  __ Bic(w6, w0, Operand(w1, UXTB));
  __ Bic(x7, x0, Operand(x1, UXTH, 1));
  __ Bic(w8, w0, Operand(w1, UXTW, 2));
  __ Bic(x9, x0, Operand(x1, UXTX, 3));
  __ Bic(w10, w0, Operand(w1, SXTB));
  __ Bic(x11, x0, Operand(x1, SXTH, 1));
  __ Bic(x12, x0, Operand(x1, SXTW, 2));
  __ Bic(x13, x0, Operand(x1, SXTX, 3));
  END();

  RUN();

  ASSERT_EQUAL_64(0xffffff7e, x6);
  ASSERT_EQUAL_64(0xfffffffffffefefd, x7);
  ASSERT_EQUAL_64(0xfffdfdfb, x8);
  ASSERT_EQUAL_64(0xfffffffbfffbfbf7, x9);
  ASSERT_EQUAL_64(0x0000007e, x10);
  ASSERT_EQUAL_64(0x000000000000fefd, x11);
  ASSERT_EQUAL_64(0x00000001fffdfdfb, x12);
  ASSERT_EQUAL_64(0xfffffffbfffbfbf7, x13);

  TEARDOWN();
}


TEST(bics) {
  SETUP();

  START();
  __ Mov(x1, 0xffff);
  __ Bics(w0, w1, Operand(w1));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(ZFlag);
  ASSERT_EQUAL_64(0x00000000, x0);

  START();
  __ Mov(x0, 0xffffffff);
  __ Bics(w0, w0, Operand(w0, LSR, 1));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(NFlag);
  ASSERT_EQUAL_64(0x80000000, x0);

  START();
  __ Mov(x0, 0x8000000000000000);
  __ Mov(x1, 0x00000001);
  __ Bics(x0, x0, Operand(x1, ROR, 1));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(ZFlag);
  ASSERT_EQUAL_64(0x00000000, x0);

  START();
  __ Mov(x0, 0xffffffffffffffff);
  __ Bics(x0, x0, 0x7fffffffffffffff);
  END();

  RUN();

  ASSERT_EQUAL_NZCV(NFlag);
  ASSERT_EQUAL_64(0x8000000000000000, x0);

  START();
  __ Mov(w0, 0xffff0000);
  __ Bics(w0, w0, 0xfffffff0);
  END();

  RUN();

  ASSERT_EQUAL_NZCV(ZFlag);
  ASSERT_EQUAL_64(0x00000000, x0);

  TEARDOWN();
}

TEST(eor) {
  SETUP();

  START();
  __ Mov(x0, 0xfff0);
  __ Mov(x1, 0xf00000ff);

  __ Eor(x2, x0, Operand(x1));
  __ Eor(w3, w0, Operand(w1, LSL, 4));
  __ Eor(x4, x0, Operand(x1, LSL, 4));
  __ Eor(x5, x0, Operand(x1, LSR, 1));
  __ Eor(w6, w0, Operand(w1, ASR, 20));
  __ Eor(x7, x0, Operand(x1, ASR, 20));
  __ Eor(w8, w0, Operand(w1, ROR, 28));
  __ Eor(x9, x0, Operand(x1, ROR, 28));
  __ Eor(w10, w0, 0xff00ff00);
  __ Eor(x11, x0, 0xff00ff00ff00ff00);
  END();

  RUN();

  ASSERT_EQUAL_64(0x00000000f000ff0f, x2);
  ASSERT_EQUAL_64(0x0000f000, x3);
  ASSERT_EQUAL_64(0x0000000f0000f000, x4);
  ASSERT_EQUAL_64(0x000000007800ff8f, x5);
  ASSERT_EQUAL_64(0xffff00f0, x6);
  ASSERT_EQUAL_64(0x000000000000f0f0, x7);
  ASSERT_EQUAL_64(0x0000f00f, x8);
  ASSERT_EQUAL_64(0x00000ff00000ffff, x9);
  ASSERT_EQUAL_64(0xff0000f0, x10);
  ASSERT_EQUAL_64(0xff00ff00ff0000f0, x11);

  TEARDOWN();
}


TEST(eor_extend) {
  SETUP();

  START();
  __ Mov(x0, 0x1111111111111111);
  __ Mov(x1, 0x8000000080008081);
  __ Eor(w6, w0, Operand(w1, UXTB));
  __ Eor(x7, x0, Operand(x1, UXTH, 1));
  __ Eor(w8, w0, Operand(w1, UXTW, 2));
  __ Eor(x9, x0, Operand(x1, UXTX, 3));
  __ Eor(w10, w0, Operand(w1, SXTB));
  __ Eor(x11, x0, Operand(x1, SXTH, 1));
  __ Eor(x12, x0, Operand(x1, SXTW, 2));
  __ Eor(x13, x0, Operand(x1, SXTX, 3));
  END();

  RUN();

  ASSERT_EQUAL_64(0x11111190, x6);
  ASSERT_EQUAL_64(0x1111111111101013, x7);
  ASSERT_EQUAL_64(0x11131315, x8);
  ASSERT_EQUAL_64(0x1111111511151519, x9);
  ASSERT_EQUAL_64(0xeeeeee90, x10);
  ASSERT_EQUAL_64(0xeeeeeeeeeeee1013, x11);
  ASSERT_EQUAL_64(0xeeeeeeef11131315, x12);
  ASSERT_EQUAL_64(0x1111111511151519, x13);

  TEARDOWN();
}


TEST(eon) {
  SETUP();

  START();
  __ Mov(x0, 0xfff0);
  __ Mov(x1, 0xf00000ff);

  __ Eon(x2, x0, Operand(x1));
  __ Eon(w3, w0, Operand(w1, LSL, 4));
  __ Eon(x4, x0, Operand(x1, LSL, 4));
  __ Eon(x5, x0, Operand(x1, LSR, 1));
  __ Eon(w6, w0, Operand(w1, ASR, 20));
  __ Eon(x7, x0, Operand(x1, ASR, 20));
  __ Eon(w8, w0, Operand(w1, ROR, 28));
  __ Eon(x9, x0, Operand(x1, ROR, 28));
  __ Eon(w10, w0, 0x03c003c0);
  __ Eon(x11, x0, 0x0000100000001000);
  END();

  RUN();

  ASSERT_EQUAL_64(0xffffffff0fff00f0, x2);
  ASSERT_EQUAL_64(0xffff0fff, x3);
  ASSERT_EQUAL_64(0xfffffff0ffff0fff, x4);
  ASSERT_EQUAL_64(0xffffffff87ff0070, x5);
  ASSERT_EQUAL_64(0x0000ff0f, x6);
  ASSERT_EQUAL_64(0xffffffffffff0f0f, x7);
  ASSERT_EQUAL_64(0xffff0ff0, x8);
  ASSERT_EQUAL_64(0xfffff00fffff0000, x9);
  ASSERT_EQUAL_64(0xfc3f03cf, x10);
  ASSERT_EQUAL_64(0xffffefffffff100f, x11);

  TEARDOWN();
}


TEST(eon_extend) {
  SETUP();

  START();
  __ Mov(x0, 0x1111111111111111);
  __ Mov(x1, 0x8000000080008081);
  __ Eon(w6, w0, Operand(w1, UXTB));
  __ Eon(x7, x0, Operand(x1, UXTH, 1));
  __ Eon(w8, w0, Operand(w1, UXTW, 2));
  __ Eon(x9, x0, Operand(x1, UXTX, 3));
  __ Eon(w10, w0, Operand(w1, SXTB));
  __ Eon(x11, x0, Operand(x1, SXTH, 1));
  __ Eon(x12, x0, Operand(x1, SXTW, 2));
  __ Eon(x13, x0, Operand(x1, SXTX, 3));
  END();

  RUN();

  ASSERT_EQUAL_64(0xeeeeee6f, x6);
  ASSERT_EQUAL_64(0xeeeeeeeeeeefefec, x7);
  ASSERT_EQUAL_64(0xeeececea, x8);
  ASSERT_EQUAL_64(0xeeeeeeeaeeeaeae6, x9);
  ASSERT_EQUAL_64(0x1111116f, x10);
  ASSERT_EQUAL_64(0x111111111111efec, x11);
  ASSERT_EQUAL_64(0x11111110eeececea, x12);
  ASSERT_EQUAL_64(0xeeeeeeeaeeeaeae6, x13);

  TEARDOWN();
}

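// Eon computes rd = rn ^ ~operand. A worked example from TEST(eon) above:
// with x0 = 0xfff0, Eon(w10, w0, 0x03c003c0) evaluates
// 0x0000fff0 ^ ~0x03c003c0 = 0x0000fff0 ^ 0xfc3ffc3f = 0xfc3f03cf, matching
// the x10 assertion.
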
TEST(mul) {
  SETUP();

  START();
  __ Mov(x25, 0);
  __ Mov(x26, 1);
  __ Mov(x18, 0xffffffff);
  __ Mov(x19, 0xffffffffffffffff);

  __ Mul(w0, w25, w25);
  __ Mul(w1, w25, w26);
  __ Mul(w2, w26, w18);
  __ Mul(w3, w18, w19);
  __ Mul(x4, x25, x25);
  __ Mul(x5, x26, x18);
  __ Mul(x6, x18, x19);
  __ Mul(x7, x19, x19);
  __ Smull(x8, w26, w18);
  __ Smull(x9, w18, w18);
  __ Smull(x10, w19, w19);
  __ Mneg(w11, w25, w25);
  __ Mneg(w12, w25, w26);
  __ Mneg(w13, w26, w18);
  __ Mneg(w14, w18, w19);
  __ Mneg(x20, x25, x25);
  __ Mneg(x21, x26, x18);
  __ Mneg(x22, x18, x19);
  __ Mneg(x23, x19, x19);
  END();

  RUN();

  ASSERT_EQUAL_64(0, x0);
  ASSERT_EQUAL_64(0, x1);
  ASSERT_EQUAL_64(0xffffffff, x2);
  ASSERT_EQUAL_64(1, x3);
  ASSERT_EQUAL_64(0, x4);
  ASSERT_EQUAL_64(0xffffffff, x5);
  ASSERT_EQUAL_64(0xffffffff00000001, x6);
  ASSERT_EQUAL_64(1, x7);
  ASSERT_EQUAL_64(0xffffffffffffffff, x8);
  ASSERT_EQUAL_64(1, x9);
  ASSERT_EQUAL_64(1, x10);
  ASSERT_EQUAL_64(0, x11);
  ASSERT_EQUAL_64(0, x12);
  ASSERT_EQUAL_64(1, x13);
  ASSERT_EQUAL_64(0xffffffff, x14);
  ASSERT_EQUAL_64(0, x20);
  ASSERT_EQUAL_64(0xffffffff00000001, x21);
  ASSERT_EQUAL_64(0xffffffff, x22);
  ASSERT_EQUAL_64(0xffffffffffffffff, x23);

  TEARDOWN();
}


static void SmullHelper(int64_t expected, int64_t a, int64_t b) {
  SETUP();
  START();
  __ Mov(w0, a);
  __ Mov(w1, b);
  __ Smull(x2, w0, w1);
  END();
  RUN();
  ASSERT_EQUAL_64(expected, x2);
  TEARDOWN();
}


TEST(smull) {
  SmullHelper(0, 0, 0);
  SmullHelper(1, 1, 1);
  SmullHelper(-1, -1, 1);
  SmullHelper(1, -1, -1);
  SmullHelper(0xffffffff80000000, 0x80000000, 1);
  SmullHelper(0x0000000080000000, 0x00010000, 0x00008000);
}

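// SmullHelper illustrates the pattern for parameterised tests: each call
// assembles and runs a fresh snippet with different inputs. For example,
// SmullHelper(0xffffffff80000000, 0x80000000, 1) checks that Smull treats
// its W operands as signed: (-2^31) * 1 sign-extends to 0xffffffff80000000
// in the 64-bit result.
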
TEST(madd) {
  SETUP();

  START();
  __ Mov(x16, 0);
  __ Mov(x17, 1);
  __ Mov(x18, 0xffffffff);
  __ Mov(x19, 0xffffffffffffffff);

  __ Madd(w0, w16, w16, w16);
  __ Madd(w1, w16, w16, w17);
  __ Madd(w2, w16, w16, w18);
  __ Madd(w3, w16, w16, w19);
  __ Madd(w4, w16, w17, w17);
  __ Madd(w5, w17, w17, w18);
  __ Madd(w6, w17, w17, w19);
  __ Madd(w7, w17, w18, w16);
  __ Madd(w8, w17, w18, w18);
  __ Madd(w9, w18, w18, w17);
  __ Madd(w10, w18, w19, w18);
  __ Madd(w11, w19, w19, w19);

  __ Madd(x12, x16, x16, x16);
  __ Madd(x13, x16, x16, x17);
  __ Madd(x14, x16, x16, x18);
  __ Madd(x15, x16, x16, x19);
  __ Madd(x20, x16, x17, x17);
  __ Madd(x21, x17, x17, x18);
  __ Madd(x22, x17, x17, x19);
  __ Madd(x23, x17, x18, x16);
  __ Madd(x24, x17, x18, x18);
  __ Madd(x25, x18, x18, x17);
  __ Madd(x26, x18, x19, x18);
  __ Madd(x27, x19, x19, x19);

  END();

  RUN();

  ASSERT_EQUAL_64(0, x0);
  ASSERT_EQUAL_64(1, x1);
  ASSERT_EQUAL_64(0xffffffff, x2);
  ASSERT_EQUAL_64(0xffffffff, x3);
  ASSERT_EQUAL_64(1, x4);
  ASSERT_EQUAL_64(0, x5);
  ASSERT_EQUAL_64(0, x6);
  ASSERT_EQUAL_64(0xffffffff, x7);
  ASSERT_EQUAL_64(0xfffffffe, x8);
  ASSERT_EQUAL_64(2, x9);
  ASSERT_EQUAL_64(0, x10);
  ASSERT_EQUAL_64(0, x11);

  ASSERT_EQUAL_64(0, x12);
  ASSERT_EQUAL_64(1, x13);
  ASSERT_EQUAL_64(0x00000000ffffffff, x14);
  ASSERT_EQUAL_64(0xffffffffffffffff, x15);
  ASSERT_EQUAL_64(1, x20);
  ASSERT_EQUAL_64(0x0000000100000000, x21);
  ASSERT_EQUAL_64(0, x22);
  ASSERT_EQUAL_64(0x00000000ffffffff, x23);
  ASSERT_EQUAL_64(0x00000001fffffffe, x24);
  ASSERT_EQUAL_64(0xfffffffe00000002, x25);
  ASSERT_EQUAL_64(0, x26);
  ASSERT_EQUAL_64(0, x27);

  TEARDOWN();
}


TEST(msub) {
  SETUP();

  START();
  __ Mov(x16, 0);
  __ Mov(x17, 1);
  __ Mov(x18, 0xffffffff);
  __ Mov(x19, 0xffffffffffffffff);

  __ Msub(w0, w16, w16, w16);
  __ Msub(w1, w16, w16, w17);
  __ Msub(w2, w16, w16, w18);
  __ Msub(w3, w16, w16, w19);
  __ Msub(w4, w16, w17, w17);
  __ Msub(w5, w17, w17, w18);
  __ Msub(w6, w17, w17, w19);
  __ Msub(w7, w17, w18, w16);
  __ Msub(w8, w17, w18, w18);
  __ Msub(w9, w18, w18, w17);
  __ Msub(w10, w18, w19, w18);
  __ Msub(w11, w19, w19, w19);

  __ Msub(x12, x16, x16, x16);
  __ Msub(x13, x16, x16, x17);
  __ Msub(x14, x16, x16, x18);
  __ Msub(x15, x16, x16, x19);
  __ Msub(x20, x16, x17, x17);
  __ Msub(x21, x17, x17, x18);
  __ Msub(x22, x17, x17, x19);
  __ Msub(x23, x17, x18, x16);
  __ Msub(x24, x17, x18, x18);
  __ Msub(x25, x18, x18, x17);
  __ Msub(x26, x18, x19, x18);
  __ Msub(x27, x19, x19, x19);

  END();

  RUN();

  ASSERT_EQUAL_64(0, x0);
  ASSERT_EQUAL_64(1, x1);
  ASSERT_EQUAL_64(0xffffffff, x2);
  ASSERT_EQUAL_64(0xffffffff, x3);
  ASSERT_EQUAL_64(1, x4);
  ASSERT_EQUAL_64(0xfffffffe, x5);
  ASSERT_EQUAL_64(0xfffffffe, x6);
  ASSERT_EQUAL_64(1, x7);
  ASSERT_EQUAL_64(0, x8);
  ASSERT_EQUAL_64(0, x9);
  ASSERT_EQUAL_64(0xfffffffe, x10);
  ASSERT_EQUAL_64(0xfffffffe, x11);

  ASSERT_EQUAL_64(0, x12);
  ASSERT_EQUAL_64(1, x13);
  ASSERT_EQUAL_64(0x00000000ffffffff, x14);
  ASSERT_EQUAL_64(0xffffffffffffffff, x15);
  ASSERT_EQUAL_64(1, x20);
  ASSERT_EQUAL_64(0x00000000fffffffe, x21);
  ASSERT_EQUAL_64(0xfffffffffffffffe, x22);
  ASSERT_EQUAL_64(0xffffffff00000001, x23);
  ASSERT_EQUAL_64(0, x24);
  ASSERT_EQUAL_64(0x0000000200000000, x25);
  ASSERT_EQUAL_64(0x00000001fffffffe, x26);
  ASSERT_EQUAL_64(0xfffffffffffffffe, x27);

  TEARDOWN();
}

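// The two tests below check the high 64 bits of a 128-bit product. Smulh and
// Umulh agree whenever both operands are non-negative as signed values:
// squaring 0x5555555555555555 gives a high half of 0x1c71c71c71c71c71 in
// both tests. They diverge once an operand is negative when viewed as
// signed: x28 * x29 yields 0xe38e38e38e38e38e from Smulh but
// 0x38e38e38e38e38e3 from Umulh.
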
TEST(smulh) {
  SETUP();

  START();
  __ Mov(x20, 0);
  __ Mov(x21, 1);
  __ Mov(x22, 0x0000000100000000);
  __ Mov(x23, 0x0000000012345678);
  __ Mov(x24, 0x0123456789abcdef);
  __ Mov(x25, 0x0000000200000000);
  __ Mov(x26, 0x8000000000000000);
  __ Mov(x27, 0xffffffffffffffff);
  __ Mov(x28, 0x5555555555555555);
  __ Mov(x29, 0xaaaaaaaaaaaaaaaa);

  __ Smulh(x0, x20, x24);
  __ Smulh(x1, x21, x24);
  __ Smulh(x2, x22, x23);
  __ Smulh(x3, x22, x24);
  __ Smulh(x4, x24, x25);
  __ Smulh(x5, x23, x27);
  __ Smulh(x6, x26, x26);
  __ Smulh(x7, x26, x27);
  __ Smulh(x8, x27, x27);
  __ Smulh(x9, x28, x28);
  __ Smulh(x10, x28, x29);
  __ Smulh(x11, x29, x29);
  END();

  RUN();

  ASSERT_EQUAL_64(0, x0);
  ASSERT_EQUAL_64(0, x1);
  ASSERT_EQUAL_64(0, x2);
  ASSERT_EQUAL_64(0x0000000001234567, x3);
  ASSERT_EQUAL_64(0x0000000002468acf, x4);
  ASSERT_EQUAL_64(0xffffffffffffffff, x5);
  ASSERT_EQUAL_64(0x4000000000000000, x6);
  ASSERT_EQUAL_64(0, x7);
  ASSERT_EQUAL_64(0, x8);
  ASSERT_EQUAL_64(0x1c71c71c71c71c71, x9);
  ASSERT_EQUAL_64(0xe38e38e38e38e38e, x10);
  ASSERT_EQUAL_64(0x1c71c71c71c71c72, x11);

  TEARDOWN();
}


TEST(umulh) {
  SETUP();

  START();
  __ Mov(x20, 0);
  __ Mov(x21, 1);
  __ Mov(x22, 0x0000000100000000);
  __ Mov(x23, 0x0000000012345678);
  __ Mov(x24, 0x0123456789abcdef);
  __ Mov(x25, 0x0000000200000000);
  __ Mov(x26, 0x8000000000000000);
  __ Mov(x27, 0xffffffffffffffff);
  __ Mov(x28, 0x5555555555555555);
  __ Mov(x29, 0xaaaaaaaaaaaaaaaa);

  __ Umulh(x0, x20, x24);
  __ Umulh(x1, x21, x24);
  __ Umulh(x2, x22, x23);
  __ Umulh(x3, x22, x24);
  __ Umulh(x4, x24, x25);
  __ Umulh(x5, x23, x27);
  __ Umulh(x6, x26, x26);
  __ Umulh(x7, x26, x27);
  __ Umulh(x8, x27, x27);
  __ Umulh(x9, x28, x28);
  __ Umulh(x10, x28, x29);
  __ Umulh(x11, x29, x29);
  END();

  RUN();

  ASSERT_EQUAL_64(0, x0);
  ASSERT_EQUAL_64(0, x1);
  ASSERT_EQUAL_64(0, x2);
  ASSERT_EQUAL_64(0x0000000001234567, x3);
  ASSERT_EQUAL_64(0x0000000002468acf, x4);
  ASSERT_EQUAL_64(0x0000000012345677, x5);
  ASSERT_EQUAL_64(0x4000000000000000, x6);
  ASSERT_EQUAL_64(0x7fffffffffffffff, x7);
  ASSERT_EQUAL_64(0xfffffffffffffffe, x8);
  ASSERT_EQUAL_64(0x1c71c71c71c71c71, x9);
  ASSERT_EQUAL_64(0x38e38e38e38e38e3, x10);
  ASSERT_EQUAL_64(0x71c71c71c71c71c6, x11);

  TEARDOWN();
}


TEST(smaddl_umaddl_umull) {
  SETUP();

  START();
  __ Mov(x17, 1);
  __ Mov(x18, 0x00000000ffffffff);
  __ Mov(x19, 0xffffffffffffffff);
  __ Mov(x20, 4);
  __ Mov(x21, 0x0000000200000000);

  __ Smaddl(x9, w17, w18, x20);
  __ Smaddl(x10, w18, w18, x20);
  __ Smaddl(x11, w19, w19, x20);
  __ Smaddl(x12, w19, w19, x21);
  __ Umaddl(x13, w17, w18, x20);
  __ Umaddl(x14, w18, w18, x20);
  __ Umaddl(x15, w19, w19, x20);
  __ Umaddl(x22, w19, w19, x21);
  __ Umull(x24, w19, w19);
  __ Umull(x25, w17, w18);
  END();

  RUN();

  ASSERT_EQUAL_64(3, x9);
  ASSERT_EQUAL_64(5, x10);
  ASSERT_EQUAL_64(5, x11);
  ASSERT_EQUAL_64(0x0000000200000001, x12);
  ASSERT_EQUAL_64(0x0000000100000003, x13);
  ASSERT_EQUAL_64(0xfffffffe00000005, x14);
  ASSERT_EQUAL_64(0xfffffffe00000005, x15);
  ASSERT_EQUAL_64(1, x22);
  ASSERT_EQUAL_64(0xfffffffe00000001, x24);
  ASSERT_EQUAL_64(0x00000000ffffffff, x25);

  TEARDOWN();
}


TEST(smsubl_umsubl) {
  SETUP();

  START();
  __ Mov(x17, 1);
  __ Mov(x18, 0x00000000ffffffff);
  __ Mov(x19, 0xffffffffffffffff);
  __ Mov(x20, 4);
  __ Mov(x21, 0x0000000200000000);

  __ Smsubl(x9, w17, w18, x20);
  __ Smsubl(x10, w18, w18, x20);
  __ Smsubl(x11, w19, w19, x20);
  __ Smsubl(x12, w19, w19, x21);
  __ Umsubl(x13, w17, w18, x20);
  __ Umsubl(x14, w18, w18, x20);
  __ Umsubl(x15, w19, w19, x20);
  __ Umsubl(x22, w19, w19, x21);
  END();

  RUN();

  ASSERT_EQUAL_64(5, x9);
  ASSERT_EQUAL_64(3, x10);
  ASSERT_EQUAL_64(3, x11);
  ASSERT_EQUAL_64(0x00000001ffffffff, x12);
  ASSERT_EQUAL_64(0xffffffff00000005, x13);
  ASSERT_EQUAL_64(0x0000000200000003, x14);
  ASSERT_EQUAL_64(0x0000000200000003, x15);
  ASSERT_EQUAL_64(0x00000003ffffffff, x22);

  TEARDOWN();
}

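// TEST(div) below also pins down the A64 corner cases: Udiv and Sdiv never
// trap, division by zero returns 0 (the final x18-x21 assertions), and
// INT_MIN / -1 wraps back to INT_MIN (the x23 and x25 assertions).
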
TEST(div) {
  SETUP();

  START();
  __ Mov(x16, 1);
  __ Mov(x17, 0xffffffff);
  __ Mov(x18, 0xffffffffffffffff);
  __ Mov(x19, 0x80000000);
  __ Mov(x20, 0x8000000000000000);
  __ Mov(x21, 2);

  __ Udiv(w0, w16, w16);
  __ Udiv(w1, w17, w16);
  __ Sdiv(w2, w16, w16);
  __ Sdiv(w3, w16, w17);
  __ Sdiv(w4, w17, w18);

  __ Udiv(x5, x16, x16);
  __ Udiv(x6, x17, x18);
  __ Sdiv(x7, x16, x16);
  __ Sdiv(x8, x16, x17);
  __ Sdiv(x9, x17, x18);

  __ Udiv(w10, w19, w21);
  __ Sdiv(w11, w19, w21);
  __ Udiv(x12, x19, x21);
  __ Sdiv(x13, x19, x21);
  __ Udiv(x14, x20, x21);
  __ Sdiv(x15, x20, x21);

  __ Udiv(w22, w19, w17);
  __ Sdiv(w23, w19, w17);
  __ Udiv(x24, x20, x18);
  __ Sdiv(x25, x20, x18);

  __ Udiv(x26, x16, x21);
  __ Sdiv(x27, x16, x21);
  __ Udiv(x28, x18, x21);
  __ Sdiv(x29, x18, x21);

  __ Mov(x17, 0);
  __ Udiv(w18, w16, w17);
  __ Sdiv(w19, w16, w17);
  __ Udiv(x20, x16, x17);
  __ Sdiv(x21, x16, x17);
  END();

  RUN();

  ASSERT_EQUAL_64(1, x0);
  ASSERT_EQUAL_64(0xffffffff, x1);
  ASSERT_EQUAL_64(1, x2);
  ASSERT_EQUAL_64(0xffffffff, x3);
  ASSERT_EQUAL_64(1, x4);
  ASSERT_EQUAL_64(1, x5);
  ASSERT_EQUAL_64(0, x6);
  ASSERT_EQUAL_64(1, x7);
  ASSERT_EQUAL_64(0, x8);
  ASSERT_EQUAL_64(0xffffffff00000001, x9);
  ASSERT_EQUAL_64(0x40000000, x10);
  ASSERT_EQUAL_64(0xc0000000, x11);
  ASSERT_EQUAL_64(0x0000000040000000, x12);
  ASSERT_EQUAL_64(0x0000000040000000, x13);
  ASSERT_EQUAL_64(0x4000000000000000, x14);
  ASSERT_EQUAL_64(0xc000000000000000, x15);
  ASSERT_EQUAL_64(0, x22);
  ASSERT_EQUAL_64(0x80000000, x23);
  ASSERT_EQUAL_64(0, x24);
  ASSERT_EQUAL_64(0x8000000000000000, x25);
  ASSERT_EQUAL_64(0, x26);
  ASSERT_EQUAL_64(0, x27);
  ASSERT_EQUAL_64(0x7fffffffffffffff, x28);
  ASSERT_EQUAL_64(0, x29);
  ASSERT_EQUAL_64(0, x18);
  ASSERT_EQUAL_64(0, x19);
  ASSERT_EQUAL_64(0, x20);
  ASSERT_EQUAL_64(0, x21);

  TEARDOWN();
}


TEST(rbit_rev) {
  SETUP();

  START();
  __ Mov(x24, 0xfedcba9876543210);
  __ Rbit(w0, w24);
  __ Rbit(x1, x24);
  __ Rev16(w2, w24);
  __ Rev16(x3, x24);
  __ Rev(w4, w24);
  __ Rev32(x5, x24);
  __ Rev(x6, x24);
  END();

  RUN();

  ASSERT_EQUAL_64(0x084c2a6e, x0);
  ASSERT_EQUAL_64(0x084c2a6e195d3b7f, x1);
  ASSERT_EQUAL_64(0x54761032, x2);
  ASSERT_EQUAL_64(0xdcfe98ba54761032, x3);
  ASSERT_EQUAL_64(0x10325476, x4);
  ASSERT_EQUAL_64(0x98badcfe10325476, x5);
  ASSERT_EQUAL_64(0x1032547698badcfe, x6);

  TEARDOWN();
}

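// A worked example for the byte-reversing forms above, with x24 holding
// 0xfedcba9876543210: Rev16 swaps the bytes within each halfword, so
// Rev16(w2, w24) turns 0x76543210 into 0x54761032; Rev(w4, w24) reverses all
// four bytes, giving 0x10325476; and Rev32(x5, x24) reverses the bytes
// within each 32-bit half of the X register, giving 0x98badcfe10325476.
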
typedef void (MacroAssembler::*TestBranchSignature)(const Register& rt,
                                                    unsigned bit_pos,
                                                    Label* label);

static void TbzRangePoolLimitHelper(TestBranchSignature test_branch) {
  const int kTbzRange = 32768;
  const int kNumLdrLiteral = kTbzRange / 4;
  const int fuzzRange = 2;
  for (int n = kNumLdrLiteral - fuzzRange; n <= kNumLdrLiteral + fuzzRange;
       ++n) {
    for (int margin = -32; margin < 32; margin += 4) {
      SETUP();

      START();

      // Emit 32KB of literals (equal to the range of TBZ).
      for (int i = 0; i < n; ++i) {
        __ Ldr(w0, 0x12345678);
      }

      const int kLiteralMargin = 128 * KBytes;

      // Emit enough NOPs to be just about to emit the literal pool.
      ptrdiff_t end =
          masm.GetCursorOffset() + (kLiteralMargin - n * 4 + margin);
      while (masm.GetCursorOffset() < end) {
        __ Nop();
      }

      // Add a TBZ instruction.
      Label label;

      (masm.*test_branch)(x0, 2, &label);

      // Add enough NOPs to surpass its range, to make sure we can encode the
      // veneer.
      end = masm.GetCursorOffset() + (kTbzRange - 4);
      {
        ExactAssemblyScope scope(&masm,
                                 kTbzRange,
                                 ExactAssemblyScope::kMaximumSize);
        while (masm.GetCursorOffset() < end) __ nop();
      }

      // Finally, bind the label.
      __ Bind(&label);

      END();

      RUN();

      TEARDOWN();
    }
  }
}

TEST(test_branch_limits_literal_pool_size) {
  TbzRangePoolLimitHelper(&MacroAssembler::Tbz);
  TbzRangePoolLimitHelper(&MacroAssembler::Tbnz);
}

TEST(clz_cls) {
  SETUP();

  START();
  __ Mov(x24, 0x0008000000800000);
  __ Mov(x25, 0xff800000fff80000);
  __ Mov(x26, 0);
  __ Clz(w0, w24);
  __ Clz(x1, x24);
  __ Clz(w2, w25);
  __ Clz(x3, x25);
  __ Clz(w4, w26);
  __ Clz(x5, x26);
  __ Cls(w6, w24);
  __ Cls(x7, x24);
  __ Cls(w8, w25);
  __ Cls(x9, x25);
  __ Cls(w10, w26);
  __ Cls(x11, x26);
  END();

  RUN();

  ASSERT_EQUAL_64(8, x0);
  ASSERT_EQUAL_64(12, x1);
  ASSERT_EQUAL_64(0, x2);
  ASSERT_EQUAL_64(0, x3);
  ASSERT_EQUAL_64(32, x4);
  ASSERT_EQUAL_64(64, x5);
  ASSERT_EQUAL_64(7, x6);
  ASSERT_EQUAL_64(11, x7);
  ASSERT_EQUAL_64(12, x8);
  ASSERT_EQUAL_64(8, x9);
  ASSERT_EQUAL_64(31, x10);
  ASSERT_EQUAL_64(63, x11);

  TEARDOWN();
}


TEST(label) {
  SETUP();

  Label label_1, label_2, label_3, label_4;

  START();
  __ Mov(x0, 0x1);
  __ Mov(x1, 0x0);
  __ Mov(x22, lr);  // Save lr.

  __ B(&label_1);
  __ B(&label_1);
  __ B(&label_1);  // Multiple branches to the same label.
  __ Mov(x0, 0x0);
  __ Bind(&label_2);
  __ B(&label_3);  // Forward branch.
  __ Mov(x0, 0x0);
  __ Bind(&label_1);
  __ B(&label_2);  // Backward branch.
  __ Mov(x0, 0x0);
  __ Bind(&label_3);
  __ Bl(&label_4);
  END();

  __ Bind(&label_4);
  __ Mov(x1, 0x1);
  __ Mov(lr, x22);
  END();

  RUN();

  ASSERT_EQUAL_64(0x1, x0);
  ASSERT_EQUAL_64(0x1, x1);

  TEARDOWN();
}


TEST(label_2) {
  SETUP();

  Label label_1, label_2, label_3;
  Label first_jump_to_3;

  START();
  __ Mov(x0, 0x0);

  __ B(&label_1);
  ptrdiff_t offset_2 = masm.GetCursorOffset();
  __ Orr(x0, x0, 1 << 1);
  __ B(&label_3);
  ptrdiff_t offset_1 = masm.GetCursorOffset();
  __ Orr(x0, x0, 1 << 0);
  __ B(&label_2);
  ptrdiff_t offset_3 = masm.GetCursorOffset();
  __ Tbz(x0, 2, &first_jump_to_3);
  __ Orr(x0, x0, 1 << 3);
  __ Bind(&first_jump_to_3);
  __ Orr(x0, x0, 1 << 2);
  __ Tbz(x0, 3, &label_3);

  // Labels 1, 2, and 3 are bound before the current buffer offset. Branches
  // to label_1 and label_2 branch respectively forward and backward. Branches
  // to label_3 include both forward and backward branches.
  masm.BindToOffset(&label_1, offset_1);
  masm.BindToOffset(&label_2, offset_2);
  masm.BindToOffset(&label_3, offset_3);

  END();

  RUN();

  ASSERT_EQUAL_64(0xf, x0);

  TEARDOWN();
}


TEST(adr) {
  SETUP();

  Label label_1, label_2, label_3, label_4;

  START();
  __ Mov(x0, 0x0);       // Set to non-zero to indicate failure.
  __ Adr(x1, &label_3);  // Set to zero to indicate success.

  __ Adr(x2, &label_1);  // Multiple forward references to the same label.
  __ Adr(x3, &label_1);
  __ Adr(x4, &label_1);

  __ Bind(&label_2);
  __ Eor(x5, x2, Operand(x3));  // Ensure that x2, x3 and x4 are identical.
  __ Eor(x6, x2, Operand(x4));
  __ Orr(x0, x0, Operand(x5));
  __ Orr(x0, x0, Operand(x6));
  __ Br(x2);  // label_1, label_3

  __ Bind(&label_3);
  __ Adr(x2, &label_3);  // Self-reference (offset 0).
  __ Eor(x1, x1, Operand(x2));
  __ Adr(x2, &label_4);  // Simple forward reference.
  __ Br(x2);  // label_4

  __ Bind(&label_1);
  __ Adr(x2, &label_3);  // Multiple reverse references to the same label.
  __ Adr(x3, &label_3);
  __ Adr(x4, &label_3);
  __ Adr(x5, &label_2);  // Simple reverse reference.
  __ Br(x5);  // label_2

  __ Bind(&label_4);
  END();

  RUN();

  ASSERT_EQUAL_64(0x0, x0);
  ASSERT_EQUAL_64(0x0, x1);

  TEARDOWN();
}


// Simple adrp tests: check that labels are linked and handled properly.
// This is similar to the adr test, but all the adrp instructions are put on
// the same page so that they return the same value.
TEST(adrp) {
  Label start;
  Label label_1, label_2, label_3;

  SETUP_CUSTOM(2 * kPageSize, PageOffsetDependentCode);
  START();

  // Waste space until the start of a page.
  {
    ExactAssemblyScope scope(&masm,
                             kPageSize,
                             ExactAssemblyScope::kMaximumSize);
    const uintptr_t kPageOffsetMask = kPageSize - 1;
    while ((masm.GetCursorAddress<uintptr_t>() & kPageOffsetMask) != 0) {
      __ b(&start);
    }
    __ bind(&start);
  }

  // Simple forward reference.
  __ Adrp(x0, &label_2);

  __ Bind(&label_1);

  // Multiple forward references to the same label.
  __ Adrp(x1, &label_3);
  __ Adrp(x2, &label_3);
  __ Adrp(x3, &label_3);

  __ Bind(&label_2);

  // Self-reference (offset 0).
  __ Adrp(x4, &label_2);

  __ Bind(&label_3);

  // Simple reverse reference.
  __ Adrp(x5, &label_1);

  // Multiple reverse references to the same label.
  __ Adrp(x6, &label_2);
  __ Adrp(x7, &label_2);
  __ Adrp(x8, &label_2);

  VIXL_ASSERT(masm.GetSizeOfCodeGeneratedSince(&start) < kPageSize);
  END();
  RUN_CUSTOM();

  uint64_t expected = reinterpret_cast<uint64_t>(
      AlignDown(masm.GetLabelAddress<uint64_t*>(&start), kPageSize));
  ASSERT_EQUAL_64(expected, x0);
  ASSERT_EQUAL_64(expected, x1);
  ASSERT_EQUAL_64(expected, x2);
  ASSERT_EQUAL_64(expected, x3);
  ASSERT_EQUAL_64(expected, x4);
  ASSERT_EQUAL_64(expected, x5);
  ASSERT_EQUAL_64(expected, x6);
  ASSERT_EQUAL_64(expected, x7);
  ASSERT_EQUAL_64(expected, x8);

  TEARDOWN_CUSTOM();
}

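// For reference (a summary of the A64 definition, not VIXL-specific): for
// `adrp xd, label`, xd = AlignDown(pc, kPageSize) + (the page offset from
// pc's page to the label's page) * kPageSize. Every adrp executed from the
// same 4KB page therefore yields the same result for a given label, which is
// the property the test above and the helpers below rely on.
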
2048 const int kStartPage = -16; 2049 const int kEndPage = 16; 2050 const int kMaxCodeSize = (kEndPage - kStartPage + 2) * kPageSize; 2051 2052 SETUP_CUSTOM(kMaxCodeSize, PageOffsetDependentCode); 2053 START(); 2054 2055 Label test; 2056 Label start; 2057 2058 { 2059 ExactAssemblyScope scope(&masm, 2060 kMaxCodeSize, 2061 ExactAssemblyScope::kMaximumSize); 2062 // Initialize NZCV with `eq` flags. 2063 __ cmp(wzr, wzr); 2064 // Waste space until the start of a page. 2065 while ((masm.GetCursorAddress<uintptr_t>() & kPageOffsetMask) != 0) { 2066 __ b(&start); 2067 } 2068 2069 // The first page. 2070 VIXL_STATIC_ASSERT(kStartPage < 0); 2071 { 2072 ExactAssemblyScope scope_page(&masm, kPageSize); 2073 __ bind(&start); 2074 __ adrp(x0, &test); 2075 __ adrp(x1, &test); 2076 for (size_t i = 2; i < (kPageSize / kInstructionSize); i += 2) { 2077 __ ccmp(x0, x1, NoFlag, eq); 2078 __ adrp(x1, &test); 2079 } 2080 } 2081 2082 // Subsequent pages. 2083 VIXL_STATIC_ASSERT(kEndPage >= 0); 2084 for (int page = (kStartPage + 1); page <= kEndPage; page++) { 2085 ExactAssemblyScope scope_page(&masm, kPageSize); 2086 if (page == 0) { 2087 for (size_t i = 0; i < (kPageSize / kInstructionSize);) { 2088 if (i++ == (offset_into_page / kInstructionSize)) __ bind(&test); 2089 __ ccmp(x0, x1, NoFlag, eq); 2090 if (i++ == (offset_into_page / kInstructionSize)) __ bind(&test); 2091 __ adrp(x1, &test); 2092 } 2093 } else { 2094 for (size_t i = 0; i < (kPageSize / kInstructionSize); i += 2) { 2095 __ ccmp(x0, x1, NoFlag, eq); 2096 __ adrp(x1, &test); 2097 } 2098 } 2099 } 2100 } 2101 2102 // Every adrp instruction pointed to the same label (`test`), so they should 2103 // all have produced the same result. 2104 2105 END(); 2106 RUN_CUSTOM(); 2107 2108 uintptr_t expected = 2109 AlignDown(masm.GetLabelAddress<uintptr_t>(&test), kPageSize); 2110 ASSERT_EQUAL_64(expected, x0); 2111 ASSERT_EQUAL_64(expected, x1); 2112 ASSERT_EQUAL_NZCV(ZCFlag); 2113 2114 TEARDOWN_CUSTOM(); 2115 } 2116 2117 2118 // Test that labels are correctly referenced by adrp across page boundaries. 2119 TEST(adrp_page_boundaries) { 2120 VIXL_STATIC_ASSERT(kPageSize == 4096); 2121 AdrpPageBoundaryHelper(kInstructionSize * 0); 2122 AdrpPageBoundaryHelper(kInstructionSize * 1); 2123 AdrpPageBoundaryHelper(kInstructionSize * 512); 2124 AdrpPageBoundaryHelper(kInstructionSize * 1022); 2125 AdrpPageBoundaryHelper(kInstructionSize * 1023); 2126 } 2127 2128 2129 static void AdrpOffsetHelper(int64_t offset) { 2130 const size_t kPageOffsetMask = kPageSize - 1; 2131 const int kMaxCodeSize = 2 * kPageSize; 2132 2133 SETUP_CUSTOM(kMaxCodeSize, PageOffsetDependentCode); 2134 START(); 2135 2136 Label page; 2137 2138 { 2139 ExactAssemblyScope scope(&masm, 2140 kMaxCodeSize, 2141 ExactAssemblyScope::kMaximumSize); 2142 // Initialize NZCV with `eq` flags. 2143 __ cmp(wzr, wzr); 2144 // Waste space until the start of a page. 2145 while ((masm.GetCursorAddress<uintptr_t>() & kPageOffsetMask) != 0) { 2146 __ b(&page); 2147 } 2148 __ bind(&page); 2149 2150 { 2151 ExactAssemblyScope scope_page(&masm, kPageSize); 2152 // Every adrp instruction on this page should return the same value. 
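// Unlike the label-based adrp tests above, the instructions below take an
// immediate operand: a signed page count. The result should therefore be the
// address of the page holding the instruction plus offset * kPageSize, which
// is what the `expected` value after RUN_CUSTOM reconstructs.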
2153 __ adrp(x0, offset); 2154 __ adrp(x1, offset); 2155 for (size_t i = 2; i < kPageSize / kInstructionSize; i += 2) { 2156 __ ccmp(x0, x1, NoFlag, eq); 2157 __ adrp(x1, offset); 2158 } 2159 } 2160 } 2161 2162 END(); 2163 RUN_CUSTOM(); 2164 2165 uintptr_t expected = 2166 masm.GetLabelAddress<uintptr_t>(&page) + (kPageSize * offset); 2167 ASSERT_EQUAL_64(expected, x0); 2168 ASSERT_EQUAL_64(expected, x1); 2169 ASSERT_EQUAL_NZCV(ZCFlag); 2170 2171 TEARDOWN_CUSTOM(); 2172 } 2173 2174 2175 // Check that adrp produces the correct result for a specific offset. 2176 TEST(adrp_offset) { 2177 AdrpOffsetHelper(0); 2178 AdrpOffsetHelper(1); 2179 AdrpOffsetHelper(-1); 2180 AdrpOffsetHelper(4); 2181 AdrpOffsetHelper(-4); 2182 AdrpOffsetHelper(0x000fffff); 2183 AdrpOffsetHelper(-0x000fffff); 2184 AdrpOffsetHelper(-0x00100000); 2185 } 2186 2187 2188 TEST(branch_cond) { 2189 SETUP(); 2190 2191 Label done, wrong; 2192 2193 START(); 2194 __ Mov(x0, 0x1); 2195 __ Mov(x1, 0x1); 2196 __ Mov(x2, 0x8000000000000000); 2197 2198 // For each 'cmp' instruction below, condition codes other than the ones 2199 // following it would branch. 2200 2201 __ Cmp(x1, 0); 2202 __ B(&wrong, eq); 2203 __ B(&wrong, lo); 2204 __ B(&wrong, mi); 2205 __ B(&wrong, vs); 2206 __ B(&wrong, ls); 2207 __ B(&wrong, lt); 2208 __ B(&wrong, le); 2209 Label ok_1; 2210 __ B(&ok_1, ne); 2211 __ Mov(x0, 0x0); 2212 __ Bind(&ok_1); 2213 2214 __ Cmp(x1, 1); 2215 __ B(&wrong, ne); 2216 __ B(&wrong, lo); 2217 __ B(&wrong, mi); 2218 __ B(&wrong, vs); 2219 __ B(&wrong, hi); 2220 __ B(&wrong, lt); 2221 __ B(&wrong, gt); 2222 Label ok_2; 2223 __ B(&ok_2, pl); 2224 __ Mov(x0, 0x0); 2225 __ Bind(&ok_2); 2226 2227 __ Cmp(x1, 2); 2228 __ B(&wrong, eq); 2229 __ B(&wrong, hs); 2230 __ B(&wrong, pl); 2231 __ B(&wrong, vs); 2232 __ B(&wrong, hi); 2233 __ B(&wrong, ge); 2234 __ B(&wrong, gt); 2235 Label ok_3; 2236 __ B(&ok_3, vc); 2237 __ Mov(x0, 0x0); 2238 __ Bind(&ok_3); 2239 2240 __ Cmp(x2, 1); 2241 __ B(&wrong, eq); 2242 __ B(&wrong, lo); 2243 __ B(&wrong, mi); 2244 __ B(&wrong, vc); 2245 __ B(&wrong, ls); 2246 __ B(&wrong, ge); 2247 __ B(&wrong, gt); 2248 Label ok_4; 2249 __ B(&ok_4, le); 2250 __ Mov(x0, 0x0); 2251 __ Bind(&ok_4); 2252 2253 // The MacroAssembler does not allow al as a branch condition. 2254 Label ok_5; 2255 { 2256 ExactAssemblyScope scope(&masm, kInstructionSize); 2257 __ b(&ok_5, al); 2258 } 2259 __ Mov(x0, 0x0); 2260 __ Bind(&ok_5); 2261 2262 // The MacroAssembler does not allow nv as a branch condition. 2263 Label ok_6; 2264 { 2265 ExactAssemblyScope scope(&masm, kInstructionSize); 2266 __ b(&ok_6, nv); 2267 } 2268 __ Mov(x0, 0x0); 2269 __ Bind(&ok_6); 2270 2271 __ B(&done); 2272 2273 __ Bind(&wrong); 2274 __ Mov(x0, 0x0); 2275 2276 __ Bind(&done); 2277 END(); 2278 2279 RUN(); 2280 2281 ASSERT_EQUAL_64(0x1, x0); 2282 2283 TEARDOWN(); 2284 } 2285 2286 2287 TEST(branch_to_reg) { 2288 SETUP(); 2289 2290 // Test br. 2291 Label fn1, after_fn1; 2292 2293 START(); 2294 __ Mov(x29, lr); 2295 2296 __ Mov(x1, 0); 2297 __ B(&after_fn1); 2298 2299 __ Bind(&fn1); 2300 __ Mov(x0, lr); 2301 __ Mov(x1, 42); 2302 __ Br(x0); 2303 2304 __ Bind(&after_fn1); 2305 __ Bl(&fn1); 2306 2307 // Test blr. 
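// Like `bl`, `blr` writes the address of the next instruction to lr before
// branching, so x0 (lr as captured inside fn2, i.e. the `bl` return address)
// and x3 (lr as left behind by the `blr` that returned) differ by exactly
// one instruction; the first ASSERT_EQUAL_64 after RUN verifies this.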
2308 Label fn2, after_fn2; 2309 2310 __ Mov(x2, 0); 2311 __ B(&after_fn2); 2312 2313 __ Bind(&fn2); 2314 __ Mov(x0, lr); 2315 __ Mov(x2, 84); 2316 __ Blr(x0); 2317 2318 __ Bind(&after_fn2); 2319 __ Bl(&fn2); 2320 __ Mov(x3, lr); 2321 2322 __ Mov(lr, x29); 2323 END(); 2324 2325 RUN(); 2326 2327 ASSERT_EQUAL_64(core.xreg(3) + kInstructionSize, x0); 2328 ASSERT_EQUAL_64(42, x1); 2329 ASSERT_EQUAL_64(84, x2); 2330 2331 TEARDOWN(); 2332 } 2333 2334 2335 TEST(compare_branch) { 2336 SETUP(); 2337 2338 START(); 2339 __ Mov(x0, 0); 2340 __ Mov(x1, 0); 2341 __ Mov(x2, 0); 2342 __ Mov(x3, 0); 2343 __ Mov(x4, 0); 2344 __ Mov(x5, 0); 2345 __ Mov(x16, 0); 2346 __ Mov(x17, 42); 2347 2348 Label zt, zt_end; 2349 __ Cbz(w16, &zt); 2350 __ B(&zt_end); 2351 __ Bind(&zt); 2352 __ Mov(x0, 1); 2353 __ Bind(&zt_end); 2354 2355 Label zf, zf_end; 2356 __ Cbz(x17, &zf); 2357 __ B(&zf_end); 2358 __ Bind(&zf); 2359 __ Mov(x1, 1); 2360 __ Bind(&zf_end); 2361 2362 Label nzt, nzt_end; 2363 __ Cbnz(w17, &nzt); 2364 __ B(&nzt_end); 2365 __ Bind(&nzt); 2366 __ Mov(x2, 1); 2367 __ Bind(&nzt_end); 2368 2369 Label nzf, nzf_end; 2370 __ Cbnz(x16, &nzf); 2371 __ B(&nzf_end); 2372 __ Bind(&nzf); 2373 __ Mov(x3, 1); 2374 __ Bind(&nzf_end); 2375 2376 __ Mov(x18, 0xffffffff00000000); 2377 2378 Label a, a_end; 2379 __ Cbz(w18, &a); 2380 __ B(&a_end); 2381 __ Bind(&a); 2382 __ Mov(x4, 1); 2383 __ Bind(&a_end); 2384 2385 Label b, b_end; 2386 __ Cbnz(w18, &b); 2387 __ B(&b_end); 2388 __ Bind(&b); 2389 __ Mov(x5, 1); 2390 __ Bind(&b_end); 2391 2392 END(); 2393 2394 RUN(); 2395 2396 ASSERT_EQUAL_64(1, x0); 2397 ASSERT_EQUAL_64(0, x1); 2398 ASSERT_EQUAL_64(1, x2); 2399 ASSERT_EQUAL_64(0, x3); 2400 ASSERT_EQUAL_64(1, x4); 2401 ASSERT_EQUAL_64(0, x5); 2402 2403 TEARDOWN(); 2404 } 2405 2406 2407 TEST(test_branch) { 2408 SETUP(); 2409 2410 START(); 2411 __ Mov(x0, 0); 2412 __ Mov(x1, 0); 2413 __ Mov(x2, 0); 2414 __ Mov(x3, 0); 2415 __ Mov(x16, 0xaaaaaaaaaaaaaaaa); 2416 2417 Label bz, bz_end; 2418 __ Tbz(w16, 0, &bz); 2419 __ B(&bz_end); 2420 __ Bind(&bz); 2421 __ Mov(x0, 1); 2422 __ Bind(&bz_end); 2423 2424 Label bo, bo_end; 2425 __ Tbz(x16, 63, &bo); 2426 __ B(&bo_end); 2427 __ Bind(&bo); 2428 __ Mov(x1, 1); 2429 __ Bind(&bo_end); 2430 2431 Label nbz, nbz_end; 2432 __ Tbnz(x16, 61, &nbz); 2433 __ B(&nbz_end); 2434 __ Bind(&nbz); 2435 __ Mov(x2, 1); 2436 __ Bind(&nbz_end); 2437 2438 Label nbo, nbo_end; 2439 __ Tbnz(w16, 2, &nbo); 2440 __ B(&nbo_end); 2441 __ Bind(&nbo); 2442 __ Mov(x3, 1); 2443 __ Bind(&nbo_end); 2444 END(); 2445 2446 RUN(); 2447 2448 ASSERT_EQUAL_64(1, x0); 2449 ASSERT_EQUAL_64(0, x1); 2450 ASSERT_EQUAL_64(1, x2); 2451 ASSERT_EQUAL_64(0, x3); 2452 2453 TEARDOWN(); 2454 } 2455 2456 2457 TEST(branch_type) { 2458 SETUP(); 2459 2460 Label fail, done; 2461 2462 START(); 2463 __ Mov(x0, 0x0); 2464 __ Mov(x10, 0x7); 2465 __ Mov(x11, 0x0); 2466 2467 // Test non taken branches. 2468 __ Cmp(x10, 0x7); 2469 __ B(&fail, ne); 2470 __ B(&fail, never); 2471 __ B(&fail, reg_zero, x10); 2472 __ B(&fail, reg_not_zero, x11); 2473 __ B(&fail, reg_bit_clear, x10, 0); 2474 __ B(&fail, reg_bit_set, x10, 3); 2475 2476 // Test taken branches. 
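// Both the non-taken and taken blocks use MacroAssembler branch types rather
// than architectural condition codes: `always` and `never` are pseudo
// conditions, while the reg_* types are expected to assemble to cbz/cbnz
// (reg_zero/reg_not_zero) and tbz/tbnz (reg_bit_clear/reg_bit_set) on the
// given register and bit.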
2477 Label l1, l2, l3, l4, l5; 2478 __ Cmp(x10, 0x7); 2479 __ B(&l1, eq); 2480 __ B(&fail); 2481 __ Bind(&l1); 2482 __ B(&l2, always); 2483 __ B(&fail); 2484 __ Bind(&l2); 2485 __ B(&l3, reg_not_zero, x10); 2486 __ B(&fail); 2487 __ Bind(&l3); 2488 __ B(&l4, reg_bit_clear, x10, 15); 2489 __ B(&fail); 2490 __ Bind(&l4); 2491 __ B(&l5, reg_bit_set, x10, 1); 2492 __ B(&fail); 2493 __ Bind(&l5); 2494 2495 __ B(&done); 2496 2497 __ Bind(&fail); 2498 __ Mov(x0, 0x1); 2499 2500 __ Bind(&done); 2501 2502 END(); 2503 2504 RUN(); 2505 2506 ASSERT_EQUAL_64(0x0, x0); 2507 2508 TEARDOWN(); 2509 } 2510 2511 2512 TEST(ldr_str_offset) { 2513 SETUP(); 2514 2515 uint64_t src[2] = {0xfedcba9876543210, 0x0123456789abcdef}; 2516 uint64_t dst[5] = {0, 0, 0, 0, 0}; 2517 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 2518 uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); 2519 2520 START(); 2521 __ Mov(x17, src_base); 2522 __ Mov(x18, dst_base); 2523 __ Ldr(w0, MemOperand(x17)); 2524 __ Str(w0, MemOperand(x18)); 2525 __ Ldr(w1, MemOperand(x17, 4)); 2526 __ Str(w1, MemOperand(x18, 12)); 2527 __ Ldr(x2, MemOperand(x17, 8)); 2528 __ Str(x2, MemOperand(x18, 16)); 2529 __ Ldrb(w3, MemOperand(x17, 1)); 2530 __ Strb(w3, MemOperand(x18, 25)); 2531 __ Ldrh(w4, MemOperand(x17, 2)); 2532 __ Strh(w4, MemOperand(x18, 33)); 2533 END(); 2534 2535 RUN(); 2536 2537 ASSERT_EQUAL_64(0x76543210, x0); 2538 ASSERT_EQUAL_64(0x76543210, dst[0]); 2539 ASSERT_EQUAL_64(0xfedcba98, x1); 2540 ASSERT_EQUAL_64(0xfedcba9800000000, dst[1]); 2541 ASSERT_EQUAL_64(0x0123456789abcdef, x2); 2542 ASSERT_EQUAL_64(0x0123456789abcdef, dst[2]); 2543 ASSERT_EQUAL_64(0x32, x3); 2544 ASSERT_EQUAL_64(0x3200, dst[3]); 2545 ASSERT_EQUAL_64(0x7654, x4); 2546 ASSERT_EQUAL_64(0x765400, dst[4]); 2547 ASSERT_EQUAL_64(src_base, x17); 2548 ASSERT_EQUAL_64(dst_base, x18); 2549 2550 TEARDOWN(); 2551 } 2552 2553 2554 TEST(ldr_str_wide) { 2555 SETUP(); 2556 2557 uint32_t src[8192]; 2558 uint32_t dst[8192]; 2559 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 2560 uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); 2561 memset(src, 0xaa, 8192 * sizeof(src[0])); 2562 memset(dst, 0xaa, 8192 * sizeof(dst[0])); 2563 src[0] = 0; 2564 src[6144] = 6144; 2565 src[8191] = 8191; 2566 2567 START(); 2568 __ Mov(x22, src_base); 2569 __ Mov(x23, dst_base); 2570 __ Mov(x24, src_base); 2571 __ Mov(x25, dst_base); 2572 __ Mov(x26, src_base); 2573 __ Mov(x27, dst_base); 2574 2575 __ Ldr(w0, MemOperand(x22, 8191 * sizeof(src[0]))); 2576 __ Str(w0, MemOperand(x23, 8191 * sizeof(dst[0]))); 2577 __ Ldr(w1, MemOperand(x24, 4096 * sizeof(src[0]), PostIndex)); 2578 __ Str(w1, MemOperand(x25, 4096 * sizeof(dst[0]), PostIndex)); 2579 __ Ldr(w2, MemOperand(x26, 6144 * sizeof(src[0]), PreIndex)); 2580 __ Str(w2, MemOperand(x27, 6144 * sizeof(dst[0]), PreIndex)); 2581 END(); 2582 2583 RUN(); 2584 2585 ASSERT_EQUAL_32(8191, w0); 2586 ASSERT_EQUAL_32(8191, dst[8191]); 2587 ASSERT_EQUAL_64(src_base, x22); 2588 ASSERT_EQUAL_64(dst_base, x23); 2589 ASSERT_EQUAL_32(0, w1); 2590 ASSERT_EQUAL_32(0, dst[0]); 2591 ASSERT_EQUAL_64(src_base + 4096 * sizeof(src[0]), x24); 2592 ASSERT_EQUAL_64(dst_base + 4096 * sizeof(dst[0]), x25); 2593 ASSERT_EQUAL_32(6144, w2); 2594 ASSERT_EQUAL_32(6144, dst[6144]); 2595 ASSERT_EQUAL_64(src_base + 6144 * sizeof(src[0]), x26); 2596 ASSERT_EQUAL_64(dst_base + 6144 * sizeof(dst[0]), x27); 2597 2598 TEARDOWN(); 2599 } 2600 2601 2602 TEST(ldr_str_preindex) { 2603 SETUP(); 2604 2605 uint64_t src[2] = {0xfedcba9876543210, 0x0123456789abcdef}; 2606 uint64_t dst[6] = 
{0, 0, 0, 0, 0, 0}; 2607 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 2608 uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); 2609 2610 START(); 2611 __ Mov(x17, src_base); 2612 __ Mov(x18, dst_base); 2613 __ Mov(x19, src_base); 2614 __ Mov(x20, dst_base); 2615 __ Mov(x21, src_base + 16); 2616 __ Mov(x22, dst_base + 40); 2617 __ Mov(x23, src_base); 2618 __ Mov(x24, dst_base); 2619 __ Mov(x25, src_base); 2620 __ Mov(x26, dst_base); 2621 __ Ldr(w0, MemOperand(x17, 4, PreIndex)); 2622 __ Str(w0, MemOperand(x18, 12, PreIndex)); 2623 __ Ldr(x1, MemOperand(x19, 8, PreIndex)); 2624 __ Str(x1, MemOperand(x20, 16, PreIndex)); 2625 __ Ldr(w2, MemOperand(x21, -4, PreIndex)); 2626 __ Str(w2, MemOperand(x22, -4, PreIndex)); 2627 __ Ldrb(w3, MemOperand(x23, 1, PreIndex)); 2628 __ Strb(w3, MemOperand(x24, 25, PreIndex)); 2629 __ Ldrh(w4, MemOperand(x25, 3, PreIndex)); 2630 __ Strh(w4, MemOperand(x26, 41, PreIndex)); 2631 END(); 2632 2633 RUN(); 2634 2635 ASSERT_EQUAL_64(0xfedcba98, x0); 2636 ASSERT_EQUAL_64(0xfedcba9800000000, dst[1]); 2637 ASSERT_EQUAL_64(0x0123456789abcdef, x1); 2638 ASSERT_EQUAL_64(0x0123456789abcdef, dst[2]); 2639 ASSERT_EQUAL_64(0x01234567, x2); 2640 ASSERT_EQUAL_64(0x0123456700000000, dst[4]); 2641 ASSERT_EQUAL_64(0x32, x3); 2642 ASSERT_EQUAL_64(0x3200, dst[3]); 2643 ASSERT_EQUAL_64(0x9876, x4); 2644 ASSERT_EQUAL_64(0x987600, dst[5]); 2645 ASSERT_EQUAL_64(src_base + 4, x17); 2646 ASSERT_EQUAL_64(dst_base + 12, x18); 2647 ASSERT_EQUAL_64(src_base + 8, x19); 2648 ASSERT_EQUAL_64(dst_base + 16, x20); 2649 ASSERT_EQUAL_64(src_base + 12, x21); 2650 ASSERT_EQUAL_64(dst_base + 36, x22); 2651 ASSERT_EQUAL_64(src_base + 1, x23); 2652 ASSERT_EQUAL_64(dst_base + 25, x24); 2653 ASSERT_EQUAL_64(src_base + 3, x25); 2654 ASSERT_EQUAL_64(dst_base + 41, x26); 2655 2656 TEARDOWN(); 2657 } 2658 2659 2660 TEST(ldr_str_postindex) { 2661 SETUP(); 2662 2663 uint64_t src[2] = {0xfedcba9876543210, 0x0123456789abcdef}; 2664 uint64_t dst[6] = {0, 0, 0, 0, 0, 0}; 2665 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 2666 uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); 2667 2668 START(); 2669 __ Mov(x17, src_base + 4); 2670 __ Mov(x18, dst_base + 12); 2671 __ Mov(x19, src_base + 8); 2672 __ Mov(x20, dst_base + 16); 2673 __ Mov(x21, src_base + 8); 2674 __ Mov(x22, dst_base + 32); 2675 __ Mov(x23, src_base + 1); 2676 __ Mov(x24, dst_base + 25); 2677 __ Mov(x25, src_base + 3); 2678 __ Mov(x26, dst_base + 41); 2679 __ Ldr(w0, MemOperand(x17, 4, PostIndex)); 2680 __ Str(w0, MemOperand(x18, 12, PostIndex)); 2681 __ Ldr(x1, MemOperand(x19, 8, PostIndex)); 2682 __ Str(x1, MemOperand(x20, 16, PostIndex)); 2683 __ Ldr(x2, MemOperand(x21, -8, PostIndex)); 2684 __ Str(x2, MemOperand(x22, -32, PostIndex)); 2685 __ Ldrb(w3, MemOperand(x23, 1, PostIndex)); 2686 __ Strb(w3, MemOperand(x24, 5, PostIndex)); 2687 __ Ldrh(w4, MemOperand(x25, -3, PostIndex)); 2688 __ Strh(w4, MemOperand(x26, -41, PostIndex)); 2689 END(); 2690 2691 RUN(); 2692 2693 ASSERT_EQUAL_64(0xfedcba98, x0); 2694 ASSERT_EQUAL_64(0xfedcba9800000000, dst[1]); 2695 ASSERT_EQUAL_64(0x0123456789abcdef, x1); 2696 ASSERT_EQUAL_64(0x0123456789abcdef, dst[2]); 2697 ASSERT_EQUAL_64(0x0123456789abcdef, x2); 2698 ASSERT_EQUAL_64(0x0123456789abcdef, dst[4]); 2699 ASSERT_EQUAL_64(0x32, x3); 2700 ASSERT_EQUAL_64(0x3200, dst[3]); 2701 ASSERT_EQUAL_64(0x9876, x4); 2702 ASSERT_EQUAL_64(0x987600, dst[5]); 2703 ASSERT_EQUAL_64(src_base + 8, x17); 2704 ASSERT_EQUAL_64(dst_base + 24, x18); 2705 ASSERT_EQUAL_64(src_base + 16, x19); 2706 
ASSERT_EQUAL_64(dst_base + 32, x20);
2707 ASSERT_EQUAL_64(src_base, x21);
2708 ASSERT_EQUAL_64(dst_base, x22);
2709 ASSERT_EQUAL_64(src_base + 2, x23);
2710 ASSERT_EQUAL_64(dst_base + 30, x24);
2711 ASSERT_EQUAL_64(src_base, x25);
2712 ASSERT_EQUAL_64(dst_base, x26);
2713
2714 TEARDOWN();
2715 }
2716
2717
2718 TEST(ldr_str_largeindex) {
2719 SETUP();
2720
2721 // This value won't fit in the immediate offset field of ldr/str instructions.
2722 int largeoffset = 0xabcdef;
2723
2724 int64_t data[3] = {0x1122334455667788, 0, 0};
2725 uint64_t base_addr = reinterpret_cast<uintptr_t>(data);
2726 uint64_t drifted_addr = base_addr - largeoffset;
2727
2728 // This test checks that we can use large immediate offsets when using the
2729 // PreIndex or PostIndex addressing modes of the MacroAssembler Ldr/Str
2730 // instructions.
2731
2732 START();
2733 __ Mov(x19, drifted_addr);
2734 __ Ldr(x0, MemOperand(x19, largeoffset, PreIndex));
2735
2736 __ Mov(x20, base_addr);
2737 __ Ldr(x1, MemOperand(x20, largeoffset, PostIndex));
2738
2739 __ Mov(x21, drifted_addr);
2740 __ Str(x0, MemOperand(x21, largeoffset + 8, PreIndex));
2741
2742 __ Mov(x22, base_addr + 16);
2743 __ Str(x0, MemOperand(x22, largeoffset, PostIndex));
2744 END();
2745
2746 RUN();
2747
2748 ASSERT_EQUAL_64(0x1122334455667788, data[0]);
2749 ASSERT_EQUAL_64(0x1122334455667788, data[1]);
2750 ASSERT_EQUAL_64(0x1122334455667788, data[2]);
2751 ASSERT_EQUAL_64(0x1122334455667788, x0);
2752 ASSERT_EQUAL_64(0x1122334455667788, x1);
2753
2754 ASSERT_EQUAL_64(base_addr, x19);
2755 ASSERT_EQUAL_64(base_addr + largeoffset, x20);
2756 ASSERT_EQUAL_64(base_addr + 8, x21);
2757 ASSERT_EQUAL_64(base_addr + 16 + largeoffset, x22);
2758
2759 TEARDOWN();
2760 }
2761
2762
2763 TEST(load_signed) {
2764 SETUP();
2765
2766 uint32_t src[2] = {0x80008080, 0x7fff7f7f};
2767 uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
2768
2769 START();
2770 __ Mov(x24, src_base);
2771 __ Ldrsb(w0, MemOperand(x24));
2772 __ Ldrsb(w1, MemOperand(x24, 4));
2773 __ Ldrsh(w2, MemOperand(x24));
2774 __ Ldrsh(w3, MemOperand(x24, 4));
2775 __ Ldrsb(x4, MemOperand(x24));
2776 __ Ldrsb(x5, MemOperand(x24, 4));
2777 __ Ldrsh(x6, MemOperand(x24));
2778 __ Ldrsh(x7, MemOperand(x24, 4));
2779 __ Ldrsw(x8, MemOperand(x24));
2780 __ Ldrsw(x9, MemOperand(x24, 4));
2781 END();
2782
2783 RUN();
2784
2785 ASSERT_EQUAL_64(0xffffff80, x0);
2786 ASSERT_EQUAL_64(0x0000007f, x1);
2787 ASSERT_EQUAL_64(0xffff8080, x2);
2788 ASSERT_EQUAL_64(0x00007f7f, x3);
2789 ASSERT_EQUAL_64(0xffffffffffffff80, x4);
2790 ASSERT_EQUAL_64(0x000000000000007f, x5);
2791 ASSERT_EQUAL_64(0xffffffffffff8080, x6);
2792 ASSERT_EQUAL_64(0x0000000000007f7f, x7);
2793 ASSERT_EQUAL_64(0xffffffff80008080, x8);
2794 ASSERT_EQUAL_64(0x000000007fff7f7f, x9);
2795
2796 TEARDOWN();
2797 }
2798
2799
2800 TEST(load_store_regoffset) {
2801 SETUP();
2802
2803 uint32_t src[3] = {1, 2, 3};
2804 uint32_t dst[4] = {0, 0, 0, 0};
2805 uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
2806 uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
2807
2808 START();
2809 __ Mov(x16, src_base);
2810 __ Mov(x17, dst_base);
2811 __ Mov(x18, src_base + 3 * sizeof(src[0]));
2812 __ Mov(x19, dst_base + 3 * sizeof(dst[0]));
2813 __ Mov(x20, dst_base + 4 * sizeof(dst[0]));
2814 __ Mov(x24, 0);
2815 __ Mov(x25, 4);
2816 __ Mov(x26, -4);
2817 __ Mov(x27, 0xfffffffc); // 32-bit -4.
2818 __ Mov(x28, 0xfffffffe); // 32-bit -2.
2819 __ Mov(x29, 0xffffffff); // 32-bit -1.
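// Register-offset addressing computes the address as
// base + (extend(offset_reg) << shift), where a W offset register must be
// extended with UXTW or SXTW and the shift, when present, matches the access
// size; for example:
//
//   Ldr(w4, MemOperand(x18, x28, SXTW, 2));  // address = x18 + (-2 << 2)
//
// which is why the 32-bit "-2" in x28 reads src[1] below.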
2820 2821 __ Ldr(w0, MemOperand(x16, x24)); 2822 __ Ldr(x1, MemOperand(x16, x25)); 2823 __ Ldr(w2, MemOperand(x18, x26)); 2824 __ Ldr(w3, MemOperand(x18, x27, SXTW)); 2825 __ Ldr(w4, MemOperand(x18, x28, SXTW, 2)); 2826 __ Str(w0, MemOperand(x17, x24)); 2827 __ Str(x1, MemOperand(x17, x25)); 2828 __ Str(w2, MemOperand(x20, x29, SXTW, 2)); 2829 END(); 2830 2831 RUN(); 2832 2833 ASSERT_EQUAL_64(1, x0); 2834 ASSERT_EQUAL_64(0x0000000300000002, x1); 2835 ASSERT_EQUAL_64(3, x2); 2836 ASSERT_EQUAL_64(3, x3); 2837 ASSERT_EQUAL_64(2, x4); 2838 ASSERT_EQUAL_32(1, dst[0]); 2839 ASSERT_EQUAL_32(2, dst[1]); 2840 ASSERT_EQUAL_32(3, dst[2]); 2841 ASSERT_EQUAL_32(3, dst[3]); 2842 2843 TEARDOWN(); 2844 } 2845 2846 2847 TEST(load_store_float) { 2848 SETUP(); 2849 2850 float src[3] = {1.0, 2.0, 3.0}; 2851 float dst[3] = {0.0, 0.0, 0.0}; 2852 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 2853 uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); 2854 2855 START(); 2856 __ Mov(x17, src_base); 2857 __ Mov(x18, dst_base); 2858 __ Mov(x19, src_base); 2859 __ Mov(x20, dst_base); 2860 __ Mov(x21, src_base); 2861 __ Mov(x22, dst_base); 2862 __ Ldr(s0, MemOperand(x17, sizeof(src[0]))); 2863 __ Str(s0, MemOperand(x18, sizeof(dst[0]), PostIndex)); 2864 __ Ldr(s1, MemOperand(x19, sizeof(src[0]), PostIndex)); 2865 __ Str(s1, MemOperand(x20, 2 * sizeof(dst[0]), PreIndex)); 2866 __ Ldr(s2, MemOperand(x21, 2 * sizeof(src[0]), PreIndex)); 2867 __ Str(s2, MemOperand(x22, sizeof(dst[0]))); 2868 END(); 2869 2870 RUN(); 2871 2872 ASSERT_EQUAL_FP32(2.0, s0); 2873 ASSERT_EQUAL_FP32(2.0, dst[0]); 2874 ASSERT_EQUAL_FP32(1.0, s1); 2875 ASSERT_EQUAL_FP32(1.0, dst[2]); 2876 ASSERT_EQUAL_FP32(3.0, s2); 2877 ASSERT_EQUAL_FP32(3.0, dst[1]); 2878 ASSERT_EQUAL_64(src_base, x17); 2879 ASSERT_EQUAL_64(dst_base + sizeof(dst[0]), x18); 2880 ASSERT_EQUAL_64(src_base + sizeof(src[0]), x19); 2881 ASSERT_EQUAL_64(dst_base + 2 * sizeof(dst[0]), x20); 2882 ASSERT_EQUAL_64(src_base + 2 * sizeof(src[0]), x21); 2883 ASSERT_EQUAL_64(dst_base, x22); 2884 2885 TEARDOWN(); 2886 } 2887 2888 2889 TEST(load_store_double) { 2890 SETUP(); 2891 2892 double src[3] = {1.0, 2.0, 3.0}; 2893 double dst[3] = {0.0, 0.0, 0.0}; 2894 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 2895 uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); 2896 2897 START(); 2898 __ Mov(x17, src_base); 2899 __ Mov(x18, dst_base); 2900 __ Mov(x19, src_base); 2901 __ Mov(x20, dst_base); 2902 __ Mov(x21, src_base); 2903 __ Mov(x22, dst_base); 2904 __ Ldr(d0, MemOperand(x17, sizeof(src[0]))); 2905 __ Str(d0, MemOperand(x18, sizeof(dst[0]), PostIndex)); 2906 __ Ldr(d1, MemOperand(x19, sizeof(src[0]), PostIndex)); 2907 __ Str(d1, MemOperand(x20, 2 * sizeof(dst[0]), PreIndex)); 2908 __ Ldr(d2, MemOperand(x21, 2 * sizeof(src[0]), PreIndex)); 2909 __ Str(d2, MemOperand(x22, sizeof(dst[0]))); 2910 END(); 2911 2912 RUN(); 2913 2914 ASSERT_EQUAL_FP64(2.0, d0); 2915 ASSERT_EQUAL_FP64(2.0, dst[0]); 2916 ASSERT_EQUAL_FP64(1.0, d1); 2917 ASSERT_EQUAL_FP64(1.0, dst[2]); 2918 ASSERT_EQUAL_FP64(3.0, d2); 2919 ASSERT_EQUAL_FP64(3.0, dst[1]); 2920 ASSERT_EQUAL_64(src_base, x17); 2921 ASSERT_EQUAL_64(dst_base + sizeof(dst[0]), x18); 2922 ASSERT_EQUAL_64(src_base + sizeof(src[0]), x19); 2923 ASSERT_EQUAL_64(dst_base + 2 * sizeof(dst[0]), x20); 2924 ASSERT_EQUAL_64(src_base + 2 * sizeof(src[0]), x21); 2925 ASSERT_EQUAL_64(dst_base, x22); 2926 2927 TEARDOWN(); 2928 } 2929 2930 2931 TEST(load_store_b) { 2932 SETUP(); 2933 2934 uint8_t src[3] = {0x12, 0x23, 0x34}; 2935 uint8_t dst[3] = {0, 0, 0}; 
2936 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 2937 uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); 2938 2939 START(); 2940 __ Mov(x17, src_base); 2941 __ Mov(x18, dst_base); 2942 __ Mov(x19, src_base); 2943 __ Mov(x20, dst_base); 2944 __ Mov(x21, src_base); 2945 __ Mov(x22, dst_base); 2946 __ Ldr(b0, MemOperand(x17, sizeof(src[0]))); 2947 __ Str(b0, MemOperand(x18, sizeof(dst[0]), PostIndex)); 2948 __ Ldr(b1, MemOperand(x19, sizeof(src[0]), PostIndex)); 2949 __ Str(b1, MemOperand(x20, 2 * sizeof(dst[0]), PreIndex)); 2950 __ Ldr(b2, MemOperand(x21, 2 * sizeof(src[0]), PreIndex)); 2951 __ Str(b2, MemOperand(x22, sizeof(dst[0]))); 2952 END(); 2953 2954 RUN(); 2955 2956 ASSERT_EQUAL_128(0, 0x23, q0); 2957 ASSERT_EQUAL_64(0x23, dst[0]); 2958 ASSERT_EQUAL_128(0, 0x12, q1); 2959 ASSERT_EQUAL_64(0x12, dst[2]); 2960 ASSERT_EQUAL_128(0, 0x34, q2); 2961 ASSERT_EQUAL_64(0x34, dst[1]); 2962 ASSERT_EQUAL_64(src_base, x17); 2963 ASSERT_EQUAL_64(dst_base + sizeof(dst[0]), x18); 2964 ASSERT_EQUAL_64(src_base + sizeof(src[0]), x19); 2965 ASSERT_EQUAL_64(dst_base + 2 * sizeof(dst[0]), x20); 2966 ASSERT_EQUAL_64(src_base + 2 * sizeof(src[0]), x21); 2967 ASSERT_EQUAL_64(dst_base, x22); 2968 2969 TEARDOWN(); 2970 } 2971 2972 2973 TEST(load_store_h) { 2974 SETUP(); 2975 2976 uint16_t src[3] = {0x1234, 0x2345, 0x3456}; 2977 uint16_t dst[3] = {0, 0, 0}; 2978 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 2979 uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); 2980 2981 START(); 2982 __ Mov(x17, src_base); 2983 __ Mov(x18, dst_base); 2984 __ Mov(x19, src_base); 2985 __ Mov(x20, dst_base); 2986 __ Mov(x21, src_base); 2987 __ Mov(x22, dst_base); 2988 __ Ldr(h0, MemOperand(x17, sizeof(src[0]))); 2989 __ Str(h0, MemOperand(x18, sizeof(dst[0]), PostIndex)); 2990 __ Ldr(h1, MemOperand(x19, sizeof(src[0]), PostIndex)); 2991 __ Str(h1, MemOperand(x20, 2 * sizeof(dst[0]), PreIndex)); 2992 __ Ldr(h2, MemOperand(x21, 2 * sizeof(src[0]), PreIndex)); 2993 __ Str(h2, MemOperand(x22, sizeof(dst[0]))); 2994 END(); 2995 2996 RUN(); 2997 2998 ASSERT_EQUAL_128(0, 0x2345, q0); 2999 ASSERT_EQUAL_64(0x2345, dst[0]); 3000 ASSERT_EQUAL_128(0, 0x1234, q1); 3001 ASSERT_EQUAL_64(0x1234, dst[2]); 3002 ASSERT_EQUAL_128(0, 0x3456, q2); 3003 ASSERT_EQUAL_64(0x3456, dst[1]); 3004 ASSERT_EQUAL_64(src_base, x17); 3005 ASSERT_EQUAL_64(dst_base + sizeof(dst[0]), x18); 3006 ASSERT_EQUAL_64(src_base + sizeof(src[0]), x19); 3007 ASSERT_EQUAL_64(dst_base + 2 * sizeof(dst[0]), x20); 3008 ASSERT_EQUAL_64(src_base + 2 * sizeof(src[0]), x21); 3009 ASSERT_EQUAL_64(dst_base, x22); 3010 3011 TEARDOWN(); 3012 } 3013 3014 3015 TEST(load_store_q) { 3016 SETUP(); 3017 3018 uint8_t src[48] = {0x10, 0x32, 0x54, 0x76, 0x98, 0xba, 0xdc, 0xfe, 0x01, 0x23, 3019 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0x21, 0x43, 0x65, 0x87, 3020 0xa9, 0xcb, 0xed, 0x0f, 0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 3021 0xde, 0xf0, 0x24, 0x46, 0x68, 0x8a, 0xac, 0xce, 0xe0, 0x02, 3022 0x42, 0x64, 0x86, 0xa8, 0xca, 0xec, 0x0e, 0x20}; 3023 3024 uint64_t dst[6] = {0, 0, 0, 0, 0, 0}; 3025 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 3026 uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); 3027 3028 START(); 3029 __ Mov(x17, src_base); 3030 __ Mov(x18, dst_base); 3031 __ Mov(x19, src_base); 3032 __ Mov(x20, dst_base); 3033 __ Mov(x21, src_base); 3034 __ Mov(x22, dst_base); 3035 __ Ldr(q0, MemOperand(x17, 16)); 3036 __ Str(q0, MemOperand(x18, 16, PostIndex)); 3037 __ Ldr(q1, MemOperand(x19, 16, PostIndex)); 3038 __ Str(q1, MemOperand(x20, 32, PreIndex)); 3039 __ 
Ldr(q2, MemOperand(x21, 32, PreIndex)); 3040 __ Str(q2, MemOperand(x22, 16)); 3041 END(); 3042 3043 RUN(); 3044 3045 ASSERT_EQUAL_128(0xf0debc9a78563412, 0x0fedcba987654321, q0); 3046 ASSERT_EQUAL_64(0x0fedcba987654321, dst[0]); 3047 ASSERT_EQUAL_64(0xf0debc9a78563412, dst[1]); 3048 ASSERT_EQUAL_128(0xefcdab8967452301, 0xfedcba9876543210, q1); 3049 ASSERT_EQUAL_64(0xfedcba9876543210, dst[4]); 3050 ASSERT_EQUAL_64(0xefcdab8967452301, dst[5]); 3051 ASSERT_EQUAL_128(0x200eeccaa8866442, 0x02e0ceac8a684624, q2); 3052 ASSERT_EQUAL_64(0x02e0ceac8a684624, dst[2]); 3053 ASSERT_EQUAL_64(0x200eeccaa8866442, dst[3]); 3054 ASSERT_EQUAL_64(src_base, x17); 3055 ASSERT_EQUAL_64(dst_base + 16, x18); 3056 ASSERT_EQUAL_64(src_base + 16, x19); 3057 ASSERT_EQUAL_64(dst_base + 32, x20); 3058 ASSERT_EQUAL_64(src_base + 32, x21); 3059 ASSERT_EQUAL_64(dst_base, x22); 3060 3061 TEARDOWN(); 3062 } 3063 3064 3065 TEST(load_store_v_regoffset) { 3066 SETUP(); 3067 3068 uint8_t src[64]; 3069 for (unsigned i = 0; i < sizeof(src); i++) { 3070 src[i] = i; 3071 } 3072 uint8_t dst[64]; 3073 memset(dst, 0, sizeof(dst)); 3074 3075 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 3076 uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); 3077 3078 START(); 3079 __ Mov(x17, src_base + 16); 3080 __ Mov(x18, 1); 3081 __ Mov(w19, -1); 3082 __ Mov(x20, dst_base - 1); 3083 3084 __ Ldr(b0, MemOperand(x17, x18)); 3085 __ Ldr(b1, MemOperand(x17, x19, SXTW)); 3086 3087 __ Ldr(h2, MemOperand(x17, x18)); 3088 __ Ldr(h3, MemOperand(x17, x18, UXTW, 1)); 3089 __ Ldr(h4, MemOperand(x17, x19, SXTW, 1)); 3090 __ Ldr(h5, MemOperand(x17, x18, LSL, 1)); 3091 3092 __ Ldr(s16, MemOperand(x17, x18)); 3093 __ Ldr(s17, MemOperand(x17, x18, UXTW, 2)); 3094 __ Ldr(s18, MemOperand(x17, x19, SXTW, 2)); 3095 __ Ldr(s19, MemOperand(x17, x18, LSL, 2)); 3096 3097 __ Ldr(d20, MemOperand(x17, x18)); 3098 __ Ldr(d21, MemOperand(x17, x18, UXTW, 3)); 3099 __ Ldr(d22, MemOperand(x17, x19, SXTW, 3)); 3100 __ Ldr(d23, MemOperand(x17, x18, LSL, 3)); 3101 3102 __ Ldr(q24, MemOperand(x17, x18)); 3103 __ Ldr(q25, MemOperand(x17, x18, UXTW, 4)); 3104 __ Ldr(q26, MemOperand(x17, x19, SXTW, 4)); 3105 __ Ldr(q27, MemOperand(x17, x18, LSL, 4)); 3106 3107 // Store [bhsdq]27 to adjacent memory locations, then load again to check. 
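// The stores below place the low 1, 2, 4, 8 and 16 bytes of v27 at
// dst_base + 0, + 1, + 3, + 7 and + 15 respectively, so q6 (reloaded from
// dst_base) observes the overlapping narrow stores, while q7 (reloaded from
// dst_base + 15) reads back the unmodified q27 value.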
3108 __ Str(b27, MemOperand(x20, x18)); 3109 __ Str(h27, MemOperand(x20, x18, UXTW, 1)); 3110 __ Add(x20, x20, 8); 3111 __ Str(s27, MemOperand(x20, x19, SXTW, 2)); 3112 __ Sub(x20, x20, 8); 3113 __ Str(d27, MemOperand(x20, x18, LSL, 3)); 3114 __ Add(x20, x20, 32); 3115 __ Str(q27, MemOperand(x20, x19, SXTW, 4)); 3116 3117 __ Sub(x20, x20, 32); 3118 __ Ldr(q6, MemOperand(x20, x18)); 3119 __ Ldr(q7, MemOperand(x20, x18, LSL, 4)); 3120 3121 END(); 3122 3123 RUN(); 3124 3125 ASSERT_EQUAL_128(0, 0x11, q0); 3126 ASSERT_EQUAL_128(0, 0x0f, q1); 3127 ASSERT_EQUAL_128(0, 0x1211, q2); 3128 ASSERT_EQUAL_128(0, 0x1312, q3); 3129 ASSERT_EQUAL_128(0, 0x0f0e, q4); 3130 ASSERT_EQUAL_128(0, 0x1312, q5); 3131 ASSERT_EQUAL_128(0, 0x14131211, q16); 3132 ASSERT_EQUAL_128(0, 0x17161514, q17); 3133 ASSERT_EQUAL_128(0, 0x0f0e0d0c, q18); 3134 ASSERT_EQUAL_128(0, 0x17161514, q19); 3135 ASSERT_EQUAL_128(0, 0x1817161514131211, q20); 3136 ASSERT_EQUAL_128(0, 0x1f1e1d1c1b1a1918, q21); 3137 ASSERT_EQUAL_128(0, 0x0f0e0d0c0b0a0908, q22); 3138 ASSERT_EQUAL_128(0, 0x1f1e1d1c1b1a1918, q23); 3139 ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x1817161514131211, q24); 3140 ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q25); 3141 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q26); 3142 ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q27); 3143 ASSERT_EQUAL_128(0x2027262524232221, 0x2023222120212020, q6); 3144 ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q7); 3145 3146 TEARDOWN(); 3147 } 3148 3149 3150 TEST(neon_ld1_d) { 3151 SETUP(); 3152 3153 uint8_t src[32 + 5]; 3154 for (unsigned i = 0; i < sizeof(src); i++) { 3155 src[i] = i; 3156 } 3157 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 3158 3159 START(); 3160 __ Mov(x17, src_base); 3161 __ Ldr(q2, MemOperand(x17)); // Initialise top 64-bits of Q register. 
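// Ld1 with a 64-bit arrangement writes only the low half of each register
// and zeroes the top 64 bits; q2 is preloaded with Ldr above so that the
// assertions can check that the top half really is cleared. Multi-register
// forms fill the registers in turn from consecutive memory, e.g.
// Ld1(v3.V8B(), v4.V8B(), addr) puts bytes [0..7] in v3 and [8..15] in v4.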
3162 __ Ld1(v2.V8B(), MemOperand(x17)); 3163 __ Add(x17, x17, 1); 3164 __ Ld1(v3.V8B(), v4.V8B(), MemOperand(x17)); 3165 __ Add(x17, x17, 1); 3166 __ Ld1(v5.V4H(), v6.V4H(), v7.V4H(), MemOperand(x17)); 3167 __ Add(x17, x17, 1); 3168 __ Ld1(v16.V2S(), v17.V2S(), v18.V2S(), v19.V2S(), MemOperand(x17)); 3169 __ Add(x17, x17, 1); 3170 __ Ld1(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x17)); 3171 __ Add(x17, x17, 1); 3172 __ Ld1(v20.V1D(), v21.V1D(), v22.V1D(), v23.V1D(), MemOperand(x17)); 3173 END(); 3174 3175 RUN(); 3176 3177 ASSERT_EQUAL_128(0, 0x0706050403020100, q2); 3178 ASSERT_EQUAL_128(0, 0x0807060504030201, q3); 3179 ASSERT_EQUAL_128(0, 0x100f0e0d0c0b0a09, q4); 3180 ASSERT_EQUAL_128(0, 0x0908070605040302, q5); 3181 ASSERT_EQUAL_128(0, 0x11100f0e0d0c0b0a, q6); 3182 ASSERT_EQUAL_128(0, 0x1918171615141312, q7); 3183 ASSERT_EQUAL_128(0, 0x0a09080706050403, q16); 3184 ASSERT_EQUAL_128(0, 0x1211100f0e0d0c0b, q17); 3185 ASSERT_EQUAL_128(0, 0x1a19181716151413, q18); 3186 ASSERT_EQUAL_128(0, 0x2221201f1e1d1c1b, q19); 3187 ASSERT_EQUAL_128(0, 0x0b0a090807060504, q30); 3188 ASSERT_EQUAL_128(0, 0x131211100f0e0d0c, q31); 3189 ASSERT_EQUAL_128(0, 0x1b1a191817161514, q0); 3190 ASSERT_EQUAL_128(0, 0x232221201f1e1d1c, q1); 3191 ASSERT_EQUAL_128(0, 0x0c0b0a0908070605, q20); 3192 ASSERT_EQUAL_128(0, 0x14131211100f0e0d, q21); 3193 ASSERT_EQUAL_128(0, 0x1c1b1a1918171615, q22); 3194 ASSERT_EQUAL_128(0, 0x24232221201f1e1d, q23); 3195 3196 TEARDOWN(); 3197 } 3198 3199 3200 TEST(neon_ld1_d_postindex) { 3201 SETUP(); 3202 3203 uint8_t src[32 + 5]; 3204 for (unsigned i = 0; i < sizeof(src); i++) { 3205 src[i] = i; 3206 } 3207 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 3208 3209 START(); 3210 __ Mov(x17, src_base); 3211 __ Mov(x18, src_base + 1); 3212 __ Mov(x19, src_base + 2); 3213 __ Mov(x20, src_base + 3); 3214 __ Mov(x21, src_base + 4); 3215 __ Mov(x22, src_base + 5); 3216 __ Mov(x23, 1); 3217 __ Ldr(q2, MemOperand(x17)); // Initialise top 64-bits of Q register. 
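// For these structure loads the post-index amount is either a register (x23
// here) or an immediate equal to the total number of bytes transferred:
// 16 for two, 24 for three and 32 for four D-sized registers below.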
3218 __ Ld1(v2.V8B(), MemOperand(x17, x23, PostIndex)); 3219 __ Ld1(v3.V8B(), v4.V8B(), MemOperand(x18, 16, PostIndex)); 3220 __ Ld1(v5.V4H(), v6.V4H(), v7.V4H(), MemOperand(x19, 24, PostIndex)); 3221 __ Ld1(v16.V2S(), 3222 v17.V2S(), 3223 v18.V2S(), 3224 v19.V2S(), 3225 MemOperand(x20, 32, PostIndex)); 3226 __ Ld1(v30.V2S(), 3227 v31.V2S(), 3228 v0.V2S(), 3229 v1.V2S(), 3230 MemOperand(x21, 32, PostIndex)); 3231 __ Ld1(v20.V1D(), 3232 v21.V1D(), 3233 v22.V1D(), 3234 v23.V1D(), 3235 MemOperand(x22, 32, PostIndex)); 3236 END(); 3237 3238 RUN(); 3239 3240 ASSERT_EQUAL_128(0, 0x0706050403020100, q2); 3241 ASSERT_EQUAL_128(0, 0x0807060504030201, q3); 3242 ASSERT_EQUAL_128(0, 0x100f0e0d0c0b0a09, q4); 3243 ASSERT_EQUAL_128(0, 0x0908070605040302, q5); 3244 ASSERT_EQUAL_128(0, 0x11100f0e0d0c0b0a, q6); 3245 ASSERT_EQUAL_128(0, 0x1918171615141312, q7); 3246 ASSERT_EQUAL_128(0, 0x0a09080706050403, q16); 3247 ASSERT_EQUAL_128(0, 0x1211100f0e0d0c0b, q17); 3248 ASSERT_EQUAL_128(0, 0x1a19181716151413, q18); 3249 ASSERT_EQUAL_128(0, 0x2221201f1e1d1c1b, q19); 3250 ASSERT_EQUAL_128(0, 0x0b0a090807060504, q30); 3251 ASSERT_EQUAL_128(0, 0x131211100f0e0d0c, q31); 3252 ASSERT_EQUAL_128(0, 0x1b1a191817161514, q0); 3253 ASSERT_EQUAL_128(0, 0x232221201f1e1d1c, q1); 3254 ASSERT_EQUAL_128(0, 0x0c0b0a0908070605, q20); 3255 ASSERT_EQUAL_128(0, 0x14131211100f0e0d, q21); 3256 ASSERT_EQUAL_128(0, 0x1c1b1a1918171615, q22); 3257 ASSERT_EQUAL_128(0, 0x24232221201f1e1d, q23); 3258 ASSERT_EQUAL_64(src_base + 1, x17); 3259 ASSERT_EQUAL_64(src_base + 1 + 16, x18); 3260 ASSERT_EQUAL_64(src_base + 2 + 24, x19); 3261 ASSERT_EQUAL_64(src_base + 3 + 32, x20); 3262 ASSERT_EQUAL_64(src_base + 4 + 32, x21); 3263 ASSERT_EQUAL_64(src_base + 5 + 32, x22); 3264 3265 TEARDOWN(); 3266 } 3267 3268 3269 TEST(neon_ld1_q) { 3270 SETUP(); 3271 3272 uint8_t src[64 + 4]; 3273 for (unsigned i = 0; i < sizeof(src); i++) { 3274 src[i] = i; 3275 } 3276 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 3277 3278 START(); 3279 __ Mov(x17, src_base); 3280 __ Ld1(v2.V16B(), MemOperand(x17)); 3281 __ Add(x17, x17, 1); 3282 __ Ld1(v3.V16B(), v4.V16B(), MemOperand(x17)); 3283 __ Add(x17, x17, 1); 3284 __ Ld1(v5.V8H(), v6.V8H(), v7.V8H(), MemOperand(x17)); 3285 __ Add(x17, x17, 1); 3286 __ Ld1(v16.V4S(), v17.V4S(), v18.V4S(), v19.V4S(), MemOperand(x17)); 3287 __ Add(x17, x17, 1); 3288 __ Ld1(v30.V2D(), v31.V2D(), v0.V2D(), v1.V2D(), MemOperand(x17)); 3289 END(); 3290 3291 RUN(); 3292 3293 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q2); 3294 ASSERT_EQUAL_128(0x100f0e0d0c0b0a09, 0x0807060504030201, q3); 3295 ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x1817161514131211, q4); 3296 ASSERT_EQUAL_128(0x11100f0e0d0c0b0a, 0x0908070605040302, q5); 3297 ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x1918171615141312, q6); 3298 ASSERT_EQUAL_128(0x31302f2e2d2c2b2a, 0x2928272625242322, q7); 3299 ASSERT_EQUAL_128(0x1211100f0e0d0c0b, 0x0a09080706050403, q16); 3300 ASSERT_EQUAL_128(0x2221201f1e1d1c1b, 0x1a19181716151413, q17); 3301 ASSERT_EQUAL_128(0x3231302f2e2d2c2b, 0x2a29282726252423, q18); 3302 ASSERT_EQUAL_128(0x4241403f3e3d3c3b, 0x3a39383736353433, q19); 3303 ASSERT_EQUAL_128(0x131211100f0e0d0c, 0x0b0a090807060504, q30); 3304 ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x1b1a191817161514, q31); 3305 ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x2b2a292827262524, q0); 3306 ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x3b3a393837363534, q1); 3307 3308 TEARDOWN(); 3309 } 3310 3311 3312 TEST(neon_ld1_q_postindex) { 3313 SETUP(); 3314 3315 uint8_t src[64 + 4]; 3316 for (unsigned i = 0; i < 
sizeof(src); i++) { 3317 src[i] = i; 3318 } 3319 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 3320 3321 START(); 3322 __ Mov(x17, src_base); 3323 __ Mov(x18, src_base + 1); 3324 __ Mov(x19, src_base + 2); 3325 __ Mov(x20, src_base + 3); 3326 __ Mov(x21, src_base + 4); 3327 __ Mov(x22, 1); 3328 __ Ld1(v2.V16B(), MemOperand(x17, x22, PostIndex)); 3329 __ Ld1(v3.V16B(), v4.V16B(), MemOperand(x18, 32, PostIndex)); 3330 __ Ld1(v5.V8H(), v6.V8H(), v7.V8H(), MemOperand(x19, 48, PostIndex)); 3331 __ Ld1(v16.V4S(), 3332 v17.V4S(), 3333 v18.V4S(), 3334 v19.V4S(), 3335 MemOperand(x20, 64, PostIndex)); 3336 __ Ld1(v30.V2D(), 3337 v31.V2D(), 3338 v0.V2D(), 3339 v1.V2D(), 3340 MemOperand(x21, 64, PostIndex)); 3341 END(); 3342 3343 RUN(); 3344 3345 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q2); 3346 ASSERT_EQUAL_128(0x100f0e0d0c0b0a09, 0x0807060504030201, q3); 3347 ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x1817161514131211, q4); 3348 ASSERT_EQUAL_128(0x11100f0e0d0c0b0a, 0x0908070605040302, q5); 3349 ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x1918171615141312, q6); 3350 ASSERT_EQUAL_128(0x31302f2e2d2c2b2a, 0x2928272625242322, q7); 3351 ASSERT_EQUAL_128(0x1211100f0e0d0c0b, 0x0a09080706050403, q16); 3352 ASSERT_EQUAL_128(0x2221201f1e1d1c1b, 0x1a19181716151413, q17); 3353 ASSERT_EQUAL_128(0x3231302f2e2d2c2b, 0x2a29282726252423, q18); 3354 ASSERT_EQUAL_128(0x4241403f3e3d3c3b, 0x3a39383736353433, q19); 3355 ASSERT_EQUAL_128(0x131211100f0e0d0c, 0x0b0a090807060504, q30); 3356 ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x1b1a191817161514, q31); 3357 ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x2b2a292827262524, q0); 3358 ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x3b3a393837363534, q1); 3359 ASSERT_EQUAL_64(src_base + 1, x17); 3360 ASSERT_EQUAL_64(src_base + 1 + 32, x18); 3361 ASSERT_EQUAL_64(src_base + 2 + 48, x19); 3362 ASSERT_EQUAL_64(src_base + 3 + 64, x20); 3363 ASSERT_EQUAL_64(src_base + 4 + 64, x21); 3364 3365 TEARDOWN(); 3366 } 3367 3368 3369 TEST(neon_ld1_lane) { 3370 SETUP(); 3371 3372 uint8_t src[64]; 3373 for (unsigned i = 0; i < sizeof(src); i++) { 3374 src[i] = i; 3375 } 3376 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 3377 3378 START(); 3379 3380 // Test loading whole register by element. 3381 __ Mov(x17, src_base); 3382 for (int i = 15; i >= 0; i--) { 3383 __ Ld1(v0.B(), i, MemOperand(x17)); 3384 __ Add(x17, x17, 1); 3385 } 3386 3387 __ Mov(x17, src_base); 3388 for (int i = 7; i >= 0; i--) { 3389 __ Ld1(v1.H(), i, MemOperand(x17)); 3390 __ Add(x17, x17, 1); 3391 } 3392 3393 __ Mov(x17, src_base); 3394 for (int i = 3; i >= 0; i--) { 3395 __ Ld1(v2.S(), i, MemOperand(x17)); 3396 __ Add(x17, x17, 1); 3397 } 3398 3399 __ Mov(x17, src_base); 3400 for (int i = 1; i >= 0; i--) { 3401 __ Ld1(v3.D(), i, MemOperand(x17)); 3402 __ Add(x17, x17, 1); 3403 } 3404 3405 // Test loading a single element into an initialised register. 
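// Unlike the whole-register forms above, a lane-targeted Ld1 leaves every
// other lane untouched, so each register below should differ from the Ldr
// that initialised it only in the loaded lane (e.g. only byte 4 of v4).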
3406 __ Mov(x17, src_base); 3407 __ Ldr(q4, MemOperand(x17)); 3408 __ Ld1(v4.B(), 4, MemOperand(x17)); 3409 __ Ldr(q5, MemOperand(x17)); 3410 __ Ld1(v5.H(), 3, MemOperand(x17)); 3411 __ Ldr(q6, MemOperand(x17)); 3412 __ Ld1(v6.S(), 2, MemOperand(x17)); 3413 __ Ldr(q7, MemOperand(x17)); 3414 __ Ld1(v7.D(), 1, MemOperand(x17)); 3415 3416 END(); 3417 3418 RUN(); 3419 3420 ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0); 3421 ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q1); 3422 ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q2); 3423 ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q3); 3424 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q4); 3425 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q5); 3426 ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q6); 3427 ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q7); 3428 3429 TEARDOWN(); 3430 } 3431 3432 TEST(neon_ld2_d) { 3433 SETUP(); 3434 3435 uint8_t src[64 + 4]; 3436 for (unsigned i = 0; i < sizeof(src); i++) { 3437 src[i] = i; 3438 } 3439 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 3440 3441 START(); 3442 __ Mov(x17, src_base); 3443 __ Ld2(v2.V8B(), v3.V8B(), MemOperand(x17)); 3444 __ Add(x17, x17, 1); 3445 __ Ld2(v4.V8B(), v5.V8B(), MemOperand(x17)); 3446 __ Add(x17, x17, 1); 3447 __ Ld2(v6.V4H(), v7.V4H(), MemOperand(x17)); 3448 __ Add(x17, x17, 1); 3449 __ Ld2(v31.V2S(), v0.V2S(), MemOperand(x17)); 3450 END(); 3451 3452 RUN(); 3453 3454 ASSERT_EQUAL_128(0, 0x0e0c0a0806040200, q2); 3455 ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q3); 3456 ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q4); 3457 ASSERT_EQUAL_128(0, 0x100e0c0a08060402, q5); 3458 ASSERT_EQUAL_128(0, 0x0f0e0b0a07060302, q6); 3459 ASSERT_EQUAL_128(0, 0x11100d0c09080504, q7); 3460 ASSERT_EQUAL_128(0, 0x0e0d0c0b06050403, q31); 3461 ASSERT_EQUAL_128(0, 0x1211100f0a090807, q0); 3462 3463 TEARDOWN(); 3464 } 3465 3466 TEST(neon_ld2_d_postindex) { 3467 SETUP(); 3468 3469 uint8_t src[32 + 4]; 3470 for (unsigned i = 0; i < sizeof(src); i++) { 3471 src[i] = i; 3472 } 3473 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 3474 3475 START(); 3476 __ Mov(x17, src_base); 3477 __ Mov(x18, src_base + 1); 3478 __ Mov(x19, src_base + 2); 3479 __ Mov(x20, src_base + 3); 3480 __ Mov(x21, src_base + 4); 3481 __ Mov(x22, 1); 3482 __ Ld2(v2.V8B(), v3.V8B(), MemOperand(x17, x22, PostIndex)); 3483 __ Ld2(v4.V8B(), v5.V8B(), MemOperand(x18, 16, PostIndex)); 3484 __ Ld2(v5.V4H(), v6.V4H(), MemOperand(x19, 16, PostIndex)); 3485 __ Ld2(v16.V2S(), v17.V2S(), MemOperand(x20, 16, PostIndex)); 3486 __ Ld2(v31.V2S(), v0.V2S(), MemOperand(x21, 16, PostIndex)); 3487 END(); 3488 3489 RUN(); 3490 3491 ASSERT_EQUAL_128(0, 0x0e0c0a0806040200, q2); 3492 ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q3); 3493 ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q4); 3494 ASSERT_EQUAL_128(0, 0x0f0e0b0a07060302, q5); 3495 ASSERT_EQUAL_128(0, 0x11100d0c09080504, q6); 3496 ASSERT_EQUAL_128(0, 0x0e0d0c0b06050403, q16); 3497 ASSERT_EQUAL_128(0, 0x1211100f0a090807, q17); 3498 ASSERT_EQUAL_128(0, 0x0f0e0d0c07060504, q31); 3499 ASSERT_EQUAL_128(0, 0x131211100b0a0908, q0); 3500 3501 ASSERT_EQUAL_64(src_base + 1, x17); 3502 ASSERT_EQUAL_64(src_base + 1 + 16, x18); 3503 ASSERT_EQUAL_64(src_base + 2 + 16, x19); 3504 ASSERT_EQUAL_64(src_base + 3 + 16, x20); 3505 ASSERT_EQUAL_64(src_base + 4 + 16, x21); 3506 3507 TEARDOWN(); 3508 } 3509 3510 3511 TEST(neon_ld2_q) { 3512 SETUP(); 3513 3514 uint8_t src[64 + 4]; 3515 for (unsigned i = 0; i < sizeof(src); i++) { 3516 
src[i] = i; 3517 } 3518 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 3519 3520 START(); 3521 __ Mov(x17, src_base); 3522 __ Ld2(v2.V16B(), v3.V16B(), MemOperand(x17)); 3523 __ Add(x17, x17, 1); 3524 __ Ld2(v4.V16B(), v5.V16B(), MemOperand(x17)); 3525 __ Add(x17, x17, 1); 3526 __ Ld2(v6.V8H(), v7.V8H(), MemOperand(x17)); 3527 __ Add(x17, x17, 1); 3528 __ Ld2(v16.V4S(), v17.V4S(), MemOperand(x17)); 3529 __ Add(x17, x17, 1); 3530 __ Ld2(v31.V2D(), v0.V2D(), MemOperand(x17)); 3531 END(); 3532 3533 RUN(); 3534 3535 ASSERT_EQUAL_128(0x1e1c1a1816141210, 0x0e0c0a0806040200, q2); 3536 ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q3); 3537 ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q4); 3538 ASSERT_EQUAL_128(0x201e1c1a18161412, 0x100e0c0a08060402, q5); 3539 ASSERT_EQUAL_128(0x1f1e1b1a17161312, 0x0f0e0b0a07060302, q6); 3540 ASSERT_EQUAL_128(0x21201d1c19181514, 0x11100d0c09080504, q7); 3541 ASSERT_EQUAL_128(0x1e1d1c1b16151413, 0x0e0d0c0b06050403, q16); 3542 ASSERT_EQUAL_128(0x2221201f1a191817, 0x1211100f0a090807, q17); 3543 ASSERT_EQUAL_128(0x1b1a191817161514, 0x0b0a090807060504, q31); 3544 ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x131211100f0e0d0c, q0); 3545 3546 TEARDOWN(); 3547 } 3548 3549 3550 TEST(neon_ld2_q_postindex) { 3551 SETUP(); 3552 3553 uint8_t src[64 + 4]; 3554 for (unsigned i = 0; i < sizeof(src); i++) { 3555 src[i] = i; 3556 } 3557 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 3558 3559 START(); 3560 __ Mov(x17, src_base); 3561 __ Mov(x18, src_base + 1); 3562 __ Mov(x19, src_base + 2); 3563 __ Mov(x20, src_base + 3); 3564 __ Mov(x21, src_base + 4); 3565 __ Mov(x22, 1); 3566 __ Ld2(v2.V16B(), v3.V16B(), MemOperand(x17, x22, PostIndex)); 3567 __ Ld2(v4.V16B(), v5.V16B(), MemOperand(x18, 32, PostIndex)); 3568 __ Ld2(v6.V8H(), v7.V8H(), MemOperand(x19, 32, PostIndex)); 3569 __ Ld2(v16.V4S(), v17.V4S(), MemOperand(x20, 32, PostIndex)); 3570 __ Ld2(v31.V2D(), v0.V2D(), MemOperand(x21, 32, PostIndex)); 3571 END(); 3572 3573 RUN(); 3574 3575 ASSERT_EQUAL_128(0x1e1c1a1816141210, 0x0e0c0a0806040200, q2); 3576 ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q3); 3577 ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q4); 3578 ASSERT_EQUAL_128(0x201e1c1a18161412, 0x100e0c0a08060402, q5); 3579 ASSERT_EQUAL_128(0x1f1e1b1a17161312, 0x0f0e0b0a07060302, q6); 3580 ASSERT_EQUAL_128(0x21201d1c19181514, 0x11100d0c09080504, q7); 3581 ASSERT_EQUAL_128(0x1e1d1c1b16151413, 0x0e0d0c0b06050403, q16); 3582 ASSERT_EQUAL_128(0x2221201f1a191817, 0x1211100f0a090807, q17); 3583 ASSERT_EQUAL_128(0x1b1a191817161514, 0x0b0a090807060504, q31); 3584 ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x131211100f0e0d0c, q0); 3585 3586 3587 ASSERT_EQUAL_64(src_base + 1, x17); 3588 ASSERT_EQUAL_64(src_base + 1 + 32, x18); 3589 ASSERT_EQUAL_64(src_base + 2 + 32, x19); 3590 ASSERT_EQUAL_64(src_base + 3 + 32, x20); 3591 ASSERT_EQUAL_64(src_base + 4 + 32, x21); 3592 3593 TEARDOWN(); 3594 } 3595 3596 3597 TEST(neon_ld2_lane) { 3598 SETUP(); 3599 3600 uint8_t src[64]; 3601 for (unsigned i = 0; i < sizeof(src); i++) { 3602 src[i] = i; 3603 } 3604 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 3605 3606 START(); 3607 3608 // Test loading whole register by element. 
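// Each lane-targeted Ld2 reads two consecutive elements from memory and
// writes them to the same lane of the two destination registers, so walking
// the lanes from 15 down to 0 while advancing the address one byte at a time
// builds the descending, offset-by-one patterns checked for q0 and q1 below.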
3609 __ Mov(x17, src_base); 3610 for (int i = 15; i >= 0; i--) { 3611 __ Ld2(v0.B(), v1.B(), i, MemOperand(x17)); 3612 __ Add(x17, x17, 1); 3613 } 3614 3615 __ Mov(x17, src_base); 3616 for (int i = 7; i >= 0; i--) { 3617 __ Ld2(v2.H(), v3.H(), i, MemOperand(x17)); 3618 __ Add(x17, x17, 1); 3619 } 3620 3621 __ Mov(x17, src_base); 3622 for (int i = 3; i >= 0; i--) { 3623 __ Ld2(v4.S(), v5.S(), i, MemOperand(x17)); 3624 __ Add(x17, x17, 1); 3625 } 3626 3627 __ Mov(x17, src_base); 3628 for (int i = 1; i >= 0; i--) { 3629 __ Ld2(v6.D(), v7.D(), i, MemOperand(x17)); 3630 __ Add(x17, x17, 1); 3631 } 3632 3633 // Test loading a single element into an initialised register. 3634 __ Mov(x17, src_base); 3635 __ Mov(x4, x17); 3636 __ Ldr(q8, MemOperand(x4, 16, PostIndex)); 3637 __ Ldr(q9, MemOperand(x4)); 3638 __ Ld2(v8.B(), v9.B(), 4, MemOperand(x17)); 3639 __ Mov(x5, x17); 3640 __ Ldr(q10, MemOperand(x5, 16, PostIndex)); 3641 __ Ldr(q11, MemOperand(x5)); 3642 __ Ld2(v10.H(), v11.H(), 3, MemOperand(x17)); 3643 __ Mov(x6, x17); 3644 __ Ldr(q12, MemOperand(x6, 16, PostIndex)); 3645 __ Ldr(q13, MemOperand(x6)); 3646 __ Ld2(v12.S(), v13.S(), 2, MemOperand(x17)); 3647 __ Mov(x7, x17); 3648 __ Ldr(q14, MemOperand(x7, 16, PostIndex)); 3649 __ Ldr(q15, MemOperand(x7)); 3650 __ Ld2(v14.D(), v15.D(), 1, MemOperand(x17)); 3651 3652 END(); 3653 3654 RUN(); 3655 3656 ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0); 3657 ASSERT_EQUAL_128(0x0102030405060708, 0x090a0b0c0d0e0f10, q1); 3658 ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q2); 3659 ASSERT_EQUAL_128(0x0302040305040605, 0x0706080709080a09, q3); 3660 ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q4); 3661 ASSERT_EQUAL_128(0x0706050408070605, 0x090807060a090807, q5); 3662 ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q6); 3663 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x100f0e0d0c0b0a09, q7); 3664 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q8); 3665 ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q9); 3666 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q10); 3667 ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q11); 3668 ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q12); 3669 ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q13); 3670 ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q14); 3671 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q15); 3672 3673 TEARDOWN(); 3674 } 3675 3676 3677 TEST(neon_ld2_lane_postindex) { 3678 SETUP(); 3679 3680 uint8_t src[64]; 3681 for (unsigned i = 0; i < sizeof(src); i++) { 3682 src[i] = i; 3683 } 3684 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 3685 3686 START(); 3687 __ Mov(x17, src_base); 3688 __ Mov(x18, src_base); 3689 __ Mov(x19, src_base); 3690 __ Mov(x20, src_base); 3691 __ Mov(x21, src_base); 3692 __ Mov(x22, src_base); 3693 __ Mov(x23, src_base); 3694 __ Mov(x24, src_base); 3695 3696 // Test loading whole register by element. 3697 for (int i = 15; i >= 0; i--) { 3698 __ Ld2(v0.B(), v1.B(), i, MemOperand(x17, 2, PostIndex)); 3699 } 3700 3701 for (int i = 7; i >= 0; i--) { 3702 __ Ld2(v2.H(), v3.H(), i, MemOperand(x18, 4, PostIndex)); 3703 } 3704 3705 for (int i = 3; i >= 0; i--) { 3706 __ Ld2(v4.S(), v5.S(), i, MemOperand(x19, 8, PostIndex)); 3707 } 3708 3709 for (int i = 1; i >= 0; i--) { 3710 __ Ld2(v6.D(), v7.D(), i, MemOperand(x20, 16, PostIndex)); 3711 } 3712 3713 // Test loading a single element into an initialised register. 
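// These lane loads post-index the base by a register (x25, stepped from 1 to
// 4), whereas the loops above use immediate steps equal to the number of
// bytes accessed: 2 for B, 4 for H, 8 for S and 16 for D pairs.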
3714 __ Mov(x25, 1); 3715 __ Mov(x4, x21); 3716 __ Ldr(q8, MemOperand(x4, 16, PostIndex)); 3717 __ Ldr(q9, MemOperand(x4)); 3718 __ Ld2(v8.B(), v9.B(), 4, MemOperand(x21, x25, PostIndex)); 3719 __ Add(x25, x25, 1); 3720 3721 __ Mov(x5, x22); 3722 __ Ldr(q10, MemOperand(x5, 16, PostIndex)); 3723 __ Ldr(q11, MemOperand(x5)); 3724 __ Ld2(v10.H(), v11.H(), 3, MemOperand(x22, x25, PostIndex)); 3725 __ Add(x25, x25, 1); 3726 3727 __ Mov(x6, x23); 3728 __ Ldr(q12, MemOperand(x6, 16, PostIndex)); 3729 __ Ldr(q13, MemOperand(x6)); 3730 __ Ld2(v12.S(), v13.S(), 2, MemOperand(x23, x25, PostIndex)); 3731 __ Add(x25, x25, 1); 3732 3733 __ Mov(x7, x24); 3734 __ Ldr(q14, MemOperand(x7, 16, PostIndex)); 3735 __ Ldr(q15, MemOperand(x7)); 3736 __ Ld2(v14.D(), v15.D(), 1, MemOperand(x24, x25, PostIndex)); 3737 3738 END(); 3739 3740 RUN(); 3741 3742 ASSERT_EQUAL_128(0x00020406080a0c0e, 0x10121416181a1c1e, q0); 3743 ASSERT_EQUAL_128(0x01030507090b0d0f, 0x11131517191b1d1f, q1); 3744 ASSERT_EQUAL_128(0x0100050409080d0c, 0x1110151419181d1c, q2); 3745 ASSERT_EQUAL_128(0x030207060b0a0f0e, 0x131217161b1a1f1e, q3); 3746 ASSERT_EQUAL_128(0x030201000b0a0908, 0x131211101b1a1918, q4); 3747 ASSERT_EQUAL_128(0x070605040f0e0d0c, 0x171615141f1e1d1c, q5); 3748 ASSERT_EQUAL_128(0x0706050403020100, 0x1716151413121110, q6); 3749 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1f1e1d1c1b1a1918, q7); 3750 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q8); 3751 ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q9); 3752 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q10); 3753 ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q11); 3754 ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q12); 3755 ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q13); 3756 ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q14); 3757 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q15); 3758 3759 3760 ASSERT_EQUAL_64(src_base + 32, x17); 3761 ASSERT_EQUAL_64(src_base + 32, x18); 3762 ASSERT_EQUAL_64(src_base + 32, x19); 3763 ASSERT_EQUAL_64(src_base + 32, x20); 3764 ASSERT_EQUAL_64(src_base + 1, x21); 3765 ASSERT_EQUAL_64(src_base + 2, x22); 3766 ASSERT_EQUAL_64(src_base + 3, x23); 3767 ASSERT_EQUAL_64(src_base + 4, x24); 3768 3769 TEARDOWN(); 3770 } 3771 3772 3773 TEST(neon_ld2_alllanes) { 3774 SETUP(); 3775 3776 uint8_t src[64]; 3777 for (unsigned i = 0; i < sizeof(src); i++) { 3778 src[i] = i; 3779 } 3780 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 3781 3782 START(); 3783 __ Mov(x17, src_base + 1); 3784 __ Mov(x18, 1); 3785 __ Ld2r(v0.V8B(), v1.V8B(), MemOperand(x17)); 3786 __ Add(x17, x17, 2); 3787 __ Ld2r(v2.V16B(), v3.V16B(), MemOperand(x17)); 3788 __ Add(x17, x17, 1); 3789 __ Ld2r(v4.V4H(), v5.V4H(), MemOperand(x17)); 3790 __ Add(x17, x17, 1); 3791 __ Ld2r(v6.V8H(), v7.V8H(), MemOperand(x17)); 3792 __ Add(x17, x17, 4); 3793 __ Ld2r(v8.V2S(), v9.V2S(), MemOperand(x17)); 3794 __ Add(x17, x17, 1); 3795 __ Ld2r(v10.V4S(), v11.V4S(), MemOperand(x17)); 3796 __ Add(x17, x17, 8); 3797 __ Ld2r(v12.V2D(), v13.V2D(), MemOperand(x17)); 3798 END(); 3799 3800 RUN(); 3801 3802 ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0); 3803 ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1); 3804 ASSERT_EQUAL_128(0x0303030303030303, 0x0303030303030303, q2); 3805 ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3); 3806 ASSERT_EQUAL_128(0x0000000000000000, 0x0504050405040504, q4); 3807 ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q5); 3808 
ASSERT_EQUAL_128(0x0605060506050605, 0x0605060506050605, q6); 3809 ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q7); 3810 ASSERT_EQUAL_128(0x0000000000000000, 0x0c0b0a090c0b0a09, q8); 3811 ASSERT_EQUAL_128(0x0000000000000000, 0x100f0e0d100f0e0d, q9); 3812 ASSERT_EQUAL_128(0x0d0c0b0a0d0c0b0a, 0x0d0c0b0a0d0c0b0a, q10); 3813 ASSERT_EQUAL_128(0x11100f0e11100f0e, 0x11100f0e11100f0e, q11); 3814 ASSERT_EQUAL_128(0x1918171615141312, 0x1918171615141312, q12); 3815 ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x21201f1e1d1c1b1a, q13); 3816 3817 TEARDOWN(); 3818 } 3819 3820 3821 TEST(neon_ld2_alllanes_postindex) { 3822 SETUP(); 3823 3824 uint8_t src[64]; 3825 for (unsigned i = 0; i < sizeof(src); i++) { 3826 src[i] = i; 3827 } 3828 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 3829 3830 START(); 3831 __ Mov(x17, src_base + 1); 3832 __ Mov(x18, 1); 3833 __ Ld2r(v0.V8B(), v1.V8B(), MemOperand(x17, 2, PostIndex)); 3834 __ Ld2r(v2.V16B(), v3.V16B(), MemOperand(x17, x18, PostIndex)); 3835 __ Ld2r(v4.V4H(), v5.V4H(), MemOperand(x17, x18, PostIndex)); 3836 __ Ld2r(v6.V8H(), v7.V8H(), MemOperand(x17, 4, PostIndex)); 3837 __ Ld2r(v8.V2S(), v9.V2S(), MemOperand(x17, x18, PostIndex)); 3838 __ Ld2r(v10.V4S(), v11.V4S(), MemOperand(x17, 8, PostIndex)); 3839 __ Ld2r(v12.V2D(), v13.V2D(), MemOperand(x17, 16, PostIndex)); 3840 END(); 3841 3842 RUN(); 3843 3844 ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0); 3845 ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1); 3846 ASSERT_EQUAL_128(0x0303030303030303, 0x0303030303030303, q2); 3847 ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3); 3848 ASSERT_EQUAL_128(0x0000000000000000, 0x0504050405040504, q4); 3849 ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q5); 3850 ASSERT_EQUAL_128(0x0605060506050605, 0x0605060506050605, q6); 3851 ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q7); 3852 ASSERT_EQUAL_128(0x0000000000000000, 0x0c0b0a090c0b0a09, q8); 3853 ASSERT_EQUAL_128(0x0000000000000000, 0x100f0e0d100f0e0d, q9); 3854 ASSERT_EQUAL_128(0x0d0c0b0a0d0c0b0a, 0x0d0c0b0a0d0c0b0a, q10); 3855 ASSERT_EQUAL_128(0x11100f0e11100f0e, 0x11100f0e11100f0e, q11); 3856 ASSERT_EQUAL_128(0x1918171615141312, 0x1918171615141312, q12); 3857 ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x21201f1e1d1c1b1a, q13); 3858 ASSERT_EQUAL_64(src_base + 34, x17); 3859 3860 TEARDOWN(); 3861 } 3862 3863 3864 TEST(neon_ld3_d) { 3865 SETUP(); 3866 3867 uint8_t src[64 + 4]; 3868 for (unsigned i = 0; i < sizeof(src); i++) { 3869 src[i] = i; 3870 } 3871 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 3872 3873 START(); 3874 __ Mov(x17, src_base); 3875 __ Ld3(v2.V8B(), v3.V8B(), v4.V8B(), MemOperand(x17)); 3876 __ Add(x17, x17, 1); 3877 __ Ld3(v5.V8B(), v6.V8B(), v7.V8B(), MemOperand(x17)); 3878 __ Add(x17, x17, 1); 3879 __ Ld3(v8.V4H(), v9.V4H(), v10.V4H(), MemOperand(x17)); 3880 __ Add(x17, x17, 1); 3881 __ Ld3(v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x17)); 3882 END(); 3883 3884 RUN(); 3885 3886 ASSERT_EQUAL_128(0, 0x15120f0c09060300, q2); 3887 ASSERT_EQUAL_128(0, 0x1613100d0a070401, q3); 3888 ASSERT_EQUAL_128(0, 0x1714110e0b080502, q4); 3889 ASSERT_EQUAL_128(0, 0x1613100d0a070401, q5); 3890 ASSERT_EQUAL_128(0, 0x1714110e0b080502, q6); 3891 ASSERT_EQUAL_128(0, 0x1815120f0c090603, q7); 3892 ASSERT_EQUAL_128(0, 0x15140f0e09080302, q8); 3893 ASSERT_EQUAL_128(0, 0x171611100b0a0504, q9); 3894 ASSERT_EQUAL_128(0, 0x191813120d0c0706, q10); 3895 ASSERT_EQUAL_128(0, 0x1211100f06050403, q31); 3896 ASSERT_EQUAL_128(0, 0x161514130a090807, q0); 3897 
ASSERT_EQUAL_128(0, 0x1a1918170e0d0c0b, q1); 3898 3899 TEARDOWN(); 3900 } 3901 3902 3903 TEST(neon_ld3_d_postindex) { 3904 SETUP(); 3905 3906 uint8_t src[32 + 4]; 3907 for (unsigned i = 0; i < sizeof(src); i++) { 3908 src[i] = i; 3909 } 3910 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 3911 3912 START(); 3913 __ Mov(x17, src_base); 3914 __ Mov(x18, src_base + 1); 3915 __ Mov(x19, src_base + 2); 3916 __ Mov(x20, src_base + 3); 3917 __ Mov(x21, src_base + 4); 3918 __ Mov(x22, 1); 3919 __ Ld3(v2.V8B(), v3.V8B(), v4.V8B(), MemOperand(x17, x22, PostIndex)); 3920 __ Ld3(v5.V8B(), v6.V8B(), v7.V8B(), MemOperand(x18, 24, PostIndex)); 3921 __ Ld3(v8.V4H(), v9.V4H(), v10.V4H(), MemOperand(x19, 24, PostIndex)); 3922 __ Ld3(v11.V2S(), v12.V2S(), v13.V2S(), MemOperand(x20, 24, PostIndex)); 3923 __ Ld3(v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x21, 24, PostIndex)); 3924 END(); 3925 3926 RUN(); 3927 3928 ASSERT_EQUAL_128(0, 0x15120f0c09060300, q2); 3929 ASSERT_EQUAL_128(0, 0x1613100d0a070401, q3); 3930 ASSERT_EQUAL_128(0, 0x1714110e0b080502, q4); 3931 ASSERT_EQUAL_128(0, 0x1613100d0a070401, q5); 3932 ASSERT_EQUAL_128(0, 0x1714110e0b080502, q6); 3933 ASSERT_EQUAL_128(0, 0x1815120f0c090603, q7); 3934 ASSERT_EQUAL_128(0, 0x15140f0e09080302, q8); 3935 ASSERT_EQUAL_128(0, 0x171611100b0a0504, q9); 3936 ASSERT_EQUAL_128(0, 0x191813120d0c0706, q10); 3937 ASSERT_EQUAL_128(0, 0x1211100f06050403, q11); 3938 ASSERT_EQUAL_128(0, 0x161514130a090807, q12); 3939 ASSERT_EQUAL_128(0, 0x1a1918170e0d0c0b, q13); 3940 ASSERT_EQUAL_128(0, 0x1312111007060504, q31); 3941 ASSERT_EQUAL_128(0, 0x171615140b0a0908, q0); 3942 ASSERT_EQUAL_128(0, 0x1b1a19180f0e0d0c, q1); 3943 3944 ASSERT_EQUAL_64(src_base + 1, x17); 3945 ASSERT_EQUAL_64(src_base + 1 + 24, x18); 3946 ASSERT_EQUAL_64(src_base + 2 + 24, x19); 3947 ASSERT_EQUAL_64(src_base + 3 + 24, x20); 3948 ASSERT_EQUAL_64(src_base + 4 + 24, x21); 3949 3950 TEARDOWN(); 3951 } 3952 3953 3954 TEST(neon_ld3_q) { 3955 SETUP(); 3956 3957 uint8_t src[64 + 4]; 3958 for (unsigned i = 0; i < sizeof(src); i++) { 3959 src[i] = i; 3960 } 3961 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 3962 3963 START(); 3964 __ Mov(x17, src_base); 3965 __ Ld3(v2.V16B(), v3.V16B(), v4.V16B(), MemOperand(x17)); 3966 __ Add(x17, x17, 1); 3967 __ Ld3(v5.V16B(), v6.V16B(), v7.V16B(), MemOperand(x17)); 3968 __ Add(x17, x17, 1); 3969 __ Ld3(v8.V8H(), v9.V8H(), v10.V8H(), MemOperand(x17)); 3970 __ Add(x17, x17, 1); 3971 __ Ld3(v11.V4S(), v12.V4S(), v13.V4S(), MemOperand(x17)); 3972 __ Add(x17, x17, 1); 3973 __ Ld3(v31.V2D(), v0.V2D(), v1.V2D(), MemOperand(x17)); 3974 END(); 3975 3976 RUN(); 3977 3978 ASSERT_EQUAL_128(0x2d2a2724211e1b18, 0x15120f0c09060300, q2); 3979 ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q3); 3980 ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q4); 3981 ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q5); 3982 ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q6); 3983 ASSERT_EQUAL_128(0x302d2a2724211e1b, 0x1815120f0c090603, q7); 3984 ASSERT_EQUAL_128(0x2d2c272621201b1a, 0x15140f0e09080302, q8); 3985 ASSERT_EQUAL_128(0x2f2e292823221d1c, 0x171611100b0a0504, q9); 3986 ASSERT_EQUAL_128(0x31302b2a25241f1e, 0x191813120d0c0706, q10); 3987 ASSERT_EQUAL_128(0x2a2928271e1d1c1b, 0x1211100f06050403, q11); 3988 ASSERT_EQUAL_128(0x2e2d2c2b2221201f, 0x161514130a090807, q12); 3989 ASSERT_EQUAL_128(0x3231302f26252423, 0x1a1918170e0d0c0b, q13); 3990 ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x0b0a090807060504, q31); 3991 ASSERT_EQUAL_128(0x2b2a292827262524, 
0x131211100f0e0d0c, q0); 3992 ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x1b1a191817161514, q1); 3993 3994 TEARDOWN(); 3995 } 3996 3997 3998 TEST(neon_ld3_q_postindex) { 3999 SETUP(); 4000 4001 uint8_t src[64 + 4]; 4002 for (unsigned i = 0; i < sizeof(src); i++) { 4003 src[i] = i; 4004 } 4005 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 4006 4007 START(); 4008 __ Mov(x17, src_base); 4009 __ Mov(x18, src_base + 1); 4010 __ Mov(x19, src_base + 2); 4011 __ Mov(x20, src_base + 3); 4012 __ Mov(x21, src_base + 4); 4013 __ Mov(x22, 1); 4014 4015 __ Ld3(v2.V16B(), v3.V16B(), v4.V16B(), MemOperand(x17, x22, PostIndex)); 4016 __ Ld3(v5.V16B(), v6.V16B(), v7.V16B(), MemOperand(x18, 48, PostIndex)); 4017 __ Ld3(v8.V8H(), v9.V8H(), v10.V8H(), MemOperand(x19, 48, PostIndex)); 4018 __ Ld3(v11.V4S(), v12.V4S(), v13.V4S(), MemOperand(x20, 48, PostIndex)); 4019 __ Ld3(v31.V2D(), v0.V2D(), v1.V2D(), MemOperand(x21, 48, PostIndex)); 4020 END(); 4021 4022 RUN(); 4023 4024 ASSERT_EQUAL_128(0x2d2a2724211e1b18, 0x15120f0c09060300, q2); 4025 ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q3); 4026 ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q4); 4027 ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q5); 4028 ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q6); 4029 ASSERT_EQUAL_128(0x302d2a2724211e1b, 0x1815120f0c090603, q7); 4030 ASSERT_EQUAL_128(0x2d2c272621201b1a, 0x15140f0e09080302, q8); 4031 ASSERT_EQUAL_128(0x2f2e292823221d1c, 0x171611100b0a0504, q9); 4032 ASSERT_EQUAL_128(0x31302b2a25241f1e, 0x191813120d0c0706, q10); 4033 ASSERT_EQUAL_128(0x2a2928271e1d1c1b, 0x1211100f06050403, q11); 4034 ASSERT_EQUAL_128(0x2e2d2c2b2221201f, 0x161514130a090807, q12); 4035 ASSERT_EQUAL_128(0x3231302f26252423, 0x1a1918170e0d0c0b, q13); 4036 ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x0b0a090807060504, q31); 4037 ASSERT_EQUAL_128(0x2b2a292827262524, 0x131211100f0e0d0c, q0); 4038 ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x1b1a191817161514, q1); 4039 4040 ASSERT_EQUAL_64(src_base + 1, x17); 4041 ASSERT_EQUAL_64(src_base + 1 + 48, x18); 4042 ASSERT_EQUAL_64(src_base + 2 + 48, x19); 4043 ASSERT_EQUAL_64(src_base + 3 + 48, x20); 4044 ASSERT_EQUAL_64(src_base + 4 + 48, x21); 4045 4046 TEARDOWN(); 4047 } 4048 4049 4050 TEST(neon_ld3_lane) { 4051 SETUP(); 4052 4053 uint8_t src[64]; 4054 for (unsigned i = 0; i < sizeof(src); i++) { 4055 src[i] = i; 4056 } 4057 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 4058 4059 START(); 4060 4061 // Test loading whole register by element. 4062 __ Mov(x17, src_base); 4063 for (int i = 15; i >= 0; i--) { 4064 __ Ld3(v0.B(), v1.B(), v2.B(), i, MemOperand(x17)); 4065 __ Add(x17, x17, 1); 4066 } 4067 4068 __ Mov(x17, src_base); 4069 for (int i = 7; i >= 0; i--) { 4070 __ Ld3(v3.H(), v4.H(), v5.H(), i, MemOperand(x17)); 4071 __ Add(x17, x17, 1); 4072 } 4073 4074 __ Mov(x17, src_base); 4075 for (int i = 3; i >= 0; i--) { 4076 __ Ld3(v6.S(), v7.S(), v8.S(), i, MemOperand(x17)); 4077 __ Add(x17, x17, 1); 4078 } 4079 4080 __ Mov(x17, src_base); 4081 for (int i = 1; i >= 0; i--) { 4082 __ Ld3(v9.D(), v10.D(), v11.D(), i, MemOperand(x17)); 4083 __ Add(x17, x17, 1); 4084 } 4085 4086 // Test loading a single element into an initialised register. 
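  // Each Ldr below pre-fills a register with ascending bytes so that the
  // following Ld3 by lane should only replace the addressed lane; the final
  // asserts check that every other lane keeps its pre-filled value.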
4087 __ Mov(x17, src_base); 4088 __ Mov(x4, x17); 4089 __ Ldr(q12, MemOperand(x4, 16, PostIndex)); 4090 __ Ldr(q13, MemOperand(x4, 16, PostIndex)); 4091 __ Ldr(q14, MemOperand(x4)); 4092 __ Ld3(v12.B(), v13.B(), v14.B(), 4, MemOperand(x17)); 4093 __ Mov(x5, x17); 4094 __ Ldr(q15, MemOperand(x5, 16, PostIndex)); 4095 __ Ldr(q16, MemOperand(x5, 16, PostIndex)); 4096 __ Ldr(q17, MemOperand(x5)); 4097 __ Ld3(v15.H(), v16.H(), v17.H(), 3, MemOperand(x17)); 4098 __ Mov(x6, x17); 4099 __ Ldr(q18, MemOperand(x6, 16, PostIndex)); 4100 __ Ldr(q19, MemOperand(x6, 16, PostIndex)); 4101 __ Ldr(q20, MemOperand(x6)); 4102 __ Ld3(v18.S(), v19.S(), v20.S(), 2, MemOperand(x17)); 4103 __ Mov(x7, x17); 4104 __ Ldr(q21, MemOperand(x7, 16, PostIndex)); 4105 __ Ldr(q22, MemOperand(x7, 16, PostIndex)); 4106 __ Ldr(q23, MemOperand(x7)); 4107 __ Ld3(v21.D(), v22.D(), v23.D(), 1, MemOperand(x17)); 4108 4109 END(); 4110 4111 RUN(); 4112 4113 ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0); 4114 ASSERT_EQUAL_128(0x0102030405060708, 0x090a0b0c0d0e0f10, q1); 4115 ASSERT_EQUAL_128(0x0203040506070809, 0x0a0b0c0d0e0f1011, q2); 4116 ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q3); 4117 ASSERT_EQUAL_128(0x0302040305040605, 0x0706080709080a09, q4); 4118 ASSERT_EQUAL_128(0x0504060507060807, 0x09080a090b0a0c0b, q5); 4119 ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q6); 4120 ASSERT_EQUAL_128(0x0706050408070605, 0x090807060a090807, q7); 4121 ASSERT_EQUAL_128(0x0b0a09080c0b0a09, 0x0d0c0b0a0e0d0c0b, q8); 4122 ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q9); 4123 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x100f0e0d0c0b0a09, q10); 4124 ASSERT_EQUAL_128(0x1716151413121110, 0x1817161514131211, q11); 4125 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q12); 4126 ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q13); 4127 ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q14); 4128 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q15); 4129 ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q16); 4130 ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q17); 4131 4132 TEARDOWN(); 4133 } 4134 4135 4136 TEST(neon_ld3_lane_postindex) { 4137 SETUP(); 4138 4139 uint8_t src[64]; 4140 for (unsigned i = 0; i < sizeof(src); i++) { 4141 src[i] = i; 4142 } 4143 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 4144 4145 START(); 4146 4147 // Test loading whole register by element. 4148 __ Mov(x17, src_base); 4149 __ Mov(x18, src_base); 4150 __ Mov(x19, src_base); 4151 __ Mov(x20, src_base); 4152 __ Mov(x21, src_base); 4153 __ Mov(x22, src_base); 4154 __ Mov(x23, src_base); 4155 __ Mov(x24, src_base); 4156 for (int i = 15; i >= 0; i--) { 4157 __ Ld3(v0.B(), v1.B(), v2.B(), i, MemOperand(x17, 3, PostIndex)); 4158 } 4159 4160 for (int i = 7; i >= 0; i--) { 4161 __ Ld3(v3.H(), v4.H(), v5.H(), i, MemOperand(x18, 6, PostIndex)); 4162 } 4163 4164 for (int i = 3; i >= 0; i--) { 4165 __ Ld3(v6.S(), v7.S(), v8.S(), i, MemOperand(x19, 12, PostIndex)); 4166 } 4167 4168 for (int i = 1; i >= 0; i--) { 4169 __ Ld3(v9.D(), v10.D(), v11.D(), i, MemOperand(x20, 24, PostIndex)); 4170 } 4171 4172 4173 // Test loading a single element into an initialised register. 
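  // x25 doubles as the register post-index: it starts at 1 and is bumped
  // after each load, so x21-x24 are expected to end at src_base + 1 up to
  // src_base + 4 (checked by the ASSERT_EQUAL_64 calls below).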
4174 __ Mov(x25, 1); 4175 __ Mov(x4, x21); 4176 __ Ldr(q12, MemOperand(x4, 16, PostIndex)); 4177 __ Ldr(q13, MemOperand(x4, 16, PostIndex)); 4178 __ Ldr(q14, MemOperand(x4)); 4179 __ Ld3(v12.B(), v13.B(), v14.B(), 4, MemOperand(x21, x25, PostIndex)); 4180 __ Add(x25, x25, 1); 4181 4182 __ Mov(x5, x22); 4183 __ Ldr(q15, MemOperand(x5, 16, PostIndex)); 4184 __ Ldr(q16, MemOperand(x5, 16, PostIndex)); 4185 __ Ldr(q17, MemOperand(x5)); 4186 __ Ld3(v15.H(), v16.H(), v17.H(), 3, MemOperand(x22, x25, PostIndex)); 4187 __ Add(x25, x25, 1); 4188 4189 __ Mov(x6, x23); 4190 __ Ldr(q18, MemOperand(x6, 16, PostIndex)); 4191 __ Ldr(q19, MemOperand(x6, 16, PostIndex)); 4192 __ Ldr(q20, MemOperand(x6)); 4193 __ Ld3(v18.S(), v19.S(), v20.S(), 2, MemOperand(x23, x25, PostIndex)); 4194 __ Add(x25, x25, 1); 4195 4196 __ Mov(x7, x24); 4197 __ Ldr(q21, MemOperand(x7, 16, PostIndex)); 4198 __ Ldr(q22, MemOperand(x7, 16, PostIndex)); 4199 __ Ldr(q23, MemOperand(x7)); 4200 __ Ld3(v21.D(), v22.D(), v23.D(), 1, MemOperand(x24, x25, PostIndex)); 4201 4202 END(); 4203 4204 RUN(); 4205 4206 ASSERT_EQUAL_128(0x000306090c0f1215, 0x181b1e2124272a2d, q0); 4207 ASSERT_EQUAL_128(0x0104070a0d101316, 0x191c1f2225282b2e, q1); 4208 ASSERT_EQUAL_128(0x0205080b0e111417, 0x1a1d202326292c2f, q2); 4209 ASSERT_EQUAL_128(0x010007060d0c1312, 0x19181f1e25242b2a, q3); 4210 ASSERT_EQUAL_128(0x030209080f0e1514, 0x1b1a212027262d2c, q4); 4211 ASSERT_EQUAL_128(0x05040b0a11101716, 0x1d1c232229282f2e, q5); 4212 ASSERT_EQUAL_128(0x030201000f0e0d0c, 0x1b1a191827262524, q6); 4213 ASSERT_EQUAL_128(0x0706050413121110, 0x1f1e1d1c2b2a2928, q7); 4214 ASSERT_EQUAL_128(0x0b0a090817161514, 0x232221202f2e2d2c, q8); 4215 ASSERT_EQUAL_128(0x0706050403020100, 0x1f1e1d1c1b1a1918, q9); 4216 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x2726252423222120, q10); 4217 ASSERT_EQUAL_128(0x1716151413121110, 0x2f2e2d2c2b2a2928, q11); 4218 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q12); 4219 ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q13); 4220 ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q14); 4221 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q15); 4222 ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q16); 4223 ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q17); 4224 ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q18); 4225 ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q19); 4226 ASSERT_EQUAL_128(0x2f2e2d2c0b0a0908, 0x2726252423222120, q20); 4227 ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q21); 4228 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q22); 4229 ASSERT_EQUAL_128(0x1716151413121110, 0x2726252423222120, q23); 4230 4231 ASSERT_EQUAL_64(src_base + 48, x17); 4232 ASSERT_EQUAL_64(src_base + 48, x18); 4233 ASSERT_EQUAL_64(src_base + 48, x19); 4234 ASSERT_EQUAL_64(src_base + 48, x20); 4235 ASSERT_EQUAL_64(src_base + 1, x21); 4236 ASSERT_EQUAL_64(src_base + 2, x22); 4237 ASSERT_EQUAL_64(src_base + 3, x23); 4238 ASSERT_EQUAL_64(src_base + 4, x24); 4239 4240 TEARDOWN(); 4241 } 4242 4243 4244 TEST(neon_ld3_alllanes) { 4245 SETUP(); 4246 4247 uint8_t src[64]; 4248 for (unsigned i = 0; i < sizeof(src); i++) { 4249 src[i] = i; 4250 } 4251 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 4252 4253 START(); 4254 __ Mov(x17, src_base + 1); 4255 __ Mov(x18, 1); 4256 __ Ld3r(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x17)); 4257 __ Add(x17, x17, 3); 4258 __ Ld3r(v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x17)); 4259 __ Add(x17, x17, 1); 4260 __ Ld3r(v6.V4H(), 
v7.V4H(), v8.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3r(v9.V8H(), v10.V8H(), v11.V8H(), MemOperand(x17));
  __ Add(x17, x17, 6);
  __ Ld3r(v12.V2S(), v13.V2S(), v14.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3r(v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x17));
  __ Add(x17, x17, 12);
  __ Ld3r(v18.V2D(), v19.V2D(), v20.V2D(), MemOperand(x17));
  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
  ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
  ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
  ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0605060506050605, q6);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0807080708070807, q7);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0a090a090a090a09, q8);
  ASSERT_EQUAL_128(0x0706070607060706, 0x0706070607060706, q9);
  ASSERT_EQUAL_128(0x0908090809080908, 0x0908090809080908, q10);
  ASSERT_EQUAL_128(0x0b0a0b0a0b0a0b0a, 0x0b0a0b0a0b0a0b0a, q11);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0f0e0d0c0f0e0d0c, q12);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1312111013121110, q13);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1716151417161514, q14);
  ASSERT_EQUAL_128(0x100f0e0d100f0e0d, 0x100f0e0d100f0e0d, q15);
  ASSERT_EQUAL_128(0x1413121114131211, 0x1413121114131211, q16);
  ASSERT_EQUAL_128(0x1817161518171615, 0x1817161518171615, q17);
  ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x201f1e1d1c1b1a19, q18);
  ASSERT_EQUAL_128(0x2827262524232221, 0x2827262524232221, q19);
  ASSERT_EQUAL_128(0x302f2e2d2c2b2a29, 0x302f2e2d2c2b2a29, q20);

  TEARDOWN();
}


TEST(neon_ld3_alllanes_postindex) {
  SETUP();

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base + 1);
  __ Mov(x18, 1);
  __ Ld3r(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x17, 3, PostIndex));
  __ Ld3r(v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x17, x18, PostIndex));
  __ Ld3r(v6.V4H(), v7.V4H(), v8.V4H(), MemOperand(x17, x18, PostIndex));
  __ Ld3r(v9.V8H(), v10.V8H(), v11.V8H(), MemOperand(x17, 6, PostIndex));
  __ Ld3r(v12.V2S(), v13.V2S(), v14.V2S(), MemOperand(x17, x18, PostIndex));
  __ Ld3r(v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x17, 12, PostIndex));
  __ Ld3r(v18.V2D(), v19.V2D(), v20.V2D(), MemOperand(x17, 24, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
  ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
  ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
  ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0605060506050605, q6);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0807080708070807, q7);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0a090a090a090a09, q8);
  ASSERT_EQUAL_128(0x0706070607060706, 0x0706070607060706, q9);
  ASSERT_EQUAL_128(0x0908090809080908, 0x0908090809080908, q10);
ASSERT_EQUAL_128(0x0b0a0b0a0b0a0b0a, 0x0b0a0b0a0b0a0b0a, q11); 4336 ASSERT_EQUAL_128(0x0000000000000000, 0x0f0e0d0c0f0e0d0c, q12); 4337 ASSERT_EQUAL_128(0x0000000000000000, 0x1312111013121110, q13); 4338 ASSERT_EQUAL_128(0x0000000000000000, 0x1716151417161514, q14); 4339 ASSERT_EQUAL_128(0x100f0e0d100f0e0d, 0x100f0e0d100f0e0d, q15); 4340 ASSERT_EQUAL_128(0x1413121114131211, 0x1413121114131211, q16); 4341 ASSERT_EQUAL_128(0x1817161518171615, 0x1817161518171615, q17); 4342 ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x201f1e1d1c1b1a19, q18); 4343 ASSERT_EQUAL_128(0x2827262524232221, 0x2827262524232221, q19); 4344 ASSERT_EQUAL_128(0x302f2e2d2c2b2a29, 0x302f2e2d2c2b2a29, q20); 4345 4346 TEARDOWN(); 4347 } 4348 4349 4350 TEST(neon_ld4_d) { 4351 SETUP(); 4352 4353 uint8_t src[64 + 4]; 4354 for (unsigned i = 0; i < sizeof(src); i++) { 4355 src[i] = i; 4356 } 4357 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 4358 4359 START(); 4360 __ Mov(x17, src_base); 4361 __ Ld4(v2.V8B(), v3.V8B(), v4.V8B(), v5.V8B(), MemOperand(x17)); 4362 __ Add(x17, x17, 1); 4363 __ Ld4(v6.V8B(), v7.V8B(), v8.V8B(), v9.V8B(), MemOperand(x17)); 4364 __ Add(x17, x17, 1); 4365 __ Ld4(v10.V4H(), v11.V4H(), v12.V4H(), v13.V4H(), MemOperand(x17)); 4366 __ Add(x17, x17, 1); 4367 __ Ld4(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x17)); 4368 END(); 4369 4370 RUN(); 4371 4372 ASSERT_EQUAL_128(0, 0x1c1814100c080400, q2); 4373 ASSERT_EQUAL_128(0, 0x1d1915110d090501, q3); 4374 ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q4); 4375 ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q5); 4376 ASSERT_EQUAL_128(0, 0x1d1915110d090501, q6); 4377 ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q7); 4378 ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q8); 4379 ASSERT_EQUAL_128(0, 0x201c1814100c0804, q9); 4380 ASSERT_EQUAL_128(0, 0x1b1a13120b0a0302, q10); 4381 ASSERT_EQUAL_128(0, 0x1d1c15140d0c0504, q11); 4382 ASSERT_EQUAL_128(0, 0x1f1e17160f0e0706, q12); 4383 ASSERT_EQUAL_128(0, 0x2120191811100908, q13); 4384 ASSERT_EQUAL_128(0, 0x1615141306050403, q30); 4385 ASSERT_EQUAL_128(0, 0x1a1918170a090807, q31); 4386 ASSERT_EQUAL_128(0, 0x1e1d1c1b0e0d0c0b, q0); 4387 ASSERT_EQUAL_128(0, 0x2221201f1211100f, q1); 4388 4389 TEARDOWN(); 4390 } 4391 4392 4393 TEST(neon_ld4_d_postindex) { 4394 SETUP(); 4395 4396 uint8_t src[32 + 4]; 4397 for (unsigned i = 0; i < sizeof(src); i++) { 4398 src[i] = i; 4399 } 4400 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 4401 4402 START(); 4403 __ Mov(x17, src_base); 4404 __ Mov(x18, src_base + 1); 4405 __ Mov(x19, src_base + 2); 4406 __ Mov(x20, src_base + 3); 4407 __ Mov(x21, src_base + 4); 4408 __ Mov(x22, 1); 4409 __ Ld4(v2.V8B(), 4410 v3.V8B(), 4411 v4.V8B(), 4412 v5.V8B(), 4413 MemOperand(x17, x22, PostIndex)); 4414 __ Ld4(v6.V8B(), 4415 v7.V8B(), 4416 v8.V8B(), 4417 v9.V8B(), 4418 MemOperand(x18, 32, PostIndex)); 4419 __ Ld4(v10.V4H(), 4420 v11.V4H(), 4421 v12.V4H(), 4422 v13.V4H(), 4423 MemOperand(x19, 32, PostIndex)); 4424 __ Ld4(v14.V2S(), 4425 v15.V2S(), 4426 v16.V2S(), 4427 v17.V2S(), 4428 MemOperand(x20, 32, PostIndex)); 4429 __ Ld4(v30.V2S(), 4430 v31.V2S(), 4431 v0.V2S(), 4432 v1.V2S(), 4433 MemOperand(x21, 32, PostIndex)); 4434 END(); 4435 4436 RUN(); 4437 4438 ASSERT_EQUAL_128(0, 0x1c1814100c080400, q2); 4439 ASSERT_EQUAL_128(0, 0x1d1915110d090501, q3); 4440 ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q4); 4441 ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q5); 4442 ASSERT_EQUAL_128(0, 0x1d1915110d090501, q6); 4443 ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q7); 4444 ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q8); 4445 
ASSERT_EQUAL_128(0, 0x201c1814100c0804, q9); 4446 ASSERT_EQUAL_128(0, 0x1b1a13120b0a0302, q10); 4447 ASSERT_EQUAL_128(0, 0x1d1c15140d0c0504, q11); 4448 ASSERT_EQUAL_128(0, 0x1f1e17160f0e0706, q12); 4449 ASSERT_EQUAL_128(0, 0x2120191811100908, q13); 4450 ASSERT_EQUAL_128(0, 0x1615141306050403, q14); 4451 ASSERT_EQUAL_128(0, 0x1a1918170a090807, q15); 4452 ASSERT_EQUAL_128(0, 0x1e1d1c1b0e0d0c0b, q16); 4453 ASSERT_EQUAL_128(0, 0x2221201f1211100f, q17); 4454 ASSERT_EQUAL_128(0, 0x1716151407060504, q30); 4455 ASSERT_EQUAL_128(0, 0x1b1a19180b0a0908, q31); 4456 ASSERT_EQUAL_128(0, 0x1f1e1d1c0f0e0d0c, q0); 4457 ASSERT_EQUAL_128(0, 0x2322212013121110, q1); 4458 4459 4460 ASSERT_EQUAL_64(src_base + 1, x17); 4461 ASSERT_EQUAL_64(src_base + 1 + 32, x18); 4462 ASSERT_EQUAL_64(src_base + 2 + 32, x19); 4463 ASSERT_EQUAL_64(src_base + 3 + 32, x20); 4464 ASSERT_EQUAL_64(src_base + 4 + 32, x21); 4465 TEARDOWN(); 4466 } 4467 4468 4469 TEST(neon_ld4_q) { 4470 SETUP(); 4471 4472 uint8_t src[64 + 4]; 4473 for (unsigned i = 0; i < sizeof(src); i++) { 4474 src[i] = i; 4475 } 4476 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 4477 4478 START(); 4479 __ Mov(x17, src_base); 4480 __ Ld4(v2.V16B(), v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x17)); 4481 __ Add(x17, x17, 1); 4482 __ Ld4(v6.V16B(), v7.V16B(), v8.V16B(), v9.V16B(), MemOperand(x17)); 4483 __ Add(x17, x17, 1); 4484 __ Ld4(v10.V8H(), v11.V8H(), v12.V8H(), v13.V8H(), MemOperand(x17)); 4485 __ Add(x17, x17, 1); 4486 __ Ld4(v14.V4S(), v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x17)); 4487 __ Add(x17, x17, 1); 4488 __ Ld4(v18.V2D(), v19.V2D(), v20.V2D(), v21.V2D(), MemOperand(x17)); 4489 END(); 4490 4491 RUN(); 4492 4493 ASSERT_EQUAL_128(0x3c3834302c282420, 0x1c1814100c080400, q2); 4494 ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q3); 4495 ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q4); 4496 ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q5); 4497 ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q6); 4498 ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q7); 4499 ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q8); 4500 ASSERT_EQUAL_128(0x403c3834302c2824, 0x201c1814100c0804, q9); 4501 ASSERT_EQUAL_128(0x3b3a33322b2a2322, 0x1b1a13120b0a0302, q10); 4502 ASSERT_EQUAL_128(0x3d3c35342d2c2524, 0x1d1c15140d0c0504, q11); 4503 ASSERT_EQUAL_128(0x3f3e37362f2e2726, 0x1f1e17160f0e0706, q12); 4504 ASSERT_EQUAL_128(0x4140393831302928, 0x2120191811100908, q13); 4505 ASSERT_EQUAL_128(0x3635343326252423, 0x1615141306050403, q14); 4506 ASSERT_EQUAL_128(0x3a3938372a292827, 0x1a1918170a090807, q15); 4507 ASSERT_EQUAL_128(0x3e3d3c3b2e2d2c2b, 0x1e1d1c1b0e0d0c0b, q16); 4508 ASSERT_EQUAL_128(0x4241403f3231302f, 0x2221201f1211100f, q17); 4509 ASSERT_EQUAL_128(0x2b2a292827262524, 0x0b0a090807060504, q18); 4510 ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x131211100f0e0d0c, q19); 4511 ASSERT_EQUAL_128(0x3b3a393837363534, 0x1b1a191817161514, q20); 4512 ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x232221201f1e1d1c, q21); 4513 TEARDOWN(); 4514 } 4515 4516 4517 TEST(neon_ld4_q_postindex) { 4518 SETUP(); 4519 4520 uint8_t src[64 + 4]; 4521 for (unsigned i = 0; i < sizeof(src); i++) { 4522 src[i] = i; 4523 } 4524 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 4525 4526 START(); 4527 __ Mov(x17, src_base); 4528 __ Mov(x18, src_base + 1); 4529 __ Mov(x19, src_base + 2); 4530 __ Mov(x20, src_base + 3); 4531 __ Mov(x21, src_base + 4); 4532 __ Mov(x22, 1); 4533 4534 __ Ld4(v2.V16B(), 4535 v3.V16B(), 4536 v4.V16B(), 4537 v5.V16B(), 
4538 MemOperand(x17, x22, PostIndex)); 4539 __ Ld4(v6.V16B(), 4540 v7.V16B(), 4541 v8.V16B(), 4542 v9.V16B(), 4543 MemOperand(x18, 64, PostIndex)); 4544 __ Ld4(v10.V8H(), 4545 v11.V8H(), 4546 v12.V8H(), 4547 v13.V8H(), 4548 MemOperand(x19, 64, PostIndex)); 4549 __ Ld4(v14.V4S(), 4550 v15.V4S(), 4551 v16.V4S(), 4552 v17.V4S(), 4553 MemOperand(x20, 64, PostIndex)); 4554 __ Ld4(v30.V2D(), 4555 v31.V2D(), 4556 v0.V2D(), 4557 v1.V2D(), 4558 MemOperand(x21, 64, PostIndex)); 4559 END(); 4560 4561 RUN(); 4562 4563 ASSERT_EQUAL_128(0x3c3834302c282420, 0x1c1814100c080400, q2); 4564 ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q3); 4565 ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q4); 4566 ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q5); 4567 ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q6); 4568 ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q7); 4569 ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q8); 4570 ASSERT_EQUAL_128(0x403c3834302c2824, 0x201c1814100c0804, q9); 4571 ASSERT_EQUAL_128(0x3b3a33322b2a2322, 0x1b1a13120b0a0302, q10); 4572 ASSERT_EQUAL_128(0x3d3c35342d2c2524, 0x1d1c15140d0c0504, q11); 4573 ASSERT_EQUAL_128(0x3f3e37362f2e2726, 0x1f1e17160f0e0706, q12); 4574 ASSERT_EQUAL_128(0x4140393831302928, 0x2120191811100908, q13); 4575 ASSERT_EQUAL_128(0x3635343326252423, 0x1615141306050403, q14); 4576 ASSERT_EQUAL_128(0x3a3938372a292827, 0x1a1918170a090807, q15); 4577 ASSERT_EQUAL_128(0x3e3d3c3b2e2d2c2b, 0x1e1d1c1b0e0d0c0b, q16); 4578 ASSERT_EQUAL_128(0x4241403f3231302f, 0x2221201f1211100f, q17); 4579 ASSERT_EQUAL_128(0x2b2a292827262524, 0x0b0a090807060504, q30); 4580 ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x131211100f0e0d0c, q31); 4581 ASSERT_EQUAL_128(0x3b3a393837363534, 0x1b1a191817161514, q0); 4582 ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x232221201f1e1d1c, q1); 4583 4584 4585 ASSERT_EQUAL_64(src_base + 1, x17); 4586 ASSERT_EQUAL_64(src_base + 1 + 64, x18); 4587 ASSERT_EQUAL_64(src_base + 2 + 64, x19); 4588 ASSERT_EQUAL_64(src_base + 3 + 64, x20); 4589 ASSERT_EQUAL_64(src_base + 4 + 64, x21); 4590 4591 TEARDOWN(); 4592 } 4593 4594 4595 TEST(neon_ld4_lane) { 4596 SETUP(); 4597 4598 uint8_t src[64]; 4599 for (unsigned i = 0; i < sizeof(src); i++) { 4600 src[i] = i; 4601 } 4602 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 4603 4604 START(); 4605 4606 // Test loading whole register by element. 4607 __ Mov(x17, src_base); 4608 for (int i = 15; i >= 0; i--) { 4609 __ Ld4(v0.B(), v1.B(), v2.B(), v3.B(), i, MemOperand(x17)); 4610 __ Add(x17, x17, 1); 4611 } 4612 4613 __ Mov(x17, src_base); 4614 for (int i = 7; i >= 0; i--) { 4615 __ Ld4(v4.H(), v5.H(), v6.H(), v7.H(), i, MemOperand(x17)); 4616 __ Add(x17, x17, 1); 4617 } 4618 4619 __ Mov(x17, src_base); 4620 for (int i = 3; i >= 0; i--) { 4621 __ Ld4(v8.S(), v9.S(), v10.S(), v11.S(), i, MemOperand(x17)); 4622 __ Add(x17, x17, 1); 4623 } 4624 4625 __ Mov(x17, src_base); 4626 for (int i = 1; i >= 0; i--) { 4627 __ Ld4(v12.D(), v13.D(), v14.D(), v15.D(), i, MemOperand(x17)); 4628 __ Add(x17, x17, 1); 4629 } 4630 4631 // Test loading a single element into an initialised register. 
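  // As in the Ld3 test above, the Ldr instructions pre-fill q16-q31 so that
  // each Ld4 by lane should only overwrite the addressed lane of its four
  // destination registers.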
4632 __ Mov(x17, src_base); 4633 __ Mov(x4, x17); 4634 __ Ldr(q16, MemOperand(x4, 16, PostIndex)); 4635 __ Ldr(q17, MemOperand(x4, 16, PostIndex)); 4636 __ Ldr(q18, MemOperand(x4, 16, PostIndex)); 4637 __ Ldr(q19, MemOperand(x4)); 4638 __ Ld4(v16.B(), v17.B(), v18.B(), v19.B(), 4, MemOperand(x17)); 4639 4640 __ Mov(x5, x17); 4641 __ Ldr(q20, MemOperand(x5, 16, PostIndex)); 4642 __ Ldr(q21, MemOperand(x5, 16, PostIndex)); 4643 __ Ldr(q22, MemOperand(x5, 16, PostIndex)); 4644 __ Ldr(q23, MemOperand(x5)); 4645 __ Ld4(v20.H(), v21.H(), v22.H(), v23.H(), 3, MemOperand(x17)); 4646 4647 __ Mov(x6, x17); 4648 __ Ldr(q24, MemOperand(x6, 16, PostIndex)); 4649 __ Ldr(q25, MemOperand(x6, 16, PostIndex)); 4650 __ Ldr(q26, MemOperand(x6, 16, PostIndex)); 4651 __ Ldr(q27, MemOperand(x6)); 4652 __ Ld4(v24.S(), v25.S(), v26.S(), v27.S(), 2, MemOperand(x17)); 4653 4654 __ Mov(x7, x17); 4655 __ Ldr(q28, MemOperand(x7, 16, PostIndex)); 4656 __ Ldr(q29, MemOperand(x7, 16, PostIndex)); 4657 __ Ldr(q30, MemOperand(x7, 16, PostIndex)); 4658 __ Ldr(q31, MemOperand(x7)); 4659 __ Ld4(v28.D(), v29.D(), v30.D(), v31.D(), 1, MemOperand(x17)); 4660 4661 END(); 4662 4663 RUN(); 4664 4665 ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0); 4666 ASSERT_EQUAL_128(0x0102030405060708, 0x090a0b0c0d0e0f10, q1); 4667 ASSERT_EQUAL_128(0x0203040506070809, 0x0a0b0c0d0e0f1011, q2); 4668 ASSERT_EQUAL_128(0x030405060708090a, 0x0b0c0d0e0f101112, q3); 4669 ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q4); 4670 ASSERT_EQUAL_128(0x0302040305040605, 0x0706080709080a09, q5); 4671 ASSERT_EQUAL_128(0x0504060507060807, 0x09080a090b0a0c0b, q6); 4672 ASSERT_EQUAL_128(0x0706080709080a09, 0x0b0a0c0b0d0c0e0d, q7); 4673 ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q8); 4674 ASSERT_EQUAL_128(0x0706050408070605, 0x090807060a090807, q9); 4675 ASSERT_EQUAL_128(0x0b0a09080c0b0a09, 0x0d0c0b0a0e0d0c0b, q10); 4676 ASSERT_EQUAL_128(0x0f0e0d0c100f0e0d, 0x11100f0e1211100f, q11); 4677 ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q12); 4678 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x100f0e0d0c0b0a09, q13); 4679 ASSERT_EQUAL_128(0x1716151413121110, 0x1817161514131211, q14); 4680 ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x201f1e1d1c1b1a19, q15); 4681 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q16); 4682 ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q17); 4683 ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q18); 4684 ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736350333323130, q19); 4685 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q20); 4686 ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q21); 4687 ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q22); 4688 ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x0706353433323130, q23); 4689 ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q24); 4690 ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q25); 4691 ASSERT_EQUAL_128(0x2f2e2d2c0b0a0908, 0x2726252423222120, q26); 4692 ASSERT_EQUAL_128(0x3f3e3d3c0f0e0d0c, 0x3736353433323130, q27); 4693 ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q28); 4694 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q29); 4695 ASSERT_EQUAL_128(0x1716151413121110, 0x2726252423222120, q30); 4696 ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x3736353433323130, q31); 4697 4698 TEARDOWN(); 4699 } 4700 4701 4702 TEST(neon_ld4_lane_postindex) { 4703 SETUP(); 4704 4705 uint8_t src[64]; 4706 for (unsigned i = 0; i < sizeof(src); i++) { 4707 src[i] = i; 4708 } 4709 uintptr_t src_base = 
reinterpret_cast<uintptr_t>(src); 4710 4711 START(); 4712 4713 // Test loading whole register by element. 4714 __ Mov(x17, src_base); 4715 for (int i = 15; i >= 0; i--) { 4716 __ Ld4(v0.B(), v1.B(), v2.B(), v3.B(), i, MemOperand(x17, 4, PostIndex)); 4717 } 4718 4719 __ Mov(x18, src_base); 4720 for (int i = 7; i >= 0; i--) { 4721 __ Ld4(v4.H(), v5.H(), v6.H(), v7.H(), i, MemOperand(x18, 8, PostIndex)); 4722 } 4723 4724 __ Mov(x19, src_base); 4725 for (int i = 3; i >= 0; i--) { 4726 __ Ld4(v8.S(), v9.S(), v10.S(), v11.S(), i, MemOperand(x19, 16, PostIndex)); 4727 } 4728 4729 __ Mov(x20, src_base); 4730 for (int i = 1; i >= 0; i--) { 4731 __ Ld4(v12.D(), 4732 v13.D(), 4733 v14.D(), 4734 v15.D(), 4735 i, 4736 MemOperand(x20, 32, PostIndex)); 4737 } 4738 4739 // Test loading a single element into an initialised register. 4740 __ Mov(x25, 1); 4741 __ Mov(x21, src_base); 4742 __ Mov(x22, src_base); 4743 __ Mov(x23, src_base); 4744 __ Mov(x24, src_base); 4745 4746 __ Mov(x4, x21); 4747 __ Ldr(q16, MemOperand(x4, 16, PostIndex)); 4748 __ Ldr(q17, MemOperand(x4, 16, PostIndex)); 4749 __ Ldr(q18, MemOperand(x4, 16, PostIndex)); 4750 __ Ldr(q19, MemOperand(x4)); 4751 __ Ld4(v16.B(), 4752 v17.B(), 4753 v18.B(), 4754 v19.B(), 4755 4, 4756 MemOperand(x21, x25, PostIndex)); 4757 __ Add(x25, x25, 1); 4758 4759 __ Mov(x5, x22); 4760 __ Ldr(q20, MemOperand(x5, 16, PostIndex)); 4761 __ Ldr(q21, MemOperand(x5, 16, PostIndex)); 4762 __ Ldr(q22, MemOperand(x5, 16, PostIndex)); 4763 __ Ldr(q23, MemOperand(x5)); 4764 __ Ld4(v20.H(), 4765 v21.H(), 4766 v22.H(), 4767 v23.H(), 4768 3, 4769 MemOperand(x22, x25, PostIndex)); 4770 __ Add(x25, x25, 1); 4771 4772 __ Mov(x6, x23); 4773 __ Ldr(q24, MemOperand(x6, 16, PostIndex)); 4774 __ Ldr(q25, MemOperand(x6, 16, PostIndex)); 4775 __ Ldr(q26, MemOperand(x6, 16, PostIndex)); 4776 __ Ldr(q27, MemOperand(x6)); 4777 __ Ld4(v24.S(), 4778 v25.S(), 4779 v26.S(), 4780 v27.S(), 4781 2, 4782 MemOperand(x23, x25, PostIndex)); 4783 __ Add(x25, x25, 1); 4784 4785 __ Mov(x7, x24); 4786 __ Ldr(q28, MemOperand(x7, 16, PostIndex)); 4787 __ Ldr(q29, MemOperand(x7, 16, PostIndex)); 4788 __ Ldr(q30, MemOperand(x7, 16, PostIndex)); 4789 __ Ldr(q31, MemOperand(x7)); 4790 __ Ld4(v28.D(), 4791 v29.D(), 4792 v30.D(), 4793 v31.D(), 4794 1, 4795 MemOperand(x24, x25, PostIndex)); 4796 4797 END(); 4798 4799 RUN(); 4800 4801 ASSERT_EQUAL_128(0x0004080c1014181c, 0x2024282c3034383c, q0); 4802 ASSERT_EQUAL_128(0x0105090d1115191d, 0x2125292d3135393d, q1); 4803 ASSERT_EQUAL_128(0x02060a0e12161a1e, 0x22262a2e32363a3e, q2); 4804 ASSERT_EQUAL_128(0x03070b0f13171b1f, 0x23272b2f33373b3f, q3); 4805 ASSERT_EQUAL_128(0x0100090811101918, 0x2120292831303938, q4); 4806 ASSERT_EQUAL_128(0x03020b0a13121b1a, 0x23222b2a33323b3a, q5); 4807 ASSERT_EQUAL_128(0x05040d0c15141d1c, 0x25242d2c35343d3c, q6); 4808 ASSERT_EQUAL_128(0x07060f0e17161f1e, 0x27262f2e37363f3e, q7); 4809 ASSERT_EQUAL_128(0x0302010013121110, 0x2322212033323130, q8); 4810 ASSERT_EQUAL_128(0x0706050417161514, 0x2726252437363534, q9); 4811 ASSERT_EQUAL_128(0x0b0a09081b1a1918, 0x2b2a29283b3a3938, q10); 4812 ASSERT_EQUAL_128(0x0f0e0d0c1f1e1d1c, 0x2f2e2d2c3f3e3d3c, q11); 4813 ASSERT_EQUAL_128(0x0706050403020100, 0x2726252423222120, q12); 4814 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x2f2e2d2c2b2a2928, q13); 4815 ASSERT_EQUAL_128(0x1716151413121110, 0x3736353433323130, q14); 4816 ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x3f3e3d3c3b3a3938, q15); 4817 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q16); 4818 ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 
0x1716150113121110, q17); 4819 ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q18); 4820 ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736350333323130, q19); 4821 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q20); 4822 ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q21); 4823 ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q22); 4824 ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x0706353433323130, q23); 4825 ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q24); 4826 ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q25); 4827 ASSERT_EQUAL_128(0x2f2e2d2c0b0a0908, 0x2726252423222120, q26); 4828 ASSERT_EQUAL_128(0x3f3e3d3c0f0e0d0c, 0x3736353433323130, q27); 4829 ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q28); 4830 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q29); 4831 ASSERT_EQUAL_128(0x1716151413121110, 0x2726252423222120, q30); 4832 ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x3736353433323130, q31); 4833 4834 ASSERT_EQUAL_64(src_base + 64, x17); 4835 ASSERT_EQUAL_64(src_base + 64, x18); 4836 ASSERT_EQUAL_64(src_base + 64, x19); 4837 ASSERT_EQUAL_64(src_base + 64, x20); 4838 ASSERT_EQUAL_64(src_base + 1, x21); 4839 ASSERT_EQUAL_64(src_base + 2, x22); 4840 ASSERT_EQUAL_64(src_base + 3, x23); 4841 ASSERT_EQUAL_64(src_base + 4, x24); 4842 4843 TEARDOWN(); 4844 } 4845 4846 4847 TEST(neon_ld4_alllanes) { 4848 SETUP(); 4849 4850 uint8_t src[64]; 4851 for (unsigned i = 0; i < sizeof(src); i++) { 4852 src[i] = i; 4853 } 4854 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 4855 4856 START(); 4857 __ Mov(x17, src_base + 1); 4858 __ Mov(x18, 1); 4859 __ Ld4r(v0.V8B(), v1.V8B(), v2.V8B(), v3.V8B(), MemOperand(x17)); 4860 __ Add(x17, x17, 4); 4861 __ Ld4r(v4.V16B(), v5.V16B(), v6.V16B(), v7.V16B(), MemOperand(x17)); 4862 __ Add(x17, x17, 1); 4863 __ Ld4r(v8.V4H(), v9.V4H(), v10.V4H(), v11.V4H(), MemOperand(x17)); 4864 __ Add(x17, x17, 1); 4865 __ Ld4r(v12.V8H(), v13.V8H(), v14.V8H(), v15.V8H(), MemOperand(x17)); 4866 __ Add(x17, x17, 8); 4867 __ Ld4r(v16.V2S(), v17.V2S(), v18.V2S(), v19.V2S(), MemOperand(x17)); 4868 __ Add(x17, x17, 1); 4869 __ Ld4r(v20.V4S(), v21.V4S(), v22.V4S(), v23.V4S(), MemOperand(x17)); 4870 __ Add(x17, x17, 16); 4871 __ Ld4r(v24.V2D(), v25.V2D(), v26.V2D(), v27.V2D(), MemOperand(x17)); 4872 4873 4874 END(); 4875 4876 RUN(); 4877 4878 ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0); 4879 ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1); 4880 ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2); 4881 ASSERT_EQUAL_128(0x0000000000000000, 0x0404040404040404, q3); 4882 ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4); 4883 ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5); 4884 ASSERT_EQUAL_128(0x0707070707070707, 0x0707070707070707, q6); 4885 ASSERT_EQUAL_128(0x0808080808080808, 0x0808080808080808, q7); 4886 ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q8); 4887 ASSERT_EQUAL_128(0x0000000000000000, 0x0908090809080908, q9); 4888 ASSERT_EQUAL_128(0x0000000000000000, 0x0b0a0b0a0b0a0b0a, q10); 4889 ASSERT_EQUAL_128(0x0000000000000000, 0x0d0c0d0c0d0c0d0c, q11); 4890 ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q12); 4891 ASSERT_EQUAL_128(0x0a090a090a090a09, 0x0a090a090a090a09, q13); 4892 ASSERT_EQUAL_128(0x0c0b0c0b0c0b0c0b, 0x0c0b0c0b0c0b0c0b, q14); 4893 ASSERT_EQUAL_128(0x0e0d0e0d0e0d0e0d, 0x0e0d0e0d0e0d0e0d, q15); 4894 ASSERT_EQUAL_128(0x0000000000000000, 0x1211100f1211100f, q16); 4895 ASSERT_EQUAL_128(0x0000000000000000, 
0x1615141316151413, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1a1918171a191817, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1e1d1c1b1e1d1c1b, q19);
  ASSERT_EQUAL_128(0x1312111013121110, 0x1312111013121110, q20);
  ASSERT_EQUAL_128(0x1716151417161514, 0x1716151417161514, q21);
  ASSERT_EQUAL_128(0x1b1a19181b1a1918, 0x1b1a19181b1a1918, q22);
  ASSERT_EQUAL_128(0x1f1e1d1c1f1e1d1c, 0x1f1e1d1c1f1e1d1c, q23);
  ASSERT_EQUAL_128(0x2726252423222120, 0x2726252423222120, q24);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2f2e2d2c2b2a2928, q25);
  ASSERT_EQUAL_128(0x3736353433323130, 0x3736353433323130, q26);
  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3f3e3d3c3b3a3938, q27);

  TEARDOWN();
}


TEST(neon_ld4_alllanes_postindex) {
  SETUP();

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base + 1);
  __ Mov(x18, 1);
  __ Ld4r(v0.V8B(),
          v1.V8B(),
          v2.V8B(),
          v3.V8B(),
          MemOperand(x17, 4, PostIndex));
  __ Ld4r(v4.V16B(),
          v5.V16B(),
          v6.V16B(),
          v7.V16B(),
          MemOperand(x17, x18, PostIndex));
  __ Ld4r(v8.V4H(),
          v9.V4H(),
          v10.V4H(),
          v11.V4H(),
          MemOperand(x17, x18, PostIndex));
  __ Ld4r(v12.V8H(),
          v13.V8H(),
          v14.V8H(),
          v15.V8H(),
          MemOperand(x17, 8, PostIndex));
  __ Ld4r(v16.V2S(),
          v17.V2S(),
          v18.V2S(),
          v19.V2S(),
          MemOperand(x17, x18, PostIndex));
  __ Ld4r(v20.V4S(),
          v21.V4S(),
          v22.V4S(),
          v23.V4S(),
          MemOperand(x17, 16, PostIndex));
  __ Ld4r(v24.V2D(),
          v25.V2D(),
          v26.V2D(),
          v27.V2D(),
          MemOperand(x17, 32, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0404040404040404, q3);
  ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
  ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
  ASSERT_EQUAL_128(0x0707070707070707, 0x0707070707070707, q6);
  ASSERT_EQUAL_128(0x0808080808080808, 0x0808080808080808, q7);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q8);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0908090809080908, q9);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0b0a0b0a0b0a0b0a, q10);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0d0c0d0c0d0c0d0c, q11);
  ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q12);
  ASSERT_EQUAL_128(0x0a090a090a090a09, 0x0a090a090a090a09, q13);
  ASSERT_EQUAL_128(0x0c0b0c0b0c0b0c0b, 0x0c0b0c0b0c0b0c0b, q14);
  ASSERT_EQUAL_128(0x0e0d0e0d0e0d0e0d, 0x0e0d0e0d0e0d0e0d, q15);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1211100f1211100f, q16);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1615141316151413, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1a1918171a191817, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1e1d1c1b1e1d1c1b, q19);
  ASSERT_EQUAL_128(0x1312111013121110, 0x1312111013121110, q20);
  ASSERT_EQUAL_128(0x1716151417161514, 0x1716151417161514, q21);
  ASSERT_EQUAL_128(0x1b1a19181b1a1918, 0x1b1a19181b1a1918, q22);
  ASSERT_EQUAL_128(0x1f1e1d1c1f1e1d1c, 0x1f1e1d1c1f1e1d1c, q23);
  ASSERT_EQUAL_128(0x2726252423222120,
0x2726252423222120, q24); 4989 ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2f2e2d2c2b2a2928, q25); 4990 ASSERT_EQUAL_128(0x3736353433323130, 0x3736353433323130, q26); 4991 ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3f3e3d3c3b3a3938, q27); 4992 ASSERT_EQUAL_64(src_base + 64, x17); 4993 4994 TEARDOWN(); 4995 } 4996 4997 4998 TEST(neon_st1_lane) { 4999 SETUP(); 5000 5001 uint8_t src[64]; 5002 for (unsigned i = 0; i < sizeof(src); i++) { 5003 src[i] = i; 5004 } 5005 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 5006 5007 START(); 5008 __ Mov(x17, src_base); 5009 __ Mov(x18, -16); 5010 __ Ldr(q0, MemOperand(x17)); 5011 5012 for (int i = 15; i >= 0; i--) { 5013 __ St1(v0.B(), i, MemOperand(x17)); 5014 __ Add(x17, x17, 1); 5015 } 5016 __ Ldr(q1, MemOperand(x17, x18)); 5017 5018 for (int i = 7; i >= 0; i--) { 5019 __ St1(v0.H(), i, MemOperand(x17)); 5020 __ Add(x17, x17, 2); 5021 } 5022 __ Ldr(q2, MemOperand(x17, x18)); 5023 5024 for (int i = 3; i >= 0; i--) { 5025 __ St1(v0.S(), i, MemOperand(x17)); 5026 __ Add(x17, x17, 4); 5027 } 5028 __ Ldr(q3, MemOperand(x17, x18)); 5029 5030 for (int i = 1; i >= 0; i--) { 5031 __ St1(v0.D(), i, MemOperand(x17)); 5032 __ Add(x17, x17, 8); 5033 } 5034 __ Ldr(q4, MemOperand(x17, x18)); 5035 5036 END(); 5037 5038 RUN(); 5039 5040 ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q1); 5041 ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q2); 5042 ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q3); 5043 ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q4); 5044 5045 TEARDOWN(); 5046 } 5047 5048 5049 TEST(neon_st2_lane) { 5050 SETUP(); 5051 5052 // Struct size * addressing modes * element sizes * vector size. 5053 uint8_t dst[2 * 2 * 4 * 16]; 5054 memset(dst, 0, sizeof(dst)); 5055 uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); 5056 5057 START(); 5058 __ Mov(x17, dst_base); 5059 __ Mov(x18, dst_base); 5060 __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f); 5061 __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f); 5062 5063 // Test B stores with and without post index. 5064 for (int i = 15; i >= 0; i--) { 5065 __ St2(v0.B(), v1.B(), i, MemOperand(x18)); 5066 __ Add(x18, x18, 2); 5067 } 5068 for (int i = 15; i >= 0; i--) { 5069 __ St2(v0.B(), v1.B(), i, MemOperand(x18, 2, PostIndex)); 5070 } 5071 __ Ldr(q2, MemOperand(x17, 0 * 16)); 5072 __ Ldr(q3, MemOperand(x17, 1 * 16)); 5073 __ Ldr(q4, MemOperand(x17, 2 * 16)); 5074 __ Ldr(q5, MemOperand(x17, 3 * 16)); 5075 5076 // Test H stores with and without post index. 5077 __ Mov(x0, 4); 5078 for (int i = 7; i >= 0; i--) { 5079 __ St2(v0.H(), v1.H(), i, MemOperand(x18)); 5080 __ Add(x18, x18, 4); 5081 } 5082 for (int i = 7; i >= 0; i--) { 5083 __ St2(v0.H(), v1.H(), i, MemOperand(x18, x0, PostIndex)); 5084 } 5085 __ Ldr(q6, MemOperand(x17, 4 * 16)); 5086 __ Ldr(q7, MemOperand(x17, 5 * 16)); 5087 __ Ldr(q16, MemOperand(x17, 6 * 16)); 5088 __ Ldr(q17, MemOperand(x17, 7 * 16)); 5089 5090 // Test S stores with and without post index. 5091 for (int i = 3; i >= 0; i--) { 5092 __ St2(v0.S(), v1.S(), i, MemOperand(x18)); 5093 __ Add(x18, x18, 8); 5094 } 5095 for (int i = 3; i >= 0; i--) { 5096 __ St2(v0.S(), v1.S(), i, MemOperand(x18, 8, PostIndex)); 5097 } 5098 __ Ldr(q18, MemOperand(x17, 8 * 16)); 5099 __ Ldr(q19, MemOperand(x17, 9 * 16)); 5100 __ Ldr(q20, MemOperand(x17, 10 * 16)); 5101 __ Ldr(q21, MemOperand(x17, 11 * 16)); 5102 5103 // Test D stores with and without post index. 
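  // Only two D lanes exist, so the loop is unrolled into four stores: lane 1
  // with no writeback, lane 0 with an immediate post-index, then both lanes
  // again with a register post-index (x0). For example, the first store
  // writes v0.d[1] then v1.d[1] as sixteen consecutive bytes at x18.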
  __ Mov(x0, 16);
  __ St2(v0.D(), v1.D(), 1, MemOperand(x18));
  __ Add(x18, x18, 16);
  __ St2(v0.D(), v1.D(), 0, MemOperand(x18, 16, PostIndex));
  __ St2(v0.D(), v1.D(), 1, MemOperand(x18, x0, PostIndex));
  __ St2(v0.D(), v1.D(), 0, MemOperand(x18, x0, PostIndex));
  __ Ldr(q22, MemOperand(x17, 12 * 16));
  __ Ldr(q23, MemOperand(x17, 13 * 16));
  __ Ldr(q24, MemOperand(x17, 14 * 16));
  __ Ldr(q25, MemOperand(x17, 15 * 16));
  END();

  RUN();

  ASSERT_EQUAL_128(0x1707160615051404, 0x1303120211011000, q2);
  ASSERT_EQUAL_128(0x1f0f1e0e1d0d1c0c, 0x1b0b1a0a19091808, q3);
  ASSERT_EQUAL_128(0x1707160615051404, 0x1303120211011000, q4);
  ASSERT_EQUAL_128(0x1f0f1e0e1d0d1c0c, 0x1b0b1a0a19091808, q5);

  ASSERT_EQUAL_128(0x1617060714150405, 0x1213020310110001, q6);
  ASSERT_EQUAL_128(0x1e1f0e0f1c1d0c0d, 0x1a1b0a0b18190809, q7);
  ASSERT_EQUAL_128(0x1617060714150405, 0x1213020310110001, q16);
  ASSERT_EQUAL_128(0x1e1f0e0f1c1d0c0d, 0x1a1b0a0b18190809, q17);

  ASSERT_EQUAL_128(0x1415161704050607, 0x1011121300010203, q18);
  ASSERT_EQUAL_128(0x1c1d1e1f0c0d0e0f, 0x18191a1b08090a0b, q19);
  ASSERT_EQUAL_128(0x1415161704050607, 0x1011121300010203, q20);
  ASSERT_EQUAL_128(0x1c1d1e1f0c0d0e0f, 0x18191a1b08090a0b, q21);

  ASSERT_EQUAL_128(0x1011121314151617, 0x0001020304050607, q22);
  ASSERT_EQUAL_128(0x18191a1b1c1d1e1f, 0x08090a0b0c0d0e0f, q23);
  // The register post-index stores wrote the same data at 14 * 16 and
  // 15 * 16, so q24 and q25 must match q22 and q23.
  ASSERT_EQUAL_128(0x1011121314151617, 0x0001020304050607, q24);
  ASSERT_EQUAL_128(0x18191a1b1c1d1e1f, 0x08090a0b0c0d0e0f, q25);

  TEARDOWN();
}


TEST(neon_st3_lane) {
  SETUP();

  // Struct size * addressing modes * element sizes * vector size.
  uint8_t dst[3 * 2 * 4 * 16];
  memset(dst, 0, sizeof(dst));
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, dst_base);
  __ Mov(x18, dst_base);
  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);
  __ Movi(v2.V2D(), 0x2021222324252627, 0x28292a2b2c2d2e2f);

  // Test B stores with and without post index.
  for (int i = 15; i >= 0; i--) {
    __ St3(v0.B(), v1.B(), v2.B(), i, MemOperand(x18));
    __ Add(x18, x18, 3);
  }
  for (int i = 15; i >= 0; i--) {
    __ St3(v0.B(), v1.B(), v2.B(), i, MemOperand(x18, 3, PostIndex));
  }
  __ Ldr(q3, MemOperand(x17, 0 * 16));
  __ Ldr(q4, MemOperand(x17, 1 * 16));
  __ Ldr(q5, MemOperand(x17, 2 * 16));
  __ Ldr(q6, MemOperand(x17, 3 * 16));
  __ Ldr(q7, MemOperand(x17, 4 * 16));
  __ Ldr(q16, MemOperand(x17, 5 * 16));

  // Test H stores with and without post index.
  __ Mov(x0, 6);
  for (int i = 7; i >= 0; i--) {
    __ St3(v0.H(), v1.H(), v2.H(), i, MemOperand(x18));
    __ Add(x18, x18, 6);
  }
  for (int i = 7; i >= 0; i--) {
    __ St3(v0.H(), v1.H(), v2.H(), i, MemOperand(x18, x0, PostIndex));
  }
  __ Ldr(q17, MemOperand(x17, 6 * 16));
  __ Ldr(q18, MemOperand(x17, 7 * 16));
  __ Ldr(q19, MemOperand(x17, 8 * 16));
  __ Ldr(q20, MemOperand(x17, 9 * 16));
  __ Ldr(q21, MemOperand(x17, 10 * 16));
  __ Ldr(q22, MemOperand(x17, 11 * 16));

  // Test S stores with and without post index.
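  // Each St3 of an S lane writes 12 bytes (three registers, four bytes
  // each); the first loop advances x18 with an explicit Add, the second
  // uses an immediate post-index.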
5189 for (int i = 3; i >= 0; i--) { 5190 __ St3(v0.S(), v1.S(), v2.S(), i, MemOperand(x18)); 5191 __ Add(x18, x18, 12); 5192 } 5193 for (int i = 3; i >= 0; i--) { 5194 __ St3(v0.S(), v1.S(), v2.S(), i, MemOperand(x18, 12, PostIndex)); 5195 } 5196 __ Ldr(q23, MemOperand(x17, 12 * 16)); 5197 __ Ldr(q24, MemOperand(x17, 13 * 16)); 5198 __ Ldr(q25, MemOperand(x17, 14 * 16)); 5199 __ Ldr(q26, MemOperand(x17, 15 * 16)); 5200 __ Ldr(q27, MemOperand(x17, 16 * 16)); 5201 __ Ldr(q28, MemOperand(x17, 17 * 16)); 5202 5203 // Test D stores with and without post index. 5204 __ Mov(x0, 24); 5205 __ St3(v0.D(), v1.D(), v2.D(), 1, MemOperand(x18)); 5206 __ Add(x18, x18, 24); 5207 __ St3(v0.D(), v1.D(), v2.D(), 0, MemOperand(x18, 24, PostIndex)); 5208 __ St3(v0.D(), v1.D(), v2.D(), 1, MemOperand(x18, x0, PostIndex)); 5209 __ Ldr(q29, MemOperand(x17, 18 * 16)); 5210 __ Ldr(q30, MemOperand(x17, 19 * 16)); 5211 __ Ldr(q31, MemOperand(x17, 20 * 16)); 5212 END(); 5213 5214 RUN(); 5215 5216 ASSERT_EQUAL_128(0x0524140423130322, 0x1202211101201000, q3); 5217 ASSERT_EQUAL_128(0x1a0a291909281808, 0x2717072616062515, q4); 5218 ASSERT_EQUAL_128(0x2f1f0f2e1e0e2d1d, 0x0d2c1c0c2b1b0b2a, q5); 5219 ASSERT_EQUAL_128(0x0524140423130322, 0x1202211101201000, q6); 5220 ASSERT_EQUAL_128(0x1a0a291909281808, 0x2717072616062515, q7); 5221 ASSERT_EQUAL_128(0x2f1f0f2e1e0e2d1d, 0x0d2c1c0c2b1b0b2a, q16); 5222 5223 ASSERT_EQUAL_128(0x1415040522231213, 0x0203202110110001, q17); 5224 ASSERT_EQUAL_128(0x0a0b282918190809, 0x2627161706072425, q18); 5225 ASSERT_EQUAL_128(0x2e2f1e1f0e0f2c2d, 0x1c1d0c0d2a2b1a1b, q19); 5226 ASSERT_EQUAL_128(0x1415040522231213, 0x0203202110110001, q20); 5227 ASSERT_EQUAL_128(0x0a0b282918190809, 0x2627161706072425, q21); 5228 ASSERT_EQUAL_128(0x2e2f1e1f0e0f2c2d, 0x1c1d0c0d2a2b1a1b, q22); 5229 5230 ASSERT_EQUAL_128(0x0405060720212223, 0x1011121300010203, q23); 5231 ASSERT_EQUAL_128(0x18191a1b08090a0b, 0x2425262714151617, q24); 5232 ASSERT_EQUAL_128(0x2c2d2e2f1c1d1e1f, 0x0c0d0e0f28292a2b, q25); 5233 ASSERT_EQUAL_128(0x0405060720212223, 0x1011121300010203, q26); 5234 ASSERT_EQUAL_128(0x18191a1b08090a0b, 0x2425262714151617, q27); 5235 ASSERT_EQUAL_128(0x2c2d2e2f1c1d1e1f, 0x0c0d0e0f28292a2b, q28); 5236 5237 TEARDOWN(); 5238 } 5239 5240 5241 TEST(neon_st4_lane) { 5242 SETUP(); 5243 5244 // Struct size * element sizes * vector size. 5245 uint8_t dst[4 * 4 * 16]; 5246 memset(dst, 0, sizeof(dst)); 5247 uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); 5248 5249 START(); 5250 __ Mov(x17, dst_base); 5251 __ Mov(x18, dst_base); 5252 __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f); 5253 __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f); 5254 __ Movi(v2.V2D(), 0x2021222324252627, 0x28292a2b2c2d2e2f); 5255 __ Movi(v3.V2D(), 0x2021222324252627, 0x28292a2b2c2d2e2f); 5256 5257 // Test B stores without post index. 5258 for (int i = 15; i >= 0; i--) { 5259 __ St4(v0.B(), v1.B(), v2.B(), v3.B(), i, MemOperand(x18)); 5260 __ Add(x18, x18, 4); 5261 } 5262 __ Ldr(q4, MemOperand(x17, 0 * 16)); 5263 __ Ldr(q5, MemOperand(x17, 1 * 16)); 5264 __ Ldr(q6, MemOperand(x17, 2 * 16)); 5265 __ Ldr(q7, MemOperand(x17, 3 * 16)); 5266 5267 // Test H stores with post index. 5268 __ Mov(x0, 8); 5269 for (int i = 7; i >= 0; i--) { 5270 __ St4(v0.H(), v1.H(), v2.H(), v3.H(), i, MemOperand(x18, x0, PostIndex)); 5271 } 5272 __ Ldr(q16, MemOperand(x17, 4 * 16)); 5273 __ Ldr(q17, MemOperand(x17, 5 * 16)); 5274 __ Ldr(q18, MemOperand(x17, 6 * 16)); 5275 __ Ldr(q19, MemOperand(x17, 7 * 16)); 5276 5277 // Test S stores without post index. 
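  // Each St4 of an S lane writes 16 bytes (four registers, four bytes
  // each), so every iteration fills exactly one q register's worth of dst.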
5278 for (int i = 3; i >= 0; i--) { 5279 __ St4(v0.S(), v1.S(), v2.S(), v3.S(), i, MemOperand(x18)); 5280 __ Add(x18, x18, 16); 5281 } 5282 __ Ldr(q20, MemOperand(x17, 8 * 16)); 5283 __ Ldr(q21, MemOperand(x17, 9 * 16)); 5284 __ Ldr(q22, MemOperand(x17, 10 * 16)); 5285 __ Ldr(q23, MemOperand(x17, 11 * 16)); 5286 5287 // Test D stores with post index. 5288 __ Mov(x0, 32); 5289 __ St4(v0.D(), v1.D(), v2.D(), v3.D(), 0, MemOperand(x18, 32, PostIndex)); 5290 __ St4(v0.D(), v1.D(), v2.D(), v3.D(), 1, MemOperand(x18, x0, PostIndex)); 5291 5292 __ Ldr(q24, MemOperand(x17, 12 * 16)); 5293 __ Ldr(q25, MemOperand(x17, 13 * 16)); 5294 __ Ldr(q26, MemOperand(x17, 14 * 16)); 5295 __ Ldr(q27, MemOperand(x17, 15 * 16)); 5296 END(); 5297 5298 RUN(); 5299 5300 ASSERT_EQUAL_128(0x2323130322221202, 0x2121110120201000, q4); 5301 ASSERT_EQUAL_128(0x2727170726261606, 0x2525150524241404, q5); 5302 ASSERT_EQUAL_128(0x2b2b1b0b2a2a1a0a, 0x2929190928281808, q6); 5303 ASSERT_EQUAL_128(0x2f2f1f0f2e2e1e0e, 0x2d2d1d0d2c2c1c0c, q7); 5304 5305 ASSERT_EQUAL_128(0x2223222312130203, 0x2021202110110001, q16); 5306 ASSERT_EQUAL_128(0x2627262716170607, 0x2425242514150405, q17); 5307 ASSERT_EQUAL_128(0x2a2b2a2b1a1b0a0b, 0x2829282918190809, q18); 5308 ASSERT_EQUAL_128(0x2e2f2e2f1e1f0e0f, 0x2c2d2c2d1c1d0c0d, q19); 5309 5310 ASSERT_EQUAL_128(0x2021222320212223, 0x1011121300010203, q20); 5311 ASSERT_EQUAL_128(0x2425262724252627, 0x1415161704050607, q21); 5312 ASSERT_EQUAL_128(0x28292a2b28292a2b, 0x18191a1b08090a0b, q22); 5313 ASSERT_EQUAL_128(0x2c2d2e2f2c2d2e2f, 0x1c1d1e1f0c0d0e0f, q23); 5314 5315 ASSERT_EQUAL_128(0x18191a1b1c1d1e1f, 0x08090a0b0c0d0e0f, q24); 5316 ASSERT_EQUAL_128(0x28292a2b2c2d2e2f, 0x28292a2b2c2d2e2f, q25); 5317 ASSERT_EQUAL_128(0x1011121314151617, 0x0001020304050607, q26); 5318 ASSERT_EQUAL_128(0x2021222324252627, 0x2021222324252627, q27); 5319 5320 TEARDOWN(); 5321 } 5322 5323 5324 TEST(neon_ld1_lane_postindex) { 5325 SETUP(); 5326 5327 uint8_t src[64]; 5328 for (unsigned i = 0; i < sizeof(src); i++) { 5329 src[i] = i; 5330 } 5331 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 5332 5333 START(); 5334 __ Mov(x17, src_base); 5335 __ Mov(x18, src_base); 5336 __ Mov(x19, src_base); 5337 __ Mov(x20, src_base); 5338 __ Mov(x21, src_base); 5339 __ Mov(x22, src_base); 5340 __ Mov(x23, src_base); 5341 __ Mov(x24, src_base); 5342 5343 // Test loading whole register by element. 5344 for (int i = 15; i >= 0; i--) { 5345 __ Ld1(v0.B(), i, MemOperand(x17, 1, PostIndex)); 5346 } 5347 5348 for (int i = 7; i >= 0; i--) { 5349 __ Ld1(v1.H(), i, MemOperand(x18, 2, PostIndex)); 5350 } 5351 5352 for (int i = 3; i >= 0; i--) { 5353 __ Ld1(v2.S(), i, MemOperand(x19, 4, PostIndex)); 5354 } 5355 5356 for (int i = 1; i >= 0; i--) { 5357 __ Ld1(v3.D(), i, MemOperand(x20, 8, PostIndex)); 5358 } 5359 5360 // Test loading a single element into an initialised register. 
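  // x25 is again both the post-index amount and incremented between loads,
  // so x21-x24 should finish at src_base + 1 up to src_base + 4.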
5361 __ Mov(x25, 1); 5362 __ Ldr(q4, MemOperand(x21)); 5363 __ Ld1(v4.B(), 4, MemOperand(x21, x25, PostIndex)); 5364 __ Add(x25, x25, 1); 5365 5366 __ Ldr(q5, MemOperand(x22)); 5367 __ Ld1(v5.H(), 3, MemOperand(x22, x25, PostIndex)); 5368 __ Add(x25, x25, 1); 5369 5370 __ Ldr(q6, MemOperand(x23)); 5371 __ Ld1(v6.S(), 2, MemOperand(x23, x25, PostIndex)); 5372 __ Add(x25, x25, 1); 5373 5374 __ Ldr(q7, MemOperand(x24)); 5375 __ Ld1(v7.D(), 1, MemOperand(x24, x25, PostIndex)); 5376 5377 END(); 5378 5379 RUN(); 5380 5381 ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0); 5382 ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q1); 5383 ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q2); 5384 ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q3); 5385 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q4); 5386 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q5); 5387 ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q6); 5388 ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q7); 5389 ASSERT_EQUAL_64(src_base + 16, x17); 5390 ASSERT_EQUAL_64(src_base + 16, x18); 5391 ASSERT_EQUAL_64(src_base + 16, x19); 5392 ASSERT_EQUAL_64(src_base + 16, x20); 5393 ASSERT_EQUAL_64(src_base + 1, x21); 5394 ASSERT_EQUAL_64(src_base + 2, x22); 5395 ASSERT_EQUAL_64(src_base + 3, x23); 5396 ASSERT_EQUAL_64(src_base + 4, x24); 5397 5398 TEARDOWN(); 5399 } 5400 5401 5402 TEST(neon_st1_lane_postindex) { 5403 SETUP(); 5404 5405 uint8_t src[64]; 5406 for (unsigned i = 0; i < sizeof(src); i++) { 5407 src[i] = i; 5408 } 5409 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 5410 5411 START(); 5412 __ Mov(x17, src_base); 5413 __ Mov(x18, -16); 5414 __ Ldr(q0, MemOperand(x17)); 5415 5416 for (int i = 15; i >= 0; i--) { 5417 __ St1(v0.B(), i, MemOperand(x17, 1, PostIndex)); 5418 } 5419 __ Ldr(q1, MemOperand(x17, x18)); 5420 5421 for (int i = 7; i >= 0; i--) { 5422 __ St1(v0.H(), i, MemOperand(x17, 2, PostIndex)); 5423 } 5424 __ Ldr(q2, MemOperand(x17, x18)); 5425 5426 for (int i = 3; i >= 0; i--) { 5427 __ St1(v0.S(), i, MemOperand(x17, 4, PostIndex)); 5428 } 5429 __ Ldr(q3, MemOperand(x17, x18)); 5430 5431 for (int i = 1; i >= 0; i--) { 5432 __ St1(v0.D(), i, MemOperand(x17, 8, PostIndex)); 5433 } 5434 __ Ldr(q4, MemOperand(x17, x18)); 5435 5436 END(); 5437 5438 RUN(); 5439 5440 ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q1); 5441 ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q2); 5442 ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q3); 5443 ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q4); 5444 5445 TEARDOWN(); 5446 } 5447 5448 5449 TEST(neon_ld1_alllanes) { 5450 SETUP(); 5451 5452 uint8_t src[64]; 5453 for (unsigned i = 0; i < sizeof(src); i++) { 5454 src[i] = i; 5455 } 5456 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 5457 5458 START(); 5459 __ Mov(x17, src_base + 1); 5460 __ Ld1r(v0.V8B(), MemOperand(x17)); 5461 __ Add(x17, x17, 1); 5462 __ Ld1r(v1.V16B(), MemOperand(x17)); 5463 __ Add(x17, x17, 1); 5464 __ Ld1r(v2.V4H(), MemOperand(x17)); 5465 __ Add(x17, x17, 1); 5466 __ Ld1r(v3.V8H(), MemOperand(x17)); 5467 __ Add(x17, x17, 1); 5468 __ Ld1r(v4.V2S(), MemOperand(x17)); 5469 __ Add(x17, x17, 1); 5470 __ Ld1r(v5.V4S(), MemOperand(x17)); 5471 __ Add(x17, x17, 1); 5472 __ Ld1r(v6.V1D(), MemOperand(x17)); 5473 __ Add(x17, x17, 1); 5474 __ Ld1r(v7.V2D(), MemOperand(x17)); 5475 END(); 5476 5477 RUN(); 5478 5479 ASSERT_EQUAL_128(0, 0x0101010101010101, q0); 5480 ASSERT_EQUAL_128(0x0202020202020202, 
0x0202020202020202, q1); 5481 ASSERT_EQUAL_128(0, 0x0403040304030403, q2); 5482 ASSERT_EQUAL_128(0x0504050405040504, 0x0504050405040504, q3); 5483 ASSERT_EQUAL_128(0, 0x0807060508070605, q4); 5484 ASSERT_EQUAL_128(0x0908070609080706, 0x0908070609080706, q5); 5485 ASSERT_EQUAL_128(0, 0x0e0d0c0b0a090807, q6); 5486 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0f0e0d0c0b0a0908, q7); 5487 5488 TEARDOWN(); 5489 } 5490 5491 5492 TEST(neon_ld1_alllanes_postindex) { 5493 SETUP(); 5494 5495 uint8_t src[64]; 5496 for (unsigned i = 0; i < sizeof(src); i++) { 5497 src[i] = i; 5498 } 5499 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 5500 5501 START(); 5502 __ Mov(x17, src_base + 1); 5503 __ Mov(x18, 1); 5504 __ Ld1r(v0.V8B(), MemOperand(x17, 1, PostIndex)); 5505 __ Ld1r(v1.V16B(), MemOperand(x17, x18, PostIndex)); 5506 __ Ld1r(v2.V4H(), MemOperand(x17, x18, PostIndex)); 5507 __ Ld1r(v3.V8H(), MemOperand(x17, 2, PostIndex)); 5508 __ Ld1r(v4.V2S(), MemOperand(x17, x18, PostIndex)); 5509 __ Ld1r(v5.V4S(), MemOperand(x17, 4, PostIndex)); 5510 __ Ld1r(v6.V2D(), MemOperand(x17, 8, PostIndex)); 5511 END(); 5512 5513 RUN(); 5514 5515 ASSERT_EQUAL_128(0, 0x0101010101010101, q0); 5516 ASSERT_EQUAL_128(0x0202020202020202, 0x0202020202020202, q1); 5517 ASSERT_EQUAL_128(0, 0x0403040304030403, q2); 5518 ASSERT_EQUAL_128(0x0504050405040504, 0x0504050405040504, q3); 5519 ASSERT_EQUAL_128(0, 0x0908070609080706, q4); 5520 ASSERT_EQUAL_128(0x0a0908070a090807, 0x0a0908070a090807, q5); 5521 ASSERT_EQUAL_128(0x1211100f0e0d0c0b, 0x1211100f0e0d0c0b, q6); 5522 ASSERT_EQUAL_64(src_base + 19, x17); 5523 5524 TEARDOWN(); 5525 } 5526 5527 5528 TEST(neon_st1_d) { 5529 SETUP(); 5530 5531 uint8_t src[14 * kDRegSizeInBytes]; 5532 for (unsigned i = 0; i < sizeof(src); i++) { 5533 src[i] = i; 5534 } 5535 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 5536 5537 START(); 5538 __ Mov(x17, src_base); 5539 __ Ldr(q0, MemOperand(x17, 16, PostIndex)); 5540 __ Ldr(q1, MemOperand(x17, 16, PostIndex)); 5541 __ Ldr(q2, MemOperand(x17, 16, PostIndex)); 5542 __ Ldr(q3, MemOperand(x17, 16, PostIndex)); 5543 __ Mov(x17, src_base); 5544 5545 __ St1(v0.V8B(), MemOperand(x17)); 5546 __ Ldr(d16, MemOperand(x17, 8, PostIndex)); 5547 5548 __ St1(v0.V8B(), v1.V8B(), MemOperand(x17)); 5549 __ Ldr(q17, MemOperand(x17, 16, PostIndex)); 5550 5551 __ St1(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x17)); 5552 __ Ldr(d18, MemOperand(x17, 8, PostIndex)); 5553 __ Ldr(d19, MemOperand(x17, 8, PostIndex)); 5554 __ Ldr(d20, MemOperand(x17, 8, PostIndex)); 5555 5556 __ St1(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(), MemOperand(x17)); 5557 __ Ldr(q21, MemOperand(x17, 16, PostIndex)); 5558 __ Ldr(q22, MemOperand(x17, 16, PostIndex)); 5559 5560 __ St1(v0.V1D(), v1.V1D(), v2.V1D(), v3.V1D(), MemOperand(x17)); 5561 __ Ldr(q23, MemOperand(x17, 16, PostIndex)); 5562 __ Ldr(q24, MemOperand(x17)); 5563 END(); 5564 5565 RUN(); 5566 5567 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q0); 5568 ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q1); 5569 ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q2); 5570 ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323130, q3); 5571 ASSERT_EQUAL_128(0, 0x0706050403020100, q16); 5572 ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q17); 5573 ASSERT_EQUAL_128(0, 0x0706050403020100, q18); 5574 ASSERT_EQUAL_128(0, 0x1716151413121110, q19); 5575 ASSERT_EQUAL_128(0, 0x2726252423222120, q20); 5576 ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q21); 5577 ASSERT_EQUAL_128(0x3736353433323130, 
0x2726252423222120, q22); 5578 ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q23); 5579 ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q24); 5580 5581 TEARDOWN(); 5582 } 5583 5584 5585 TEST(neon_st1_d_postindex) { 5586 SETUP(); 5587 5588 uint8_t src[64 + 14 * kDRegSizeInBytes]; 5589 for (unsigned i = 0; i < sizeof(src); i++) { 5590 src[i] = i; 5591 } 5592 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 5593 5594 START(); 5595 __ Mov(x17, src_base); 5596 __ Mov(x18, -8); 5597 __ Mov(x19, -16); 5598 __ Mov(x20, -24); 5599 __ Mov(x21, -32); 5600 __ Ldr(q0, MemOperand(x17, 16, PostIndex)); 5601 __ Ldr(q1, MemOperand(x17, 16, PostIndex)); 5602 __ Ldr(q2, MemOperand(x17, 16, PostIndex)); 5603 __ Ldr(q3, MemOperand(x17, 16, PostIndex)); 5604 __ Mov(x17, src_base); 5605 5606 __ St1(v0.V8B(), MemOperand(x17, 8, PostIndex)); 5607 __ Ldr(d16, MemOperand(x17, x18)); 5608 5609 __ St1(v0.V8B(), v1.V8B(), MemOperand(x17, 16, PostIndex)); 5610 __ Ldr(q17, MemOperand(x17, x19)); 5611 5612 __ St1(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x17, 24, PostIndex)); 5613 __ Ldr(d18, MemOperand(x17, x20)); 5614 __ Ldr(d19, MemOperand(x17, x19)); 5615 __ Ldr(d20, MemOperand(x17, x18)); 5616 5617 __ St1(v0.V2S(), 5618 v1.V2S(), 5619 v2.V2S(), 5620 v3.V2S(), 5621 MemOperand(x17, 32, PostIndex)); 5622 __ Ldr(q21, MemOperand(x17, x21)); 5623 __ Ldr(q22, MemOperand(x17, x19)); 5624 5625 __ St1(v0.V1D(), 5626 v1.V1D(), 5627 v2.V1D(), 5628 v3.V1D(), 5629 MemOperand(x17, 32, PostIndex)); 5630 __ Ldr(q23, MemOperand(x17, x21)); 5631 __ Ldr(q24, MemOperand(x17, x19)); 5632 END(); 5633 5634 RUN(); 5635 5636 ASSERT_EQUAL_128(0, 0x0706050403020100, q16); 5637 ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q17); 5638 ASSERT_EQUAL_128(0, 0x0706050403020100, q18); 5639 ASSERT_EQUAL_128(0, 0x1716151413121110, q19); 5640 ASSERT_EQUAL_128(0, 0x2726252423222120, q20); 5641 ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q21); 5642 ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q22); 5643 ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q23); 5644 ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q24); 5645 5646 TEARDOWN(); 5647 } 5648 5649 5650 TEST(neon_st1_q) { 5651 SETUP(); 5652 5653 uint8_t src[64 + 160]; 5654 for (unsigned i = 0; i < sizeof(src); i++) { 5655 src[i] = i; 5656 } 5657 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 5658 5659 START(); 5660 __ Mov(x17, src_base); 5661 __ Ldr(q0, MemOperand(x17, 16, PostIndex)); 5662 __ Ldr(q1, MemOperand(x17, 16, PostIndex)); 5663 __ Ldr(q2, MemOperand(x17, 16, PostIndex)); 5664 __ Ldr(q3, MemOperand(x17, 16, PostIndex)); 5665 5666 __ St1(v0.V16B(), MemOperand(x17)); 5667 __ Ldr(q16, MemOperand(x17, 16, PostIndex)); 5668 5669 __ St1(v0.V8H(), v1.V8H(), MemOperand(x17)); 5670 __ Ldr(q17, MemOperand(x17, 16, PostIndex)); 5671 __ Ldr(q18, MemOperand(x17, 16, PostIndex)); 5672 5673 __ St1(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x17)); 5674 __ Ldr(q19, MemOperand(x17, 16, PostIndex)); 5675 __ Ldr(q20, MemOperand(x17, 16, PostIndex)); 5676 __ Ldr(q21, MemOperand(x17, 16, PostIndex)); 5677 5678 __ St1(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(), MemOperand(x17)); 5679 __ Ldr(q22, MemOperand(x17, 16, PostIndex)); 5680 __ Ldr(q23, MemOperand(x17, 16, PostIndex)); 5681 __ Ldr(q24, MemOperand(x17, 16, PostIndex)); 5682 __ Ldr(q25, MemOperand(x17)); 5683 END(); 5684 5685 RUN(); 5686 5687 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q16); 5688 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q17); 5689 
ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q18); 5690 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q19); 5691 ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q20); 5692 ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q21); 5693 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q22); 5694 ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q23); 5695 ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q24); 5696 ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323130, q25); 5697 5698 TEARDOWN(); 5699 } 5700 5701 5702 TEST(neon_st1_q_postindex) { 5703 SETUP(); 5704 5705 uint8_t src[64 + 160]; 5706 for (unsigned i = 0; i < sizeof(src); i++) { 5707 src[i] = i; 5708 } 5709 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 5710 5711 START(); 5712 __ Mov(x17, src_base); 5713 __ Mov(x18, -16); 5714 __ Mov(x19, -32); 5715 __ Mov(x20, -48); 5716 __ Mov(x21, -64); 5717 __ Ldr(q0, MemOperand(x17, 16, PostIndex)); 5718 __ Ldr(q1, MemOperand(x17, 16, PostIndex)); 5719 __ Ldr(q2, MemOperand(x17, 16, PostIndex)); 5720 __ Ldr(q3, MemOperand(x17, 16, PostIndex)); 5721 5722 __ St1(v0.V16B(), MemOperand(x17, 16, PostIndex)); 5723 __ Ldr(q16, MemOperand(x17, x18)); 5724 5725 __ St1(v0.V8H(), v1.V8H(), MemOperand(x17, 32, PostIndex)); 5726 __ Ldr(q17, MemOperand(x17, x19)); 5727 __ Ldr(q18, MemOperand(x17, x18)); 5728 5729 __ St1(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x17, 48, PostIndex)); 5730 __ Ldr(q19, MemOperand(x17, x20)); 5731 __ Ldr(q20, MemOperand(x17, x19)); 5732 __ Ldr(q21, MemOperand(x17, x18)); 5733 5734 __ St1(v0.V2D(), 5735 v1.V2D(), 5736 v2.V2D(), 5737 v3.V2D(), 5738 MemOperand(x17, 64, PostIndex)); 5739 __ Ldr(q22, MemOperand(x17, x21)); 5740 __ Ldr(q23, MemOperand(x17, x20)); 5741 __ Ldr(q24, MemOperand(x17, x19)); 5742 __ Ldr(q25, MemOperand(x17, x18)); 5743 5744 END(); 5745 5746 RUN(); 5747 5748 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q16); 5749 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q17); 5750 ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q18); 5751 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q19); 5752 ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q20); 5753 ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q21); 5754 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q22); 5755 ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q23); 5756 ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q24); 5757 ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323130, q25); 5758 5759 TEARDOWN(); 5760 } 5761 5762 5763 TEST(neon_st2_d) { 5764 SETUP(); 5765 5766 uint8_t src[4 * 16]; 5767 for (unsigned i = 0; i < sizeof(src); i++) { 5768 src[i] = i; 5769 } 5770 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 5771 5772 START(); 5773 __ Mov(x17, src_base); 5774 __ Mov(x18, src_base); 5775 __ Ldr(q0, MemOperand(x17, 16, PostIndex)); 5776 __ Ldr(q1, MemOperand(x17, 16, PostIndex)); 5777 5778 __ St2(v0.V8B(), v1.V8B(), MemOperand(x18)); 5779 __ Add(x18, x18, 22); 5780 __ St2(v0.V4H(), v1.V4H(), MemOperand(x18)); 5781 __ Add(x18, x18, 11); 5782 __ St2(v0.V2S(), v1.V2S(), MemOperand(x18)); 5783 5784 __ Mov(x19, src_base); 5785 __ Ldr(q0, MemOperand(x19, 16, PostIndex)); 5786 __ Ldr(q1, MemOperand(x19, 16, PostIndex)); 5787 __ Ldr(q2, MemOperand(x19, 16, PostIndex)); 5788 __ Ldr(q3, MemOperand(x19, 16, PostIndex)); 5789 5790 END(); 5791 5792 RUN(); 5793 5794 ASSERT_EQUAL_128(0x1707160615051404, 0x1303120211011000, q0); 5795 
ASSERT_EQUAL_128(0x0504131203021110, 0x0100151413121110, q1); 5796 ASSERT_EQUAL_128(0x1615140706050413, 0x1211100302010014, q2); 5797 ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323117, q3); 5798 5799 TEARDOWN(); 5800 } 5801 5802 5803 TEST(neon_st2_d_postindex) { 5804 SETUP(); 5805 5806 uint8_t src[4 * 16]; 5807 for (unsigned i = 0; i < sizeof(src); i++) { 5808 src[i] = i; 5809 } 5810 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 5811 5812 START(); 5813 __ Mov(x22, 5); 5814 __ Mov(x17, src_base); 5815 __ Mov(x18, src_base); 5816 __ Ldr(q0, MemOperand(x17, 16, PostIndex)); 5817 __ Ldr(q1, MemOperand(x17, 16, PostIndex)); 5818 5819 __ St2(v0.V8B(), v1.V8B(), MemOperand(x18, x22, PostIndex)); 5820 __ St2(v0.V4H(), v1.V4H(), MemOperand(x18, 16, PostIndex)); 5821 __ St2(v0.V2S(), v1.V2S(), MemOperand(x18)); 5822 5823 5824 __ Mov(x19, src_base); 5825 __ Ldr(q0, MemOperand(x19, 16, PostIndex)); 5826 __ Ldr(q1, MemOperand(x19, 16, PostIndex)); 5827 __ Ldr(q2, MemOperand(x19, 16, PostIndex)); 5828 5829 END(); 5830 5831 RUN(); 5832 5833 ASSERT_EQUAL_128(0x1405041312030211, 0x1001000211011000, q0); 5834 ASSERT_EQUAL_128(0x0605041312111003, 0x0201001716070615, q1); 5835 ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726251716151407, q2); 5836 5837 TEARDOWN(); 5838 } 5839 5840 5841 TEST(neon_st2_q) { 5842 SETUP(); 5843 5844 uint8_t src[5 * 16]; 5845 for (unsigned i = 0; i < sizeof(src); i++) { 5846 src[i] = i; 5847 } 5848 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 5849 5850 START(); 5851 __ Mov(x17, src_base); 5852 __ Mov(x18, src_base); 5853 __ Ldr(q0, MemOperand(x17, 16, PostIndex)); 5854 __ Ldr(q1, MemOperand(x17, 16, PostIndex)); 5855 5856 __ St2(v0.V16B(), v1.V16B(), MemOperand(x18)); 5857 __ Add(x18, x18, 8); 5858 __ St2(v0.V8H(), v1.V8H(), MemOperand(x18)); 5859 __ Add(x18, x18, 22); 5860 __ St2(v0.V4S(), v1.V4S(), MemOperand(x18)); 5861 __ Add(x18, x18, 2); 5862 __ St2(v0.V2D(), v1.V2D(), MemOperand(x18)); 5863 5864 __ Mov(x19, src_base); 5865 __ Ldr(q0, MemOperand(x19, 16, PostIndex)); 5866 __ Ldr(q1, MemOperand(x19, 16, PostIndex)); 5867 __ Ldr(q2, MemOperand(x19, 16, PostIndex)); 5868 __ Ldr(q3, MemOperand(x19, 16, PostIndex)); 5869 5870 END(); 5871 5872 RUN(); 5873 5874 ASSERT_EQUAL_128(0x1312030211100100, 0x1303120211011000, q0); 5875 ASSERT_EQUAL_128(0x01000b0a19180908, 0x1716070615140504, q1); 5876 ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q2); 5877 ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0f0e0d0c0b0a0908, q3); 5878 TEARDOWN(); 5879 } 5880 5881 5882 TEST(neon_st2_q_postindex) { 5883 SETUP(); 5884 5885 uint8_t src[5 * 16]; 5886 for (unsigned i = 0; i < sizeof(src); i++) { 5887 src[i] = i; 5888 } 5889 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 5890 5891 START(); 5892 __ Mov(x22, 5); 5893 __ Mov(x17, src_base); 5894 __ Mov(x18, src_base); 5895 __ Ldr(q0, MemOperand(x17, 16, PostIndex)); 5896 __ Ldr(q1, MemOperand(x17, 16, PostIndex)); 5897 5898 __ St2(v0.V16B(), v1.V16B(), MemOperand(x18, x22, PostIndex)); 5899 __ St2(v0.V8H(), v1.V8H(), MemOperand(x18, 32, PostIndex)); 5900 __ St2(v0.V4S(), v1.V4S(), MemOperand(x18, x22, PostIndex)); 5901 __ St2(v0.V2D(), v1.V2D(), MemOperand(x18)); 5902 5903 __ Mov(x19, src_base); 5904 __ Ldr(q0, MemOperand(x19, 16, PostIndex)); 5905 __ Ldr(q1, MemOperand(x19, 16, PostIndex)); 5906 __ Ldr(q2, MemOperand(x19, 16, PostIndex)); 5907 __ Ldr(q3, MemOperand(x19, 16, PostIndex)); 5908 __ Ldr(q4, MemOperand(x19, 16, PostIndex)); 5909 5910 END(); 5911 5912 RUN(); 5913 5914 ASSERT_EQUAL_128(0x1405041312030211, 
0x1001000211011000, q0); 5915 ASSERT_EQUAL_128(0x1c0d0c1b1a0b0a19, 0x1809081716070615, q1); 5916 ASSERT_EQUAL_128(0x0504030201001003, 0x0201001f1e0f0e1d, q2); 5917 ASSERT_EQUAL_128(0x0d0c0b0a09081716, 0x1514131211100706, q3); 5918 ASSERT_EQUAL_128(0x4f4e4d4c4b4a1f1e, 0x1d1c1b1a19180f0e, q4); 5919 5920 TEARDOWN(); 5921 } 5922 5923 5924 TEST(neon_st3_d) { 5925 SETUP(); 5926 5927 uint8_t src[3 * 16]; 5928 for (unsigned i = 0; i < sizeof(src); i++) { 5929 src[i] = i; 5930 } 5931 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 5932 5933 START(); 5934 __ Mov(x17, src_base); 5935 __ Mov(x18, src_base); 5936 __ Ldr(q0, MemOperand(x17, 16, PostIndex)); 5937 __ Ldr(q1, MemOperand(x17, 16, PostIndex)); 5938 __ Ldr(q2, MemOperand(x17, 16, PostIndex)); 5939 5940 __ St3(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x18)); 5941 __ Add(x18, x18, 3); 5942 __ St3(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x18)); 5943 __ Add(x18, x18, 2); 5944 __ St3(v0.V2S(), v1.V2S(), v2.V2S(), MemOperand(x18)); 5945 5946 5947 __ Mov(x19, src_base); 5948 __ Ldr(q0, MemOperand(x19, 16, PostIndex)); 5949 __ Ldr(q1, MemOperand(x19, 16, PostIndex)); 5950 5951 END(); 5952 5953 RUN(); 5954 5955 ASSERT_EQUAL_128(0x2221201312111003, 0x0201000100201000, q0); 5956 ASSERT_EQUAL_128(0x1f1e1d2726252417, 0x1615140706050423, q1); 5957 5958 TEARDOWN(); 5959 } 5960 5961 5962 TEST(neon_st3_d_postindex) { 5963 SETUP(); 5964 5965 uint8_t src[4 * 16]; 5966 for (unsigned i = 0; i < sizeof(src); i++) { 5967 src[i] = i; 5968 } 5969 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 5970 5971 START(); 5972 __ Mov(x22, 5); 5973 __ Mov(x17, src_base); 5974 __ Mov(x18, src_base); 5975 __ Ldr(q0, MemOperand(x17, 16, PostIndex)); 5976 __ Ldr(q1, MemOperand(x17, 16, PostIndex)); 5977 __ Ldr(q2, MemOperand(x17, 16, PostIndex)); 5978 5979 __ St3(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x18, x22, PostIndex)); 5980 __ St3(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x18, 24, PostIndex)); 5981 __ St3(v0.V2S(), v1.V2S(), v2.V2S(), MemOperand(x18)); 5982 5983 5984 __ Mov(x19, src_base); 5985 __ Ldr(q0, MemOperand(x19, 16, PostIndex)); 5986 __ Ldr(q1, MemOperand(x19, 16, PostIndex)); 5987 __ Ldr(q2, MemOperand(x19, 16, PostIndex)); 5988 __ Ldr(q3, MemOperand(x19, 16, PostIndex)); 5989 5990 END(); 5991 5992 RUN(); 5993 5994 ASSERT_EQUAL_128(0x2213120302212011, 0x1001001101201000, q0); 5995 ASSERT_EQUAL_128(0x0201002726171607, 0x0625241514050423, q1); 5996 ASSERT_EQUAL_128(0x1615140706050423, 0x2221201312111003, q2); 5997 ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736352726252417, q3); 5998 5999 TEARDOWN(); 6000 } 6001 6002 6003 TEST(neon_st3_q) { 6004 SETUP(); 6005 6006 uint8_t src[6 * 16]; 6007 for (unsigned i = 0; i < sizeof(src); i++) { 6008 src[i] = i; 6009 } 6010 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 6011 6012 START(); 6013 __ Mov(x17, src_base); 6014 __ Mov(x18, src_base); 6015 __ Ldr(q0, MemOperand(x17, 16, PostIndex)); 6016 __ Ldr(q1, MemOperand(x17, 16, PostIndex)); 6017 __ Ldr(q2, MemOperand(x17, 16, PostIndex)); 6018 6019 __ St3(v0.V16B(), v1.V16B(), v2.V16B(), MemOperand(x18)); 6020 __ Add(x18, x18, 5); 6021 __ St3(v0.V8H(), v1.V8H(), v2.V8H(), MemOperand(x18)); 6022 __ Add(x18, x18, 12); 6023 __ St3(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x18)); 6024 __ Add(x18, x18, 22); 6025 __ St3(v0.V2D(), v1.V2D(), v2.V2D(), MemOperand(x18)); 6026 6027 __ Mov(x19, src_base); 6028 __ Ldr(q0, MemOperand(x19, 16, PostIndex)); 6029 __ Ldr(q1, MemOperand(x19, 16, PostIndex)); 6030 __ Ldr(q2, MemOperand(x19, 16, PostIndex)); 6031 __ Ldr(q3, 
MemOperand(x19, 16, PostIndex));
6032 __ Ldr(q4, MemOperand(x19, 16, PostIndex));
6033 __ Ldr(q5, MemOperand(x19, 16, PostIndex));
6034
6035 END();
6036
6037 RUN();
6038
6039 ASSERT_EQUAL_128(0x2213120302212011, 0x1001001101201000, q0);
6040 ASSERT_EQUAL_128(0x0605042322212013, 0x1211100302010023, q1);
6041 ASSERT_EQUAL_128(0x1007060504030201, 0x0025241716151407, q2);
6042 ASSERT_EQUAL_128(0x0827262524232221, 0x2017161514131211, q3);
6043 ASSERT_EQUAL_128(0x281f1e1d1c1b1a19, 0x180f0e0d0c0b0a09, q4);
6044 ASSERT_EQUAL_128(0x5f5e5d5c5b5a5958, 0x572f2e2d2c2b2a29, q5);
6045
6046 TEARDOWN();
6047 }
6048
6049
6050 TEST(neon_st3_q_postindex) {
6051 SETUP();
6052
6053 uint8_t src[7 * 16];
6054 for (unsigned i = 0; i < sizeof(src); i++) {
6055 src[i] = i;
6056 }
6057 uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
6058
6059 START();
6060 __ Mov(x22, 5);
6061 __ Mov(x17, src_base);
6062 __ Mov(x18, src_base);
6063 __ Ldr(q0, MemOperand(x17, 16, PostIndex));
6064 __ Ldr(q1, MemOperand(x17, 16, PostIndex));
6065 __ Ldr(q2, MemOperand(x17, 16, PostIndex));
6066
6067 __ St3(v0.V16B(), v1.V16B(), v2.V16B(), MemOperand(x18, x22, PostIndex));
6068 __ St3(v0.V8H(), v1.V8H(), v2.V8H(), MemOperand(x18, 48, PostIndex));
6069 __ St3(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x18, x22, PostIndex));
6070 __ St3(v0.V2D(), v1.V2D(), v2.V2D(), MemOperand(x18));
6071
6072 __ Mov(x19, src_base);
6073 __ Ldr(q0, MemOperand(x19, 16, PostIndex));
6074 __ Ldr(q1, MemOperand(x19, 16, PostIndex));
6075 __ Ldr(q2, MemOperand(x19, 16, PostIndex));
6076 __ Ldr(q3, MemOperand(x19, 16, PostIndex));
6077 __ Ldr(q4, MemOperand(x19, 16, PostIndex));
6078 __ Ldr(q5, MemOperand(x19, 16, PostIndex));
6079 __ Ldr(q6, MemOperand(x19, 16, PostIndex));
6080
6081 END();
6082
6083 RUN();
6084
6085 ASSERT_EQUAL_128(0x2213120302212011, 0x1001001101201000, q0);
6086 ASSERT_EQUAL_128(0x1809082726171607, 0x0625241514050423, q1);
6087 ASSERT_EQUAL_128(0x0e2d2c1d1c0d0c2b, 0x2a1b1a0b0a292819, q2);
6088 ASSERT_EQUAL_128(0x0504030201001003, 0x0201002f2e1f1e0f, q3);
6089 ASSERT_EQUAL_128(0x2524232221201716, 0x1514131211100706, q4);
6090 ASSERT_EQUAL_128(0x1d1c1b1a19180f0e, 0x0d0c0b0a09082726, q5);
6091 ASSERT_EQUAL_128(0x6f6e6d6c6b6a2f2e, 0x2d2c2b2a29281f1e, q6);
6092
6093 TEARDOWN();
6094 }
6095
6096
6097 TEST(neon_st4_d) {
6098 SETUP();
6099
6100 uint8_t src[4 * 16];
6101 for (unsigned i = 0; i < sizeof(src); i++) {
6102 src[i] = i;
6103 }
6104 uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
6105
6106 START();
6107 __ Mov(x17, src_base);
6108 __ Mov(x18, src_base);
6109 __ Ldr(q0, MemOperand(x17, 16, PostIndex));
6110 __ Ldr(q1, MemOperand(x17, 16, PostIndex));
6111 __ Ldr(q2, MemOperand(x17, 16, PostIndex));
6112 __ Ldr(q3, MemOperand(x17, 16, PostIndex));
6113
6114 __ St4(v0.V8B(), v1.V8B(), v2.V8B(), v3.V8B(), MemOperand(x18));
6115 __ Add(x18, x18, 12);
6116 __ St4(v0.V4H(), v1.V4H(), v2.V4H(), v3.V4H(), MemOperand(x18));
6117 __ Add(x18, x18, 15);
6118 __ St4(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(), MemOperand(x18));
6119
6120
6121 __ Mov(x19, src_base);
6122 __ Ldr(q0, MemOperand(x19, 16, PostIndex));
6123 __ Ldr(q1, MemOperand(x19, 16, PostIndex));
6124 __ Ldr(q2, MemOperand(x19, 16, PostIndex));
6125 __ Ldr(q3, MemOperand(x19, 16, PostIndex));
6126
6127 END();
6128
6129 RUN();
6130
6131 ASSERT_EQUAL_128(0x1110010032221202, 0x3121110130201000, q0);
6132 ASSERT_EQUAL_128(0x1003020100322322, 0x1312030231302120, q1);
6133 ASSERT_EQUAL_128(0x1407060504333231, 0x3023222120131211, q2);
6134 ASSERT_EQUAL_128(0x3f3e3d3c3b373635,
0x3427262524171615, q3); 6135 6136 TEARDOWN(); 6137 } 6138 6139 6140 TEST(neon_st4_d_postindex) { 6141 SETUP(); 6142 6143 uint8_t src[5 * 16]; 6144 for (unsigned i = 0; i < sizeof(src); i++) { 6145 src[i] = i; 6146 } 6147 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 6148 6149 START(); 6150 __ Mov(x22, 5); 6151 __ Mov(x17, src_base); 6152 __ Mov(x18, src_base); 6153 __ Ldr(q0, MemOperand(x17, 16, PostIndex)); 6154 __ Ldr(q1, MemOperand(x17, 16, PostIndex)); 6155 __ Ldr(q2, MemOperand(x17, 16, PostIndex)); 6156 __ Ldr(q3, MemOperand(x17, 16, PostIndex)); 6157 6158 __ St4(v0.V8B(), 6159 v1.V8B(), 6160 v2.V8B(), 6161 v3.V8B(), 6162 MemOperand(x18, x22, PostIndex)); 6163 __ St4(v0.V4H(), 6164 v1.V4H(), 6165 v2.V4H(), 6166 v3.V4H(), 6167 MemOperand(x18, 32, PostIndex)); 6168 __ St4(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(), MemOperand(x18)); 6169 6170 6171 __ Mov(x19, src_base); 6172 __ Ldr(q0, MemOperand(x19, 16, PostIndex)); 6173 __ Ldr(q1, MemOperand(x19, 16, PostIndex)); 6174 __ Ldr(q2, MemOperand(x19, 16, PostIndex)); 6175 __ Ldr(q3, MemOperand(x19, 16, PostIndex)); 6176 __ Ldr(q4, MemOperand(x19, 16, PostIndex)); 6177 6178 END(); 6179 6180 RUN(); 6181 6182 ASSERT_EQUAL_128(0x1203023130212011, 0x1001000130201000, q0); 6183 ASSERT_EQUAL_128(0x1607063534252415, 0x1405043332232213, q1); 6184 ASSERT_EQUAL_128(0x2221201312111003, 0x0201003736272617, q2); 6185 ASSERT_EQUAL_128(0x2625241716151407, 0x0605043332313023, q3); 6186 ASSERT_EQUAL_128(0x4f4e4d4c4b4a4948, 0x4746453736353427, q4); 6187 6188 TEARDOWN(); 6189 } 6190 6191 6192 TEST(neon_st4_q) { 6193 SETUP(); 6194 6195 uint8_t src[7 * 16]; 6196 for (unsigned i = 0; i < sizeof(src); i++) { 6197 src[i] = i; 6198 } 6199 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 6200 6201 START(); 6202 __ Mov(x17, src_base); 6203 __ Mov(x18, src_base); 6204 __ Ldr(q0, MemOperand(x17, 16, PostIndex)); 6205 __ Ldr(q1, MemOperand(x17, 16, PostIndex)); 6206 __ Ldr(q2, MemOperand(x17, 16, PostIndex)); 6207 __ Ldr(q3, MemOperand(x17, 16, PostIndex)); 6208 6209 __ St4(v0.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), MemOperand(x18)); 6210 __ Add(x18, x18, 5); 6211 __ St4(v0.V8H(), v1.V8H(), v2.V8H(), v3.V8H(), MemOperand(x18)); 6212 __ Add(x18, x18, 12); 6213 __ St4(v0.V4S(), v1.V4S(), v2.V4S(), v3.V4S(), MemOperand(x18)); 6214 __ Add(x18, x18, 22); 6215 __ St4(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(), MemOperand(x18)); 6216 __ Add(x18, x18, 10); 6217 6218 __ Mov(x19, src_base); 6219 __ Ldr(q0, MemOperand(x19, 16, PostIndex)); 6220 __ Ldr(q1, MemOperand(x19, 16, PostIndex)); 6221 __ Ldr(q2, MemOperand(x19, 16, PostIndex)); 6222 __ Ldr(q3, MemOperand(x19, 16, PostIndex)); 6223 __ Ldr(q4, MemOperand(x19, 16, PostIndex)); 6224 __ Ldr(q5, MemOperand(x19, 16, PostIndex)); 6225 __ Ldr(q6, MemOperand(x19, 16, PostIndex)); 6226 6227 END(); 6228 6229 RUN(); 6230 6231 ASSERT_EQUAL_128(0x1203023130212011, 0x1001000130201000, q0); 6232 ASSERT_EQUAL_128(0x3231302322212013, 0x1211100302010013, q1); 6233 ASSERT_EQUAL_128(0x1007060504030201, 0x0015140706050433, q2); 6234 ASSERT_EQUAL_128(0x3027262524232221, 0x2017161514131211, q3); 6235 ASSERT_EQUAL_128(0x180f0e0d0c0b0a09, 0x0837363534333231, q4); 6236 ASSERT_EQUAL_128(0x382f2e2d2c2b2a29, 0x281f1e1d1c1b1a19, q5); 6237 ASSERT_EQUAL_128(0x6f6e6d6c6b6a6968, 0x673f3e3d3c3b3a39, q6); 6238 6239 TEARDOWN(); 6240 } 6241 6242 6243 TEST(neon_st4_q_postindex) { 6244 SETUP(); 6245 6246 uint8_t src[9 * 16]; 6247 for (unsigned i = 0; i < sizeof(src); i++) { 6248 src[i] = i; 6249 } 6250 uintptr_t src_base = 
reinterpret_cast<uintptr_t>(src); 6251 6252 START(); 6253 __ Mov(x22, 5); 6254 __ Mov(x17, src_base); 6255 __ Mov(x18, src_base); 6256 __ Ldr(q0, MemOperand(x17, 16, PostIndex)); 6257 __ Ldr(q1, MemOperand(x17, 16, PostIndex)); 6258 __ Ldr(q2, MemOperand(x17, 16, PostIndex)); 6259 __ Ldr(q3, MemOperand(x17, 16, PostIndex)); 6260 6261 __ St4(v0.V16B(), 6262 v1.V16B(), 6263 v2.V16B(), 6264 v3.V16B(), 6265 MemOperand(x18, x22, PostIndex)); 6266 __ St4(v0.V8H(), 6267 v1.V8H(), 6268 v2.V8H(), 6269 v3.V8H(), 6270 MemOperand(x18, 64, PostIndex)); 6271 __ St4(v0.V4S(), 6272 v1.V4S(), 6273 v2.V4S(), 6274 v3.V4S(), 6275 MemOperand(x18, x22, PostIndex)); 6276 __ St4(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(), MemOperand(x18)); 6277 6278 __ Mov(x19, src_base); 6279 __ Ldr(q0, MemOperand(x19, 16, PostIndex)); 6280 __ Ldr(q1, MemOperand(x19, 16, PostIndex)); 6281 __ Ldr(q2, MemOperand(x19, 16, PostIndex)); 6282 __ Ldr(q3, MemOperand(x19, 16, PostIndex)); 6283 __ Ldr(q4, MemOperand(x19, 16, PostIndex)); 6284 __ Ldr(q5, MemOperand(x19, 16, PostIndex)); 6285 __ Ldr(q6, MemOperand(x19, 16, PostIndex)); 6286 __ Ldr(q7, MemOperand(x19, 16, PostIndex)); 6287 __ Ldr(q8, MemOperand(x19, 16, PostIndex)); 6288 6289 END(); 6290 6291 RUN(); 6292 6293 ASSERT_EQUAL_128(0x1203023130212011, 0x1001000130201000, q0); 6294 ASSERT_EQUAL_128(0x1607063534252415, 0x1405043332232213, q1); 6295 ASSERT_EQUAL_128(0x1a0b0a3938292819, 0x1809083736272617, q2); 6296 ASSERT_EQUAL_128(0x1e0f0e3d3c2d2c1d, 0x1c0d0c3b3a2b2a1b, q3); 6297 ASSERT_EQUAL_128(0x0504030201001003, 0x0201003f3e2f2e1f, q4); 6298 ASSERT_EQUAL_128(0x2524232221201716, 0x1514131211100706, q5); 6299 ASSERT_EQUAL_128(0x0d0c0b0a09083736, 0x3534333231302726, q6); 6300 ASSERT_EQUAL_128(0x2d2c2b2a29281f1e, 0x1d1c1b1a19180f0e, q7); 6301 ASSERT_EQUAL_128(0x8f8e8d8c8b8a3f3e, 0x3d3c3b3a39382f2e, q8); 6302 6303 TEARDOWN(); 6304 } 6305 6306 6307 TEST(neon_destructive_minmaxp) { 6308 SETUP(); 6309 6310 START(); 6311 __ Movi(v0.V2D(), 0, 0x2222222233333333); 6312 __ Movi(v1.V2D(), 0, 0x0000000011111111); 6313 6314 __ Sminp(v16.V2S(), v0.V2S(), v1.V2S()); 6315 __ Mov(v17, v0); 6316 __ Sminp(v17.V2S(), v17.V2S(), v1.V2S()); 6317 __ Mov(v18, v1); 6318 __ Sminp(v18.V2S(), v0.V2S(), v18.V2S()); 6319 __ Mov(v19, v0); 6320 __ Sminp(v19.V2S(), v19.V2S(), v19.V2S()); 6321 6322 __ Smaxp(v20.V2S(), v0.V2S(), v1.V2S()); 6323 __ Mov(v21, v0); 6324 __ Smaxp(v21.V2S(), v21.V2S(), v1.V2S()); 6325 __ Mov(v22, v1); 6326 __ Smaxp(v22.V2S(), v0.V2S(), v22.V2S()); 6327 __ Mov(v23, v0); 6328 __ Smaxp(v23.V2S(), v23.V2S(), v23.V2S()); 6329 6330 __ Uminp(v24.V2S(), v0.V2S(), v1.V2S()); 6331 __ Mov(v25, v0); 6332 __ Uminp(v25.V2S(), v25.V2S(), v1.V2S()); 6333 __ Mov(v26, v1); 6334 __ Uminp(v26.V2S(), v0.V2S(), v26.V2S()); 6335 __ Mov(v27, v0); 6336 __ Uminp(v27.V2S(), v27.V2S(), v27.V2S()); 6337 6338 __ Umaxp(v28.V2S(), v0.V2S(), v1.V2S()); 6339 __ Mov(v29, v0); 6340 __ Umaxp(v29.V2S(), v29.V2S(), v1.V2S()); 6341 __ Mov(v30, v1); 6342 __ Umaxp(v30.V2S(), v0.V2S(), v30.V2S()); 6343 __ Mov(v31, v0); 6344 __ Umaxp(v31.V2S(), v31.V2S(), v31.V2S()); 6345 END(); 6346 6347 RUN(); 6348 6349 ASSERT_EQUAL_128(0, 0x0000000022222222, q16); 6350 ASSERT_EQUAL_128(0, 0x0000000022222222, q17); 6351 ASSERT_EQUAL_128(0, 0x0000000022222222, q18); 6352 ASSERT_EQUAL_128(0, 0x2222222222222222, q19); 6353 6354 ASSERT_EQUAL_128(0, 0x1111111133333333, q20); 6355 ASSERT_EQUAL_128(0, 0x1111111133333333, q21); 6356 ASSERT_EQUAL_128(0, 0x1111111133333333, q22); 6357 ASSERT_EQUAL_128(0, 0x3333333333333333, q23); 6358 6359 ASSERT_EQUAL_128(0, 
0x0000000022222222, q24); 6360 ASSERT_EQUAL_128(0, 0x0000000022222222, q25); 6361 ASSERT_EQUAL_128(0, 0x0000000022222222, q26); 6362 ASSERT_EQUAL_128(0, 0x2222222222222222, q27); 6363 6364 ASSERT_EQUAL_128(0, 0x1111111133333333, q28); 6365 ASSERT_EQUAL_128(0, 0x1111111133333333, q29); 6366 ASSERT_EQUAL_128(0, 0x1111111133333333, q30); 6367 ASSERT_EQUAL_128(0, 0x3333333333333333, q31); 6368 6369 TEARDOWN(); 6370 } 6371 6372 6373 TEST(neon_destructive_tbl) { 6374 SETUP(); 6375 6376 START(); 6377 __ Movi(v0.V2D(), 0x0041424334353627, 0x28291a1b1c0d0e0f); 6378 __ Movi(v1.V2D(), 0xafaeadacabaaa9a8, 0xa7a6a5a4a3a2a1a0); 6379 __ Movi(v2.V2D(), 0xbfbebdbcbbbab9b8, 0xb7b6b5b4b3b2b1b0); 6380 __ Movi(v3.V2D(), 0xcfcecdcccbcac9c8, 0xc7c6c5c4c3c2c1c0); 6381 __ Movi(v4.V2D(), 0xdfdedddcdbdad9d8, 0xd7d6d5d4d3d2d1d0); 6382 6383 __ Movi(v16.V2D(), 0x5555555555555555, 0x5555555555555555); 6384 __ Tbl(v16.V16B(), v1.V16B(), v0.V16B()); 6385 __ Mov(v17, v0); 6386 __ Tbl(v17.V16B(), v1.V16B(), v17.V16B()); 6387 __ Mov(v18, v1); 6388 __ Tbl(v18.V16B(), v18.V16B(), v0.V16B()); 6389 __ Mov(v19, v0); 6390 __ Tbl(v19.V16B(), v19.V16B(), v19.V16B()); 6391 6392 __ Movi(v20.V2D(), 0x5555555555555555, 0x5555555555555555); 6393 __ Tbl(v20.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), v0.V16B()); 6394 __ Mov(v21, v0); 6395 __ Tbl(v21.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), v21.V16B()); 6396 __ Mov(v22, v1); 6397 __ Mov(v23, v2); 6398 __ Mov(v24, v3); 6399 __ Mov(v25, v4); 6400 __ Tbl(v22.V16B(), v22.V16B(), v23.V16B(), v24.V16B(), v25.V16B(), v0.V16B()); 6401 __ Mov(v26, v0); 6402 __ Mov(v27, v1); 6403 __ Mov(v28, v2); 6404 __ Mov(v29, v3); 6405 __ Tbl(v26.V16B(), 6406 v26.V16B(), 6407 v27.V16B(), 6408 v28.V16B(), 6409 v29.V16B(), 6410 v26.V16B()); 6411 END(); 6412 6413 RUN(); 6414 6415 ASSERT_EQUAL_128(0xa000000000000000, 0x0000000000adaeaf, q16); 6416 ASSERT_EQUAL_128(0xa000000000000000, 0x0000000000adaeaf, q17); 6417 ASSERT_EQUAL_128(0xa000000000000000, 0x0000000000adaeaf, q18); 6418 ASSERT_EQUAL_128(0x0f00000000000000, 0x0000000000424100, q19); 6419 6420 ASSERT_EQUAL_128(0xa0000000d4d5d6c7, 0xc8c9babbbcadaeaf, q20); 6421 ASSERT_EQUAL_128(0xa0000000d4d5d6c7, 0xc8c9babbbcadaeaf, q21); 6422 ASSERT_EQUAL_128(0xa0000000d4d5d6c7, 0xc8c9babbbcadaeaf, q22); 6423 ASSERT_EQUAL_128(0x0f000000c4c5c6b7, 0xb8b9aaabac424100, q26); 6424 6425 TEARDOWN(); 6426 } 6427 6428 6429 TEST(neon_destructive_tbx) { 6430 SETUP(); 6431 6432 START(); 6433 __ Movi(v0.V2D(), 0x0041424334353627, 0x28291a1b1c0d0e0f); 6434 __ Movi(v1.V2D(), 0xafaeadacabaaa9a8, 0xa7a6a5a4a3a2a1a0); 6435 __ Movi(v2.V2D(), 0xbfbebdbcbbbab9b8, 0xb7b6b5b4b3b2b1b0); 6436 __ Movi(v3.V2D(), 0xcfcecdcccbcac9c8, 0xc7c6c5c4c3c2c1c0); 6437 __ Movi(v4.V2D(), 0xdfdedddcdbdad9d8, 0xd7d6d5d4d3d2d1d0); 6438 6439 __ Movi(v16.V2D(), 0x5555555555555555, 0x5555555555555555); 6440 __ Tbx(v16.V16B(), v1.V16B(), v0.V16B()); 6441 __ Mov(v17, v0); 6442 __ Tbx(v17.V16B(), v1.V16B(), v17.V16B()); 6443 __ Mov(v18, v1); 6444 __ Tbx(v18.V16B(), v18.V16B(), v0.V16B()); 6445 __ Mov(v19, v0); 6446 __ Tbx(v19.V16B(), v19.V16B(), v19.V16B()); 6447 6448 __ Movi(v20.V2D(), 0x5555555555555555, 0x5555555555555555); 6449 __ Tbx(v20.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), v0.V16B()); 6450 __ Mov(v21, v0); 6451 __ Tbx(v21.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), v21.V16B()); 6452 __ Mov(v22, v1); 6453 __ Mov(v23, v2); 6454 __ Mov(v24, v3); 6455 __ Mov(v25, v4); 6456 __ Tbx(v22.V16B(), v22.V16B(), v23.V16B(), v24.V16B(), v25.V16B(), v0.V16B()); 6457 __ Mov(v26, v0); 6458 
__ Mov(v27, v1); 6459 __ Mov(v28, v2); 6460 __ Mov(v29, v3); 6461 __ Tbx(v26.V16B(), 6462 v26.V16B(), 6463 v27.V16B(), 6464 v28.V16B(), 6465 v29.V16B(), 6466 v26.V16B()); 6467 END(); 6468 6469 RUN(); 6470 6471 ASSERT_EQUAL_128(0xa055555555555555, 0x5555555555adaeaf, q16); 6472 ASSERT_EQUAL_128(0xa041424334353627, 0x28291a1b1cadaeaf, q17); 6473 ASSERT_EQUAL_128(0xa0aeadacabaaa9a8, 0xa7a6a5a4a3adaeaf, q18); 6474 ASSERT_EQUAL_128(0x0f41424334353627, 0x28291a1b1c424100, q19); 6475 6476 ASSERT_EQUAL_128(0xa0555555d4d5d6c7, 0xc8c9babbbcadaeaf, q20); 6477 ASSERT_EQUAL_128(0xa0414243d4d5d6c7, 0xc8c9babbbcadaeaf, q21); 6478 ASSERT_EQUAL_128(0xa0aeadacd4d5d6c7, 0xc8c9babbbcadaeaf, q22); 6479 ASSERT_EQUAL_128(0x0f414243c4c5c6b7, 0xb8b9aaabac424100, q26); 6480 6481 TEARDOWN(); 6482 } 6483 6484 6485 TEST(neon_destructive_fcvtl) { 6486 SETUP(); 6487 6488 START(); 6489 __ Movi(v0.V2D(), 0x400000003f800000, 0xbf800000c0000000); 6490 __ Fcvtl(v16.V2D(), v0.V2S()); 6491 __ Fcvtl2(v17.V2D(), v0.V4S()); 6492 __ Mov(v18, v0); 6493 __ Mov(v19, v0); 6494 __ Fcvtl(v18.V2D(), v18.V2S()); 6495 __ Fcvtl2(v19.V2D(), v19.V4S()); 6496 6497 __ Movi(v1.V2D(), 0x40003c003c004000, 0xc000bc00bc00c000); 6498 __ Fcvtl(v20.V4S(), v1.V4H()); 6499 __ Fcvtl2(v21.V4S(), v1.V8H()); 6500 __ Mov(v22, v1); 6501 __ Mov(v23, v1); 6502 __ Fcvtl(v22.V4S(), v22.V4H()); 6503 __ Fcvtl2(v23.V4S(), v23.V8H()); 6504 6505 END(); 6506 6507 RUN(); 6508 6509 ASSERT_EQUAL_128(0xbff0000000000000, 0xc000000000000000, q16); 6510 ASSERT_EQUAL_128(0x4000000000000000, 0x3ff0000000000000, q17); 6511 ASSERT_EQUAL_128(0xbff0000000000000, 0xc000000000000000, q18); 6512 ASSERT_EQUAL_128(0x4000000000000000, 0x3ff0000000000000, q19); 6513 6514 ASSERT_EQUAL_128(0xc0000000bf800000, 0xbf800000c0000000, q20); 6515 ASSERT_EQUAL_128(0x400000003f800000, 0x3f80000040000000, q21); 6516 ASSERT_EQUAL_128(0xc0000000bf800000, 0xbf800000c0000000, q22); 6517 ASSERT_EQUAL_128(0x400000003f800000, 0x3f80000040000000, q23); 6518 6519 TEARDOWN(); 6520 } 6521 6522 6523 TEST(ldp_stp_float) { 6524 SETUP(); 6525 6526 float src[2] = {1.0, 2.0}; 6527 float dst[3] = {0.0, 0.0, 0.0}; 6528 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 6529 uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); 6530 6531 START(); 6532 __ Mov(x16, src_base); 6533 __ Mov(x17, dst_base); 6534 __ Ldp(s31, s0, MemOperand(x16, 2 * sizeof(src[0]), PostIndex)); 6535 __ Stp(s0, s31, MemOperand(x17, sizeof(dst[1]), PreIndex)); 6536 END(); 6537 6538 RUN(); 6539 6540 ASSERT_EQUAL_FP32(1.0, s31); 6541 ASSERT_EQUAL_FP32(2.0, s0); 6542 ASSERT_EQUAL_FP32(0.0, dst[0]); 6543 ASSERT_EQUAL_FP32(2.0, dst[1]); 6544 ASSERT_EQUAL_FP32(1.0, dst[2]); 6545 ASSERT_EQUAL_64(src_base + 2 * sizeof(src[0]), x16); 6546 ASSERT_EQUAL_64(dst_base + sizeof(dst[1]), x17); 6547 6548 TEARDOWN(); 6549 } 6550 6551 6552 TEST(ldp_stp_double) { 6553 SETUP(); 6554 6555 double src[2] = {1.0, 2.0}; 6556 double dst[3] = {0.0, 0.0, 0.0}; 6557 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 6558 uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); 6559 6560 START(); 6561 __ Mov(x16, src_base); 6562 __ Mov(x17, dst_base); 6563 __ Ldp(d31, d0, MemOperand(x16, 2 * sizeof(src[0]), PostIndex)); 6564 __ Stp(d0, d31, MemOperand(x17, sizeof(dst[1]), PreIndex)); 6565 END(); 6566 6567 RUN(); 6568 6569 ASSERT_EQUAL_FP64(1.0, d31); 6570 ASSERT_EQUAL_FP64(2.0, d0); 6571 ASSERT_EQUAL_FP64(0.0, dst[0]); 6572 ASSERT_EQUAL_FP64(2.0, dst[1]); 6573 ASSERT_EQUAL_FP64(1.0, dst[2]); 6574 ASSERT_EQUAL_64(src_base + 2 * sizeof(src[0]), x16); 6575 
ASSERT_EQUAL_64(dst_base + sizeof(dst[1]), x17); 6576 6577 TEARDOWN(); 6578 } 6579 6580 6581 TEST(ldp_stp_quad) { 6582 SETUP(); 6583 6584 uint64_t src[4] = {0x0123456789abcdef, 6585 0xaaaaaaaa55555555, 6586 0xfedcba9876543210, 6587 0x55555555aaaaaaaa}; 6588 uint64_t dst[6] = {0, 0, 0, 0, 0, 0}; 6589 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 6590 uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); 6591 6592 START(); 6593 __ Mov(x16, src_base); 6594 __ Mov(x17, dst_base); 6595 __ Ldp(q31, q0, MemOperand(x16, 4 * sizeof(src[0]), PostIndex)); 6596 __ Stp(q0, q31, MemOperand(x17, 2 * sizeof(dst[1]), PreIndex)); 6597 END(); 6598 6599 RUN(); 6600 6601 ASSERT_EQUAL_128(0xaaaaaaaa55555555, 0x0123456789abcdef, q31); 6602 ASSERT_EQUAL_128(0x55555555aaaaaaaa, 0xfedcba9876543210, q0); 6603 ASSERT_EQUAL_64(0, dst[0]); 6604 ASSERT_EQUAL_64(0, dst[1]); 6605 ASSERT_EQUAL_64(0xfedcba9876543210, dst[2]); 6606 ASSERT_EQUAL_64(0x55555555aaaaaaaa, dst[3]); 6607 ASSERT_EQUAL_64(0x0123456789abcdef, dst[4]); 6608 ASSERT_EQUAL_64(0xaaaaaaaa55555555, dst[5]); 6609 ASSERT_EQUAL_64(src_base + 4 * sizeof(src[0]), x16); 6610 ASSERT_EQUAL_64(dst_base + 2 * sizeof(dst[1]), x17); 6611 6612 TEARDOWN(); 6613 } 6614 6615 6616 TEST(ldp_stp_offset) { 6617 SETUP(); 6618 6619 uint64_t src[3] = {0x0011223344556677, 6620 0x8899aabbccddeeff, 6621 0xffeeddccbbaa9988}; 6622 uint64_t dst[7] = {0, 0, 0, 0, 0, 0, 0}; 6623 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 6624 uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); 6625 6626 START(); 6627 __ Mov(x16, src_base); 6628 __ Mov(x17, dst_base); 6629 __ Mov(x18, src_base + 24); 6630 __ Mov(x19, dst_base + 56); 6631 __ Ldp(w0, w1, MemOperand(x16)); 6632 __ Ldp(w2, w3, MemOperand(x16, 4)); 6633 __ Ldp(x4, x5, MemOperand(x16, 8)); 6634 __ Ldp(w6, w7, MemOperand(x18, -12)); 6635 __ Ldp(x8, x9, MemOperand(x18, -16)); 6636 __ Stp(w0, w1, MemOperand(x17)); 6637 __ Stp(w2, w3, MemOperand(x17, 8)); 6638 __ Stp(x4, x5, MemOperand(x17, 16)); 6639 __ Stp(w6, w7, MemOperand(x19, -24)); 6640 __ Stp(x8, x9, MemOperand(x19, -16)); 6641 END(); 6642 6643 RUN(); 6644 6645 ASSERT_EQUAL_64(0x44556677, x0); 6646 ASSERT_EQUAL_64(0x00112233, x1); 6647 ASSERT_EQUAL_64(0x0011223344556677, dst[0]); 6648 ASSERT_EQUAL_64(0x00112233, x2); 6649 ASSERT_EQUAL_64(0xccddeeff, x3); 6650 ASSERT_EQUAL_64(0xccddeeff00112233, dst[1]); 6651 ASSERT_EQUAL_64(0x8899aabbccddeeff, x4); 6652 ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[2]); 6653 ASSERT_EQUAL_64(0xffeeddccbbaa9988, x5); 6654 ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[3]); 6655 ASSERT_EQUAL_64(0x8899aabb, x6); 6656 ASSERT_EQUAL_64(0xbbaa9988, x7); 6657 ASSERT_EQUAL_64(0xbbaa99888899aabb, dst[4]); 6658 ASSERT_EQUAL_64(0x8899aabbccddeeff, x8); 6659 ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[5]); 6660 ASSERT_EQUAL_64(0xffeeddccbbaa9988, x9); 6661 ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[6]); 6662 ASSERT_EQUAL_64(src_base, x16); 6663 ASSERT_EQUAL_64(dst_base, x17); 6664 ASSERT_EQUAL_64(src_base + 24, x18); 6665 ASSERT_EQUAL_64(dst_base + 56, x19); 6666 6667 TEARDOWN(); 6668 } 6669 6670 6671 TEST(ldp_stp_offset_wide) { 6672 SETUP(); 6673 6674 uint64_t src[3] = {0x0011223344556677, 6675 0x8899aabbccddeeff, 6676 0xffeeddccbbaa9988}; 6677 uint64_t dst[7] = {0, 0, 0, 0, 0, 0, 0}; 6678 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 6679 uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); 6680 // Move base too far from the array to force multiple instructions 6681 // to be emitted. 
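// A sketch of the reason: the LDP/STP signed-offset forms encode only a
// 7-bit immediate scaled by the access size (roughly -256 to +252 for W
// pairs and -512 to +504 for X pairs), so an offset like the 1024 used
// below cannot be encoded in a single instruction. The MacroAssembler has
// to materialise the address first, along the lines of (illustrative, not
// necessarily the exact emitted sequence):
//
//   add <scratch>, x20, #1024
//   ldp w0, w1, [<scratch>]
//
// which is presumably also why this test avoids x16/x17 (the default
// scratch registers) and uses x18-x21 as bases.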
6682 const int64_t base_offset = 1024; 6683 6684 START(); 6685 __ Mov(x20, src_base - base_offset); 6686 __ Mov(x21, dst_base - base_offset); 6687 __ Mov(x18, src_base + base_offset + 24); 6688 __ Mov(x19, dst_base + base_offset + 56); 6689 __ Ldp(w0, w1, MemOperand(x20, base_offset)); 6690 __ Ldp(w2, w3, MemOperand(x20, base_offset + 4)); 6691 __ Ldp(x4, x5, MemOperand(x20, base_offset + 8)); 6692 __ Ldp(w6, w7, MemOperand(x18, -12 - base_offset)); 6693 __ Ldp(x8, x9, MemOperand(x18, -16 - base_offset)); 6694 __ Stp(w0, w1, MemOperand(x21, base_offset)); 6695 __ Stp(w2, w3, MemOperand(x21, base_offset + 8)); 6696 __ Stp(x4, x5, MemOperand(x21, base_offset + 16)); 6697 __ Stp(w6, w7, MemOperand(x19, -24 - base_offset)); 6698 __ Stp(x8, x9, MemOperand(x19, -16 - base_offset)); 6699 END(); 6700 6701 RUN(); 6702 6703 ASSERT_EQUAL_64(0x44556677, x0); 6704 ASSERT_EQUAL_64(0x00112233, x1); 6705 ASSERT_EQUAL_64(0x0011223344556677, dst[0]); 6706 ASSERT_EQUAL_64(0x00112233, x2); 6707 ASSERT_EQUAL_64(0xccddeeff, x3); 6708 ASSERT_EQUAL_64(0xccddeeff00112233, dst[1]); 6709 ASSERT_EQUAL_64(0x8899aabbccddeeff, x4); 6710 ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[2]); 6711 ASSERT_EQUAL_64(0xffeeddccbbaa9988, x5); 6712 ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[3]); 6713 ASSERT_EQUAL_64(0x8899aabb, x6); 6714 ASSERT_EQUAL_64(0xbbaa9988, x7); 6715 ASSERT_EQUAL_64(0xbbaa99888899aabb, dst[4]); 6716 ASSERT_EQUAL_64(0x8899aabbccddeeff, x8); 6717 ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[5]); 6718 ASSERT_EQUAL_64(0xffeeddccbbaa9988, x9); 6719 ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[6]); 6720 ASSERT_EQUAL_64(src_base - base_offset, x20); 6721 ASSERT_EQUAL_64(dst_base - base_offset, x21); 6722 ASSERT_EQUAL_64(src_base + base_offset + 24, x18); 6723 ASSERT_EQUAL_64(dst_base + base_offset + 56, x19); 6724 6725 TEARDOWN(); 6726 } 6727 6728 6729 TEST(ldnp_stnp_offset) { 6730 SETUP(); 6731 6732 uint64_t src[4] = {0x0011223344556677, 6733 0x8899aabbccddeeff, 6734 0xffeeddccbbaa9988, 6735 0x7766554433221100}; 6736 uint64_t dst[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; 6737 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 6738 uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); 6739 6740 START(); 6741 __ Mov(x16, src_base); 6742 __ Mov(x17, dst_base); 6743 __ Mov(x18, src_base + 24); 6744 __ Mov(x19, dst_base + 64); 6745 __ Mov(x20, src_base + 32); 6746 6747 // Ensure address set up has happened before executing non-temporal ops. 
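// Non-temporal accesses come with weaker ordering guarantees than normal
// loads and stores: architecturally, an LDNP is allowed to be performed out
// of order with respect to earlier accesses even where an address
// dependency exists, so the barrier below is not just cosmetic.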
6748 __ Dmb(InnerShareable, BarrierAll); 6749 6750 __ Ldnp(w0, w1, MemOperand(x16)); 6751 __ Ldnp(w2, w3, MemOperand(x16, 4)); 6752 __ Ldnp(x4, x5, MemOperand(x16, 8)); 6753 __ Ldnp(w6, w7, MemOperand(x18, -12)); 6754 __ Ldnp(x8, x9, MemOperand(x18, -16)); 6755 __ Ldnp(q16, q17, MemOperand(x16)); 6756 __ Ldnp(q19, q18, MemOperand(x20, -32)); 6757 __ Stnp(w0, w1, MemOperand(x17)); 6758 __ Stnp(w2, w3, MemOperand(x17, 8)); 6759 __ Stnp(x4, x5, MemOperand(x17, 16)); 6760 __ Stnp(w6, w7, MemOperand(x19, -32)); 6761 __ Stnp(x8, x9, MemOperand(x19, -24)); 6762 __ Stnp(q17, q16, MemOperand(x19)); 6763 __ Stnp(q18, q19, MemOperand(x19, 32)); 6764 END(); 6765 6766 RUN(); 6767 6768 ASSERT_EQUAL_64(0x44556677, x0); 6769 ASSERT_EQUAL_64(0x00112233, x1); 6770 ASSERT_EQUAL_64(0x0011223344556677, dst[0]); 6771 ASSERT_EQUAL_64(0x00112233, x2); 6772 ASSERT_EQUAL_64(0xccddeeff, x3); 6773 ASSERT_EQUAL_64(0xccddeeff00112233, dst[1]); 6774 ASSERT_EQUAL_64(0x8899aabbccddeeff, x4); 6775 ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[2]); 6776 ASSERT_EQUAL_64(0xffeeddccbbaa9988, x5); 6777 ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[3]); 6778 ASSERT_EQUAL_64(0x8899aabb, x6); 6779 ASSERT_EQUAL_64(0xbbaa9988, x7); 6780 ASSERT_EQUAL_64(0xbbaa99888899aabb, dst[4]); 6781 ASSERT_EQUAL_64(0x8899aabbccddeeff, x8); 6782 ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[5]); 6783 ASSERT_EQUAL_64(0xffeeddccbbaa9988, x9); 6784 ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[6]); 6785 ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x0011223344556677, q16); 6786 ASSERT_EQUAL_128(0x7766554433221100, 0xffeeddccbbaa9988, q17); 6787 ASSERT_EQUAL_128(0x7766554433221100, 0xffeeddccbbaa9988, q18); 6788 ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x0011223344556677, q19); 6789 ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[8]); 6790 ASSERT_EQUAL_64(0x7766554433221100, dst[9]); 6791 ASSERT_EQUAL_64(0x0011223344556677, dst[10]); 6792 ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[11]); 6793 ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[12]); 6794 ASSERT_EQUAL_64(0x7766554433221100, dst[13]); 6795 ASSERT_EQUAL_64(0x0011223344556677, dst[14]); 6796 ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[15]); 6797 ASSERT_EQUAL_64(src_base, x16); 6798 ASSERT_EQUAL_64(dst_base, x17); 6799 ASSERT_EQUAL_64(src_base + 24, x18); 6800 ASSERT_EQUAL_64(dst_base + 64, x19); 6801 ASSERT_EQUAL_64(src_base + 32, x20); 6802 6803 TEARDOWN(); 6804 } 6805 6806 6807 TEST(ldnp_stnp_offset_float) { 6808 SETUP(); 6809 6810 float src[3] = {1.2, 2.3, 3.4}; 6811 float dst[6] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; 6812 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 6813 uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); 6814 6815 START(); 6816 __ Mov(x16, src_base); 6817 __ Mov(x17, dst_base); 6818 __ Mov(x18, src_base + 12); 6819 __ Mov(x19, dst_base + 24); 6820 6821 // Ensure address set up has happened before executing non-temporal ops. 
6822 __ Dmb(InnerShareable, BarrierAll); 6823 6824 __ Ldnp(s0, s1, MemOperand(x16)); 6825 __ Ldnp(s2, s3, MemOperand(x16, 4)); 6826 __ Ldnp(s5, s4, MemOperand(x18, -8)); 6827 __ Stnp(s1, s0, MemOperand(x17)); 6828 __ Stnp(s3, s2, MemOperand(x17, 8)); 6829 __ Stnp(s4, s5, MemOperand(x19, -8)); 6830 END(); 6831 6832 RUN(); 6833 6834 ASSERT_EQUAL_FP32(1.2, s0); 6835 ASSERT_EQUAL_FP32(2.3, s1); 6836 ASSERT_EQUAL_FP32(2.3, dst[0]); 6837 ASSERT_EQUAL_FP32(1.2, dst[1]); 6838 ASSERT_EQUAL_FP32(2.3, s2); 6839 ASSERT_EQUAL_FP32(3.4, s3); 6840 ASSERT_EQUAL_FP32(3.4, dst[2]); 6841 ASSERT_EQUAL_FP32(2.3, dst[3]); 6842 ASSERT_EQUAL_FP32(3.4, s4); 6843 ASSERT_EQUAL_FP32(2.3, s5); 6844 ASSERT_EQUAL_FP32(3.4, dst[4]); 6845 ASSERT_EQUAL_FP32(2.3, dst[5]); 6846 ASSERT_EQUAL_64(src_base, x16); 6847 ASSERT_EQUAL_64(dst_base, x17); 6848 ASSERT_EQUAL_64(src_base + 12, x18); 6849 ASSERT_EQUAL_64(dst_base + 24, x19); 6850 6851 TEARDOWN(); 6852 } 6853 6854 6855 TEST(ldnp_stnp_offset_double) { 6856 SETUP(); 6857 6858 double src[3] = {1.2, 2.3, 3.4}; 6859 double dst[6] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; 6860 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 6861 uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); 6862 6863 START(); 6864 __ Mov(x16, src_base); 6865 __ Mov(x17, dst_base); 6866 __ Mov(x18, src_base + 24); 6867 __ Mov(x19, dst_base + 48); 6868 6869 // Ensure address set up has happened before executing non-temporal ops. 6870 __ Dmb(InnerShareable, BarrierAll); 6871 6872 __ Ldnp(d0, d1, MemOperand(x16)); 6873 __ Ldnp(d2, d3, MemOperand(x16, 8)); 6874 __ Ldnp(d5, d4, MemOperand(x18, -16)); 6875 __ Stnp(d1, d0, MemOperand(x17)); 6876 __ Stnp(d3, d2, MemOperand(x17, 16)); 6877 __ Stnp(d4, d5, MemOperand(x19, -16)); 6878 END(); 6879 6880 RUN(); 6881 6882 ASSERT_EQUAL_FP64(1.2, d0); 6883 ASSERT_EQUAL_FP64(2.3, d1); 6884 ASSERT_EQUAL_FP64(2.3, dst[0]); 6885 ASSERT_EQUAL_FP64(1.2, dst[1]); 6886 ASSERT_EQUAL_FP64(2.3, d2); 6887 ASSERT_EQUAL_FP64(3.4, d3); 6888 ASSERT_EQUAL_FP64(3.4, dst[2]); 6889 ASSERT_EQUAL_FP64(2.3, dst[3]); 6890 ASSERT_EQUAL_FP64(3.4, d4); 6891 ASSERT_EQUAL_FP64(2.3, d5); 6892 ASSERT_EQUAL_FP64(3.4, dst[4]); 6893 ASSERT_EQUAL_FP64(2.3, dst[5]); 6894 ASSERT_EQUAL_64(src_base, x16); 6895 ASSERT_EQUAL_64(dst_base, x17); 6896 ASSERT_EQUAL_64(src_base + 24, x18); 6897 ASSERT_EQUAL_64(dst_base + 48, x19); 6898 6899 TEARDOWN(); 6900 } 6901 6902 6903 TEST(ldp_stp_preindex) { 6904 SETUP(); 6905 6906 uint64_t src[3] = {0x0011223344556677, 6907 0x8899aabbccddeeff, 6908 0xffeeddccbbaa9988}; 6909 uint64_t dst[5] = {0, 0, 0, 0, 0}; 6910 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 6911 uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); 6912 6913 START(); 6914 __ Mov(x16, src_base); 6915 __ Mov(x17, dst_base); 6916 __ Mov(x18, dst_base + 16); 6917 __ Ldp(w0, w1, MemOperand(x16, 4, PreIndex)); 6918 __ Mov(x19, x16); 6919 __ Ldp(w2, w3, MemOperand(x16, -4, PreIndex)); 6920 __ Stp(w2, w3, MemOperand(x17, 4, PreIndex)); 6921 __ Mov(x20, x17); 6922 __ Stp(w0, w1, MemOperand(x17, -4, PreIndex)); 6923 __ Ldp(x4, x5, MemOperand(x16, 8, PreIndex)); 6924 __ Mov(x21, x16); 6925 __ Ldp(x6, x7, MemOperand(x16, -8, PreIndex)); 6926 __ Stp(x7, x6, MemOperand(x18, 8, PreIndex)); 6927 __ Mov(x22, x18); 6928 __ Stp(x5, x4, MemOperand(x18, -8, PreIndex)); 6929 END(); 6930 6931 RUN(); 6932 6933 ASSERT_EQUAL_64(0x00112233, x0); 6934 ASSERT_EQUAL_64(0xccddeeff, x1); 6935 ASSERT_EQUAL_64(0x44556677, x2); 6936 ASSERT_EQUAL_64(0x00112233, x3); 6937 ASSERT_EQUAL_64(0xccddeeff00112233, dst[0]); 6938 
ASSERT_EQUAL_64(0x0000000000112233, dst[1]); 6939 ASSERT_EQUAL_64(0x8899aabbccddeeff, x4); 6940 ASSERT_EQUAL_64(0xffeeddccbbaa9988, x5); 6941 ASSERT_EQUAL_64(0x0011223344556677, x6); 6942 ASSERT_EQUAL_64(0x8899aabbccddeeff, x7); 6943 ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[2]); 6944 ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[3]); 6945 ASSERT_EQUAL_64(0x0011223344556677, dst[4]); 6946 ASSERT_EQUAL_64(src_base, x16); 6947 ASSERT_EQUAL_64(dst_base, x17); 6948 ASSERT_EQUAL_64(dst_base + 16, x18); 6949 ASSERT_EQUAL_64(src_base + 4, x19); 6950 ASSERT_EQUAL_64(dst_base + 4, x20); 6951 ASSERT_EQUAL_64(src_base + 8, x21); 6952 ASSERT_EQUAL_64(dst_base + 24, x22); 6953 6954 TEARDOWN(); 6955 } 6956 6957 6958 TEST(ldp_stp_preindex_wide) { 6959 SETUP(); 6960 6961 uint64_t src[3] = {0x0011223344556677, 6962 0x8899aabbccddeeff, 6963 0xffeeddccbbaa9988}; 6964 uint64_t dst[5] = {0, 0, 0, 0, 0}; 6965 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 6966 uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); 6967 // Move base too far from the array to force multiple instructions 6968 // to be emitted. 6969 const int64_t base_offset = 1024; 6970 6971 START(); 6972 __ Mov(x24, src_base - base_offset); 6973 __ Mov(x25, dst_base + base_offset); 6974 __ Mov(x18, dst_base + base_offset + 16); 6975 __ Ldp(w0, w1, MemOperand(x24, base_offset + 4, PreIndex)); 6976 __ Mov(x19, x24); 6977 __ Mov(x24, src_base - base_offset + 4); 6978 __ Ldp(w2, w3, MemOperand(x24, base_offset - 4, PreIndex)); 6979 __ Stp(w2, w3, MemOperand(x25, 4 - base_offset, PreIndex)); 6980 __ Mov(x20, x25); 6981 __ Mov(x25, dst_base + base_offset + 4); 6982 __ Mov(x24, src_base - base_offset); 6983 __ Stp(w0, w1, MemOperand(x25, -4 - base_offset, PreIndex)); 6984 __ Ldp(x4, x5, MemOperand(x24, base_offset + 8, PreIndex)); 6985 __ Mov(x21, x24); 6986 __ Mov(x24, src_base - base_offset + 8); 6987 __ Ldp(x6, x7, MemOperand(x24, base_offset - 8, PreIndex)); 6988 __ Stp(x7, x6, MemOperand(x18, 8 - base_offset, PreIndex)); 6989 __ Mov(x22, x18); 6990 __ Mov(x18, dst_base + base_offset + 16 + 8); 6991 __ Stp(x5, x4, MemOperand(x18, -8 - base_offset, PreIndex)); 6992 END(); 6993 6994 RUN(); 6995 6996 ASSERT_EQUAL_64(0x00112233, x0); 6997 ASSERT_EQUAL_64(0xccddeeff, x1); 6998 ASSERT_EQUAL_64(0x44556677, x2); 6999 ASSERT_EQUAL_64(0x00112233, x3); 7000 ASSERT_EQUAL_64(0xccddeeff00112233, dst[0]); 7001 ASSERT_EQUAL_64(0x0000000000112233, dst[1]); 7002 ASSERT_EQUAL_64(0x8899aabbccddeeff, x4); 7003 ASSERT_EQUAL_64(0xffeeddccbbaa9988, x5); 7004 ASSERT_EQUAL_64(0x0011223344556677, x6); 7005 ASSERT_EQUAL_64(0x8899aabbccddeeff, x7); 7006 ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[2]); 7007 ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[3]); 7008 ASSERT_EQUAL_64(0x0011223344556677, dst[4]); 7009 ASSERT_EQUAL_64(src_base, x24); 7010 ASSERT_EQUAL_64(dst_base, x25); 7011 ASSERT_EQUAL_64(dst_base + 16, x18); 7012 ASSERT_EQUAL_64(src_base + 4, x19); 7013 ASSERT_EQUAL_64(dst_base + 4, x20); 7014 ASSERT_EQUAL_64(src_base + 8, x21); 7015 ASSERT_EQUAL_64(dst_base + 24, x22); 7016 7017 TEARDOWN(); 7018 } 7019 7020 7021 TEST(ldp_stp_postindex) { 7022 SETUP(); 7023 7024 uint64_t src[4] = {0x0011223344556677, 7025 0x8899aabbccddeeff, 7026 0xffeeddccbbaa9988, 7027 0x7766554433221100}; 7028 uint64_t dst[5] = {0, 0, 0, 0, 0}; 7029 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 7030 uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); 7031 7032 START(); 7033 __ Mov(x16, src_base); 7034 __ Mov(x17, dst_base); 7035 __ Mov(x18, dst_base + 16); 7036 __ Ldp(w0, w1, MemOperand(x16, 4, 
PostIndex)); 7037 __ Mov(x19, x16); 7038 __ Ldp(w2, w3, MemOperand(x16, -4, PostIndex)); 7039 __ Stp(w2, w3, MemOperand(x17, 4, PostIndex)); 7040 __ Mov(x20, x17); 7041 __ Stp(w0, w1, MemOperand(x17, -4, PostIndex)); 7042 __ Ldp(x4, x5, MemOperand(x16, 8, PostIndex)); 7043 __ Mov(x21, x16); 7044 __ Ldp(x6, x7, MemOperand(x16, -8, PostIndex)); 7045 __ Stp(x7, x6, MemOperand(x18, 8, PostIndex)); 7046 __ Mov(x22, x18); 7047 __ Stp(x5, x4, MemOperand(x18, -8, PostIndex)); 7048 END(); 7049 7050 RUN(); 7051 7052 ASSERT_EQUAL_64(0x44556677, x0); 7053 ASSERT_EQUAL_64(0x00112233, x1); 7054 ASSERT_EQUAL_64(0x00112233, x2); 7055 ASSERT_EQUAL_64(0xccddeeff, x3); 7056 ASSERT_EQUAL_64(0x4455667700112233, dst[0]); 7057 ASSERT_EQUAL_64(0x0000000000112233, dst[1]); 7058 ASSERT_EQUAL_64(0x0011223344556677, x4); 7059 ASSERT_EQUAL_64(0x8899aabbccddeeff, x5); 7060 ASSERT_EQUAL_64(0x8899aabbccddeeff, x6); 7061 ASSERT_EQUAL_64(0xffeeddccbbaa9988, x7); 7062 ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[2]); 7063 ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[3]); 7064 ASSERT_EQUAL_64(0x0011223344556677, dst[4]); 7065 ASSERT_EQUAL_64(src_base, x16); 7066 ASSERT_EQUAL_64(dst_base, x17); 7067 ASSERT_EQUAL_64(dst_base + 16, x18); 7068 ASSERT_EQUAL_64(src_base + 4, x19); 7069 ASSERT_EQUAL_64(dst_base + 4, x20); 7070 ASSERT_EQUAL_64(src_base + 8, x21); 7071 ASSERT_EQUAL_64(dst_base + 24, x22); 7072 7073 TEARDOWN(); 7074 } 7075 7076 7077 TEST(ldp_stp_postindex_wide) { 7078 SETUP(); 7079 7080 uint64_t src[4] = {0x0011223344556677, 7081 0x8899aabbccddeeff, 7082 0xffeeddccbbaa9988, 7083 0x7766554433221100}; 7084 uint64_t dst[5] = {0, 0, 0, 0, 0}; 7085 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 7086 uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); 7087 // Move base too far from the array to force multiple instructions 7088 // to be emitted. 
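// Post-index writeback is restricted to the same scaled 7-bit immediate, so
// for these out-of-range offsets the MacroAssembler presumably splits each
// operation into the plain pair access followed by a separate update of the
// base register, roughly (illustrative only):
//
//   ldp w0, w1, [x24]
//   add x24, x24, #1028  // base_offset + 4
//
// The explicit Sub/Add fix-ups between the accesses below then rebase x24,
// x25 and x18 so that each access still hits the arrays.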
7089 const int64_t base_offset = 1024; 7090 7091 START(); 7092 __ Mov(x24, src_base); 7093 __ Mov(x25, dst_base); 7094 __ Mov(x18, dst_base + 16); 7095 __ Ldp(w0, w1, MemOperand(x24, base_offset + 4, PostIndex)); 7096 __ Mov(x19, x24); 7097 __ Sub(x24, x24, base_offset); 7098 __ Ldp(w2, w3, MemOperand(x24, base_offset - 4, PostIndex)); 7099 __ Stp(w2, w3, MemOperand(x25, 4 - base_offset, PostIndex)); 7100 __ Mov(x20, x25); 7101 __ Sub(x24, x24, base_offset); 7102 __ Add(x25, x25, base_offset); 7103 __ Stp(w0, w1, MemOperand(x25, -4 - base_offset, PostIndex)); 7104 __ Ldp(x4, x5, MemOperand(x24, base_offset + 8, PostIndex)); 7105 __ Mov(x21, x24); 7106 __ Sub(x24, x24, base_offset); 7107 __ Ldp(x6, x7, MemOperand(x24, base_offset - 8, PostIndex)); 7108 __ Stp(x7, x6, MemOperand(x18, 8 - base_offset, PostIndex)); 7109 __ Mov(x22, x18); 7110 __ Add(x18, x18, base_offset); 7111 __ Stp(x5, x4, MemOperand(x18, -8 - base_offset, PostIndex)); 7112 END(); 7113 7114 RUN(); 7115 7116 ASSERT_EQUAL_64(0x44556677, x0); 7117 ASSERT_EQUAL_64(0x00112233, x1); 7118 ASSERT_EQUAL_64(0x00112233, x2); 7119 ASSERT_EQUAL_64(0xccddeeff, x3); 7120 ASSERT_EQUAL_64(0x4455667700112233, dst[0]); 7121 ASSERT_EQUAL_64(0x0000000000112233, dst[1]); 7122 ASSERT_EQUAL_64(0x0011223344556677, x4); 7123 ASSERT_EQUAL_64(0x8899aabbccddeeff, x5); 7124 ASSERT_EQUAL_64(0x8899aabbccddeeff, x6); 7125 ASSERT_EQUAL_64(0xffeeddccbbaa9988, x7); 7126 ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[2]); 7127 ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[3]); 7128 ASSERT_EQUAL_64(0x0011223344556677, dst[4]); 7129 ASSERT_EQUAL_64(src_base + base_offset, x24); 7130 ASSERT_EQUAL_64(dst_base - base_offset, x25); 7131 ASSERT_EQUAL_64(dst_base - base_offset + 16, x18); 7132 ASSERT_EQUAL_64(src_base + base_offset + 4, x19); 7133 ASSERT_EQUAL_64(dst_base - base_offset + 4, x20); 7134 ASSERT_EQUAL_64(src_base + base_offset + 8, x21); 7135 ASSERT_EQUAL_64(dst_base - base_offset + 24, x22); 7136 7137 TEARDOWN(); 7138 } 7139 7140 7141 TEST(ldp_sign_extend) { 7142 SETUP(); 7143 7144 uint32_t src[2] = {0x80000000, 0x7fffffff}; 7145 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 7146 7147 START(); 7148 __ Mov(x24, src_base); 7149 __ Ldpsw(x0, x1, MemOperand(x24)); 7150 END(); 7151 7152 RUN(); 7153 7154 ASSERT_EQUAL_64(0xffffffff80000000, x0); 7155 ASSERT_EQUAL_64(0x000000007fffffff, x1); 7156 7157 TEARDOWN(); 7158 } 7159 7160 7161 TEST(ldur_stur) { 7162 SETUP(); 7163 7164 int64_t src[2] = {0x0123456789abcdef, 0x0123456789abcdef}; 7165 int64_t dst[5] = {0, 0, 0, 0, 0}; 7166 uintptr_t src_base = reinterpret_cast<uintptr_t>(src); 7167 uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); 7168 7169 START(); 7170 __ Mov(x17, src_base); 7171 __ Mov(x18, dst_base); 7172 __ Mov(x19, src_base + 16); 7173 __ Mov(x20, dst_base + 32); 7174 __ Mov(x21, dst_base + 40); 7175 __ Ldr(w0, MemOperand(x17, 1)); 7176 __ Str(w0, MemOperand(x18, 2)); 7177 __ Ldr(x1, MemOperand(x17, 3)); 7178 __ Str(x1, MemOperand(x18, 9)); 7179 __ Ldr(w2, MemOperand(x19, -9)); 7180 __ Str(w2, MemOperand(x20, -5)); 7181 __ Ldrb(w3, MemOperand(x19, -1)); 7182 __ Strb(w3, MemOperand(x21, -1)); 7183 END(); 7184 7185 RUN(); 7186 7187 ASSERT_EQUAL_64(0x6789abcd, x0); 7188 ASSERT_EQUAL_64(0x00006789abcd0000, dst[0]); 7189 ASSERT_EQUAL_64(0xabcdef0123456789, x1); 7190 ASSERT_EQUAL_64(0xcdef012345678900, dst[1]); 7191 ASSERT_EQUAL_64(0x000000ab, dst[2]); 7192 ASSERT_EQUAL_64(0xabcdef01, x2); 7193 ASSERT_EQUAL_64(0x00abcdef01000000, dst[3]); 7194 ASSERT_EQUAL_64(0x00000001, x3); 7195 
ASSERT_EQUAL_64(0x0100000000000000, dst[4]);
7196 ASSERT_EQUAL_64(src_base, x17);
7197 ASSERT_EQUAL_64(dst_base, x18);
7198 ASSERT_EQUAL_64(src_base + 16, x19);
7199 ASSERT_EQUAL_64(dst_base + 32, x20);
7200
7201 TEARDOWN();
7202 }
7203
7204
7205 TEST(ldur_stur_fp) {
7206 SETUP();
7207
7208 int64_t src[3] = {0x0123456789abcdef, 0x0123456789abcdef, 0x0123456789abcdef};
7209 int64_t dst[5] = {0, 0, 0, 0, 0};
7210 uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
7211 uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
7212
7213 START();
7214 __ Mov(x17, src_base);
7215 __ Mov(x18, dst_base);
7216 __ Ldr(b0, MemOperand(x17));
7217 __ Str(b0, MemOperand(x18));
7218 __ Ldr(h1, MemOperand(x17, 1));
7219 __ Str(h1, MemOperand(x18, 1));
7220 __ Ldr(s2, MemOperand(x17, 2));
7221 __ Str(s2, MemOperand(x18, 3));
7222 __ Ldr(d3, MemOperand(x17, 3));
7223 __ Str(d3, MemOperand(x18, 7));
7224 __ Ldr(q4, MemOperand(x17, 4));
7225 __ Str(q4, MemOperand(x18, 15));
7226 END();
7227
7228 RUN();
7229
7230 ASSERT_EQUAL_128(0, 0xef, q0);
7231 ASSERT_EQUAL_128(0, 0xabcd, q1);
7232 ASSERT_EQUAL_128(0, 0x456789ab, q2);
7233 ASSERT_EQUAL_128(0, 0xabcdef0123456789, q3);
7234 ASSERT_EQUAL_128(0x89abcdef01234567, 0x89abcdef01234567, q4);
7235 ASSERT_EQUAL_64(0x89456789ababcdef, dst[0]);
7236 ASSERT_EQUAL_64(0x67abcdef01234567, dst[1]);
7237 ASSERT_EQUAL_64(0x6789abcdef012345, dst[2]);
7238 ASSERT_EQUAL_64(0x0089abcdef012345, dst[3]);
7239
7240 TEARDOWN();
7241 }
7242
7243
7244 TEST(ldr_literal) {
7245 SETUP();
7246
7247 START();
7248 __ Ldr(x2, 0x1234567890abcdef);
7249 __ Ldr(w3, 0xfedcba09);
7250 __ Ldrsw(x4, 0x7fffffff);
7251 __ Ldrsw(x5, 0x80000000);
7252 __ Ldr(q11, 0x1234000056780000, 0xabcd0000ef000000);
7253 __ Ldr(d13, 1.234);
7254 __ Ldr(s25, 2.5);
7255 END();
7256
7257 RUN();
7258
7259 ASSERT_EQUAL_64(0x1234567890abcdef, x2);
7260 ASSERT_EQUAL_64(0xfedcba09, x3);
7261 ASSERT_EQUAL_64(0x7fffffff, x4);
7262 ASSERT_EQUAL_64(0xffffffff80000000, x5);
7263 ASSERT_EQUAL_128(0x1234000056780000, 0xabcd0000ef000000, q11);
7264 ASSERT_EQUAL_FP64(1.234, d13);
7265 ASSERT_EQUAL_FP32(2.5, s25);
7266
7267 TEARDOWN();
7268 }
7269
7270
7271 TEST(ldr_literal_range) {
7272 SETUP();
7273
7274 START();
7275 // Make sure the pool is empty.
7276 masm.EmitLiteralPool(LiteralPool::kBranchRequired);
7277 ASSERT_LITERAL_POOL_SIZE(0);
7278
7279 // Create some literal pool entries.
7280 __ Ldr(x0, 0x1234567890abcdef);
7281 __ Ldr(w1, 0xfedcba09);
7282 __ Ldrsw(x2, 0x7fffffff);
7283 __ Ldrsw(x3, 0x80000000);
7284 __ Ldr(q2, 0x1234000056780000, 0xabcd0000ef000000);
7285 __ Ldr(d0, 1.234);
7286 __ Ldr(s1, 2.5);
7287 ASSERT_LITERAL_POOL_SIZE(48);
7288
7289 // Emit more code than the maximum literal load range to ensure the pool
7290 // gets emitted.
7291 const ptrdiff_t end = masm.GetCursorOffset() + 2 * kMaxLoadLiteralRange;
7292 while (masm.GetCursorOffset() < end) {
7293 __ Nop();
7294 }
7295
7296 // The pool should have been emitted.
7297 ASSERT_LITERAL_POOL_SIZE(0);
7298
7299 // These loads should be after the pool (and will require a new one).
7300 __ Ldr(x4, 0x34567890abcdef12);
7301 __ Ldr(w5, 0xdcba09fe);
7302 __ Ldrsw(x6, 0x7fffffff);
7303 __ Ldrsw(x7, 0x80000000);
7304 __ Ldr(q6, 0x1234000056780000, 0xabcd0000ef000000);
7305 __ Ldr(d4, 123.4);
7306 __ Ldr(s5, 250.0);
7307 ASSERT_LITERAL_POOL_SIZE(48);
7308 END();
7309
7310 RUN();
7311
7312 // Check that the literals loaded correctly.
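// (For reference, the 48 bytes asserted for the pool size above are just
// the sum of the pooled literal sizes: 8 (x) + 4 (w) + 4 + 4 (the two
// ldrsw words) + 16 (q) + 8 (d) + 4 (s) = 48.)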
7313 ASSERT_EQUAL_64(0x1234567890abcdef, x0);
7314 ASSERT_EQUAL_64(0xfedcba09, x1);
7315 ASSERT_EQUAL_64(0x7fffffff, x2);
7316 ASSERT_EQUAL_64(0xffffffff80000000, x3);
7317 ASSERT_EQUAL_128(0x1234000056780000, 0xabcd0000ef000000, q2);
7318 ASSERT_EQUAL_FP64(1.234, d0);
7319 ASSERT_EQUAL_FP32(2.5, s1);
7320 ASSERT_EQUAL_64(0x34567890abcdef12, x4);
7321 ASSERT_EQUAL_64(0xdcba09fe, x5);
7322 ASSERT_EQUAL_64(0x7fffffff, x6);
7323 ASSERT_EQUAL_64(0xffffffff80000000, x7);
7324 ASSERT_EQUAL_128(0x1234000056780000, 0xabcd0000ef000000, q6);
7325 ASSERT_EQUAL_FP64(123.4, d4);
7326 ASSERT_EQUAL_FP32(250.0, s5);
7327
7328 TEARDOWN();
7329 }
7330
7331
7332 TEST(ldr_literal_values_q) {
7333 SETUP();
7334
7335 static const uint64_t kHalfValues[] = {0x8000000000000000,
7336 0x7fffffffffffffff,
7337 0x0000000000000000,
7338 0xffffffffffffffff,
7339 0x00ff00ff00ff00ff,
7340 0x1234567890abcdef};
7341 const int card = sizeof(kHalfValues) / sizeof(kHalfValues[0]);
7342 const Register& ref_low64 = x1;
7343 const Register& ref_high64 = x2;
7344 const Register& loaded_low64 = x3;
7345 const Register& loaded_high64 = x4;
7346 const VRegister& tgt = q0;
7347
7348 START();
7349 __ Mov(x0, 0);
7350
7351 for (int i = 0; i < card; i++) {
7352 __ Mov(ref_low64, kHalfValues[i]);
7353 for (int j = 0; j < card; j++) {
7354 __ Mov(ref_high64, kHalfValues[j]);
7355 __ Ldr(tgt, kHalfValues[j], kHalfValues[i]);
7356 __ Mov(loaded_low64, tgt.V2D(), 0);
7357 __ Mov(loaded_high64, tgt.V2D(), 1);
7358 __ Cmp(loaded_low64, ref_low64);
7359 __ Ccmp(loaded_high64, ref_high64, NoFlag, eq);
7360 __ Cset(x0, ne);
7361 }
7362 }
7363 END();
7364
7365 RUN();
7366
7367 // If one of the values differs, the trace can be used to identify which one.
7368 ASSERT_EQUAL_64(0, x0);
7369
7370 TEARDOWN();
7371 }
7372
7373
7374 template <typename T>
7375 void LoadIntValueHelper(T values[], int card) {
7376 SETUP();
7377
7378 const bool is_32bit = (sizeof(T) == 4);
7379 Register tgt1 = is_32bit ? Register(w1) : Register(x1);
7380 Register tgt2 = is_32bit ? Register(w2) : Register(x2);
7381
7382 START();
7383 __ Mov(x0, 0);
7384
7385 // If any of the values differ then x0 will be one.
7386 for (int i = 0; i < card; ++i) {
7387 __ Mov(tgt1, values[i]);
7388 __ Ldr(tgt2, values[i]);
7389 __ Cmp(tgt1, tgt2);
7390 __ Cset(x0, ne);
7391 }
7392 END();
7393
7394 RUN();
7395
7396 // If one of the values differs, the trace can be used to identify which one.
7397 ASSERT_EQUAL_64(0, x0);
7398
7399 TEARDOWN();
7400 }
7401
7402
7403 TEST(ldr_literal_values_x) {
7404 static const uint64_t kValues[] = {0x8000000000000000,
7405 0x7fffffffffffffff,
7406 0x0000000000000000,
7407 0xffffffffffffffff,
7408 0x00ff00ff00ff00ff,
7409 0x1234567890abcdef};
7410
7411 LoadIntValueHelper(kValues, sizeof(kValues) / sizeof(kValues[0]));
7412 }
7413
7414
7415 TEST(ldr_literal_values_w) {
7416 static const uint32_t kValues[] = {0x80000000,
7417 0x7fffffff,
7418 0x00000000,
7419 0xffffffff,
7420 0x00ff00ff,
7421 0x12345678,
7422 0x90abcdef};
7423
7424 LoadIntValueHelper(kValues, sizeof(kValues) / sizeof(kValues[0]));
7425 }
7426
7427
7428 template <typename T>
7429 void LoadFPValueHelper(T values[], int card) {
7430 SETUP();
7431
7432 const bool is_32bits = (sizeof(T) == 4);
7433 const FPRegister& fp_tgt = is_32bits ? s2 : d2;
7434 const Register& tgt1 = is_32bits ? Register(w1) : Register(x1);
7435 const Register& tgt2 = is_32bits ? Register(w2) : Register(x2);
7436
7437 START();
7438 __ Mov(x0, 0);
7439
7440 // If any of the values differ then x0 will be one.
7441 for (int i = 0; i < card; ++i) { 7442 __ Mov(tgt1, 7443 is_32bits ? FloatToRawbits(values[i]) : DoubleToRawbits(values[i])); 7444 __ Ldr(fp_tgt, values[i]); 7445 __ Fmov(tgt2, fp_tgt); 7446 __ Cmp(tgt1, tgt2); 7447 __ Cset(x0, ne); 7448 } 7449 END(); 7450 7451 RUN(); 7452 7453 // If one of the values differs, the trace can be used to identify which one. 7454 ASSERT_EQUAL_64(0, x0); 7455 7456 TEARDOWN(); 7457 } 7458 7459 TEST(ldr_literal_values_d) { 7460 static const double kValues[] = {-0.0, 0.0, -1.0, 1.0, -1e10, 1e10}; 7461 7462 LoadFPValueHelper(kValues, sizeof(kValues) / sizeof(kValues[0])); 7463 } 7464 7465 7466 TEST(ldr_literal_values_s) { 7467 static const float kValues[] = {-0.0, 0.0, -1.0, 1.0, -1e10, 1e10}; 7468 7469 LoadFPValueHelper(kValues, sizeof(kValues) / sizeof(kValues[0])); 7470 } 7471 7472 7473 TEST(ldr_literal_custom) { 7474 SETUP(); 7475 7476 Label end_of_pool_before; 7477 Label end_of_pool_after; 7478 7479 const size_t kSizeOfPoolInBytes = 44; 7480 7481 Literal<uint64_t> before_x(0x1234567890abcdef); 7482 Literal<uint32_t> before_w(0xfedcba09); 7483 Literal<uint32_t> before_sx(0x80000000); 7484 Literal<uint64_t> before_q(0x1234000056780000, 0xabcd0000ef000000); 7485 Literal<double> before_d(1.234); 7486 Literal<float> before_s(2.5); 7487 7488 Literal<uint64_t> after_x(0x1234567890abcdef); 7489 Literal<uint32_t> after_w(0xfedcba09); 7490 Literal<uint32_t> after_sx(0x80000000); 7491 Literal<uint64_t> after_q(0x1234000056780000, 0xabcd0000ef000000); 7492 Literal<double> after_d(1.234); 7493 Literal<float> after_s(2.5); 7494 7495 START(); 7496 7497 // Manually generate a pool. 7498 __ B(&end_of_pool_before); 7499 { 7500 ExactAssemblyScope scope(&masm, kSizeOfPoolInBytes); 7501 __ place(&before_x); 7502 __ place(&before_w); 7503 __ place(&before_sx); 7504 __ place(&before_q); 7505 __ place(&before_d); 7506 __ place(&before_s); 7507 } 7508 __ Bind(&end_of_pool_before); 7509 7510 { 7511 ExactAssemblyScope scope(&masm, 12 * kInstructionSize); 7512 __ ldr(x2, &before_x); 7513 __ ldr(w3, &before_w); 7514 __ ldrsw(x5, &before_sx); 7515 __ ldr(q11, &before_q); 7516 __ ldr(d13, &before_d); 7517 __ ldr(s25, &before_s); 7518 7519 __ ldr(x6, &after_x); 7520 __ ldr(w7, &after_w); 7521 __ ldrsw(x8, &after_sx); 7522 __ ldr(q18, &after_q); 7523 __ ldr(d14, &after_d); 7524 __ ldr(s26, &after_s); 7525 } 7526 7527 // Manually generate a pool. 
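// (The branch skips over the pool data so that it is never executed; the
// raw ldr instructions above refer to the placed literals PC-relatively.)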
7528 __ B(&end_of_pool_after); 7529 { 7530 ExactAssemblyScope scope(&masm, kSizeOfPoolInBytes); 7531 __ place(&after_x); 7532 __ place(&after_w); 7533 __ place(&after_sx); 7534 __ place(&after_q); 7535 __ place(&after_d); 7536 __ place(&after_s); 7537 } 7538 __ Bind(&end_of_pool_after); 7539 7540 END(); 7541 7542 RUN(); 7543 7544 ASSERT_EQUAL_64(0x1234567890abcdef, x2); 7545 ASSERT_EQUAL_64(0xfedcba09, x3); 7546 ASSERT_EQUAL_64(0xffffffff80000000, x5); 7547 ASSERT_EQUAL_128(0x1234000056780000, 0xabcd0000ef000000, q11); 7548 ASSERT_EQUAL_FP64(1.234, d13); 7549 ASSERT_EQUAL_FP32(2.5, s25); 7550 7551 ASSERT_EQUAL_64(0x1234567890abcdef, x6); 7552 ASSERT_EQUAL_64(0xfedcba09, x7); 7553 ASSERT_EQUAL_64(0xffffffff80000000, x8); 7554 ASSERT_EQUAL_128(0x1234000056780000, 0xabcd0000ef000000, q18); 7555 ASSERT_EQUAL_FP64(1.234, d14); 7556 ASSERT_EQUAL_FP32(2.5, s26); 7557 7558 TEARDOWN(); 7559 } 7560 7561 7562 TEST(ldr_literal_custom_shared) { 7563 SETUP(); 7564 7565 Label end_of_pool_before; 7566 Label end_of_pool_after; 7567 7568 const size_t kSizeOfPoolInBytes = 40; 7569 7570 Literal<uint64_t> before_x(0x1234567890abcdef); 7571 Literal<uint32_t> before_w(0xfedcba09); 7572 Literal<uint64_t> before_q(0x1234000056780000, 0xabcd0000ef000000); 7573 Literal<double> before_d(1.234); 7574 Literal<float> before_s(2.5); 7575 7576 Literal<uint64_t> after_x(0x1234567890abcdef); 7577 Literal<uint32_t> after_w(0xfedcba09); 7578 Literal<uint64_t> after_q(0x1234000056780000, 0xabcd0000ef000000); 7579 Literal<double> after_d(1.234); 7580 Literal<float> after_s(2.5); 7581 7582 START(); 7583 7584 // Manually generate a pool. 7585 __ B(&end_of_pool_before); 7586 { 7587 ExactAssemblyScope scope(&masm, kSizeOfPoolInBytes); 7588 __ place(&before_x); 7589 __ place(&before_w); 7590 __ place(&before_q); 7591 __ place(&before_d); 7592 __ place(&before_s); 7593 } 7594 __ Bind(&end_of_pool_before); 7595 7596 // Load the entries several times to test that literals can be shared. 7597 for (int i = 0; i < 50; i++) { 7598 ExactAssemblyScope scope(&masm, 12 * kInstructionSize); 7599 __ ldr(x2, &before_x); 7600 __ ldr(w3, &before_w); 7601 __ ldrsw(x5, &before_w); // Re-use before_w. 7602 __ ldr(q11, &before_q); 7603 __ ldr(d13, &before_d); 7604 __ ldr(s25, &before_s); 7605 7606 __ ldr(x6, &after_x); 7607 __ ldr(w7, &after_w); 7608 __ ldrsw(x8, &after_w); // Re-use after_w. 7609 __ ldr(q18, &after_q); 7610 __ ldr(d14, &after_d); 7611 __ ldr(s26, &after_s); 7612 } 7613 7614 // Manually generate a pool. 7615 __ B(&end_of_pool_after); 7616 { 7617 ExactAssemblyScope scope(&masm, kSizeOfPoolInBytes); 7618 __ place(&after_x); 7619 __ place(&after_w); 7620 __ place(&after_q); 7621 __ place(&after_d); 7622 __ place(&after_s); 7623 } 7624 __ Bind(&end_of_pool_after); 7625 7626 END(); 7627 7628 RUN(); 7629 7630 ASSERT_EQUAL_64(0x1234567890abcdef, x2); 7631 ASSERT_EQUAL_64(0xfedcba09, x3); 7632 ASSERT_EQUAL_64(0xfffffffffedcba09, x5); 7633 ASSERT_EQUAL_128(0x1234000056780000, 0xabcd0000ef000000, q11); 7634 ASSERT_EQUAL_FP64(1.234, d13); 7635 ASSERT_EQUAL_FP32(2.5, s25); 7636 7637 ASSERT_EQUAL_64(0x1234567890abcdef, x6); 7638 ASSERT_EQUAL_64(0xfedcba09, x7); 7639 ASSERT_EQUAL_64(0xfffffffffedcba09, x8); 7640 ASSERT_EQUAL_128(0x1234000056780000, 0xabcd0000ef000000, q18); 7641 ASSERT_EQUAL_FP64(1.234, d14); 7642 ASSERT_EQUAL_FP32(2.5, s26); 7643 7644 TEARDOWN(); 7645 } 7646 7647 7648 TEST(prfm_offset) { 7649 SETUP(); 7650 7651 START(); 7652 // The address used in prfm doesn't have to be valid. 
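// (A prefetch is only a hint to the memory system and can never fault.)
// The five-bit prfop field encodes <type><target><policy>, e.g. PLDL1KEEP;
// only 18 of the 32 encodings are allocated, and unallocated ones execute
// as NOPs, so the loop below can safely walk the whole range.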
7653 __ Mov(x0, 0x0123456789abcdef); 7654 7655 for (int i = 0; i < (1 << ImmPrefetchOperation_width); i++) { 7656 // Unallocated prefetch operations are ignored, so test all of them. 7657 PrefetchOperation op = static_cast<PrefetchOperation>(i); 7658 7659 __ Prfm(op, MemOperand(x0)); 7660 __ Prfm(op, MemOperand(x0, 8)); 7661 __ Prfm(op, MemOperand(x0, 32760)); 7662 __ Prfm(op, MemOperand(x0, 32768)); 7663 7664 __ Prfm(op, MemOperand(x0, 1)); 7665 __ Prfm(op, MemOperand(x0, 9)); 7666 __ Prfm(op, MemOperand(x0, 255)); 7667 __ Prfm(op, MemOperand(x0, 257)); 7668 __ Prfm(op, MemOperand(x0, -1)); 7669 __ Prfm(op, MemOperand(x0, -9)); 7670 __ Prfm(op, MemOperand(x0, -255)); 7671 __ Prfm(op, MemOperand(x0, -257)); 7672 7673 __ Prfm(op, MemOperand(x0, 0xfedcba9876543210)); 7674 } 7675 7676 END(); 7677 RUN(); 7678 TEARDOWN(); 7679 } 7680 7681 7682 TEST(prfm_regoffset) { 7683 SETUP(); 7684 7685 START(); 7686 // The address used in prfm doesn't have to be valid. 7687 __ Mov(x0, 0x0123456789abcdef); 7688 7689 CPURegList inputs(CPURegister::kRegister, kXRegSize, 10, 18); 7690 __ Mov(x10, 0); 7691 __ Mov(x11, 1); 7692 __ Mov(x12, 8); 7693 __ Mov(x13, 255); 7694 __ Mov(x14, -0); 7695 __ Mov(x15, -1); 7696 __ Mov(x16, -8); 7697 __ Mov(x17, -255); 7698 __ Mov(x18, 0xfedcba9876543210); 7699 7700 for (int i = 0; i < (1 << ImmPrefetchOperation_width); i++) { 7701 // Unallocated prefetch operations are ignored, so test all of them. 7702 PrefetchOperation op = static_cast<PrefetchOperation>(i); 7703 7704 CPURegList loop = inputs; 7705 while (!loop.IsEmpty()) { 7706 Register input(loop.PopLowestIndex()); 7707 __ Prfm(op, MemOperand(x0, input)); 7708 __ Prfm(op, MemOperand(x0, input, UXTW)); 7709 __ Prfm(op, MemOperand(x0, input, UXTW, 3)); 7710 __ Prfm(op, MemOperand(x0, input, LSL)); 7711 __ Prfm(op, MemOperand(x0, input, LSL, 3)); 7712 __ Prfm(op, MemOperand(x0, input, SXTW)); 7713 __ Prfm(op, MemOperand(x0, input, SXTW, 3)); 7714 __ Prfm(op, MemOperand(x0, input, SXTX)); 7715 __ Prfm(op, MemOperand(x0, input, SXTX, 3)); 7716 } 7717 } 7718 7719 END(); 7720 RUN(); 7721 TEARDOWN(); 7722 } 7723 7724 7725 TEST(prfm_literal_imm19) { 7726 SETUP(); 7727 START(); 7728 7729 for (int i = 0; i < (1 << ImmPrefetchOperation_width); i++) { 7730 // Unallocated prefetch operations are ignored, so test all of them. 7731 PrefetchOperation op = static_cast<PrefetchOperation>(i); 7732 7733 ExactAssemblyScope scope(&masm, 7 * kInstructionSize); 7734 // The address used in prfm doesn't have to be valid. 7735 __ prfm(op, INT64_C(0)); 7736 __ prfm(op, 1); 7737 __ prfm(op, -1); 7738 __ prfm(op, 1000); 7739 __ prfm(op, -1000); 7740 __ prfm(op, 0x3ffff); 7741 __ prfm(op, -0x40000); 7742 } 7743 7744 END(); 7745 RUN(); 7746 TEARDOWN(); 7747 } 7748 7749 7750 TEST(prfm_literal) { 7751 SETUP(); 7752 7753 Label end_of_pool_before; 7754 Label end_of_pool_after; 7755 Literal<uint64_t> before(0); 7756 Literal<uint64_t> after(0); 7757 7758 START(); 7759 7760 // Manually generate a pool. 7761 __ B(&end_of_pool_before); 7762 { 7763 ExactAssemblyScope scope(&masm, before.GetSize()); 7764 __ place(&before); 7765 } 7766 __ Bind(&end_of_pool_before); 7767 7768 for (int i = 0; i < (1 << ImmPrefetchOperation_width); i++) { 7769 // Unallocated prefetch operations are ignored, so test all of them. 7770 PrefetchOperation op = static_cast<PrefetchOperation>(i); 7771 7772 ExactAssemblyScope guard(&masm, 2 * kInstructionSize); 7773 __ prfm(op, &before); 7774 __ prfm(op, &after); 7775 } 7776 7777 // Manually generate a pool. 
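// Placing `after` here binds the literal and resolves the forward
// PC-relative prfm references above.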
7778 __ B(&end_of_pool_after); 7779 { 7780 ExactAssemblyScope scope(&masm, after.GetSize()); 7781 __ place(&after); 7782 } 7783 __ Bind(&end_of_pool_after); 7784 7785 END(); 7786 RUN(); 7787 TEARDOWN(); 7788 } 7789 7790 7791 TEST(prfm_wide) { 7792 SETUP(); 7793 7794 START(); 7795 // The address used in prfm doesn't have to be valid. 7796 __ Mov(x0, 0x0123456789abcdef); 7797 7798 for (int i = 0; i < (1 << ImmPrefetchOperation_width); i++) { 7799 // Unallocated prefetch operations are ignored, so test all of them. 7800 PrefetchOperation op = static_cast<PrefetchOperation>(i); 7801 7802 __ Prfm(op, MemOperand(x0, 0x40000)); 7803 __ Prfm(op, MemOperand(x0, -0x40001)); 7804 __ Prfm(op, MemOperand(x0, UINT64_C(0x5555555555555555))); 7805 __ Prfm(op, MemOperand(x0, UINT64_C(0xfedcba9876543210))); 7806 } 7807 7808 END(); 7809 RUN(); 7810 TEARDOWN(); 7811 } 7812 7813 7814 TEST(load_prfm_literal) { 7815 // Test literals shared between both prfm and ldr. 7816 SETUP(); 7817 7818 Label end_of_pool_before; 7819 Label end_of_pool_after; 7820 7821 const size_t kSizeOfPoolInBytes = 28; 7822 7823 Literal<uint64_t> before_x(0x1234567890abcdef); 7824 Literal<uint32_t> before_w(0xfedcba09); 7825 Literal<uint32_t> before_sx(0x80000000); 7826 Literal<double> before_d(1.234); 7827 Literal<float> before_s(2.5); 7828 Literal<uint64_t> after_x(0x1234567890abcdef); 7829 Literal<uint32_t> after_w(0xfedcba09); 7830 Literal<uint32_t> after_sx(0x80000000); 7831 Literal<double> after_d(1.234); 7832 Literal<float> after_s(2.5); 7833 7834 START(); 7835 7836 // Manually generate a pool. 7837 __ B(&end_of_pool_before); 7838 { 7839 ExactAssemblyScope scope(&masm, kSizeOfPoolInBytes); 7840 __ place(&before_x); 7841 __ place(&before_w); 7842 __ place(&before_sx); 7843 __ place(&before_d); 7844 __ place(&before_s); 7845 } 7846 __ Bind(&end_of_pool_before); 7847 7848 for (int i = 0; i < (1 << ImmPrefetchOperation_width); i++) { 7849 // Unallocated prefetch operations are ignored, so test all of them. 7850 PrefetchOperation op = static_cast<PrefetchOperation>(i); 7851 ExactAssemblyScope scope(&masm, 10 * kInstructionSize); 7852 7853 __ prfm(op, &before_x); 7854 __ prfm(op, &before_w); 7855 __ prfm(op, &before_sx); 7856 __ prfm(op, &before_d); 7857 __ prfm(op, &before_s); 7858 7859 __ prfm(op, &after_x); 7860 __ prfm(op, &after_w); 7861 __ prfm(op, &after_sx); 7862 __ prfm(op, &after_d); 7863 __ prfm(op, &after_s); 7864 } 7865 7866 { 7867 ExactAssemblyScope scope(&masm, 10 * kInstructionSize); 7868 __ ldr(x2, &before_x); 7869 __ ldr(w3, &before_w); 7870 __ ldrsw(x5, &before_sx); 7871 __ ldr(d13, &before_d); 7872 __ ldr(s25, &before_s); 7873 7874 __ ldr(x6, &after_x); 7875 __ ldr(w7, &after_w); 7876 __ ldrsw(x8, &after_sx); 7877 __ ldr(d14, &after_d); 7878 __ ldr(s26, &after_s); 7879 } 7880 7881 // Manually generate a pool. 
7882 __ B(&end_of_pool_after); 7883 { 7884 ExactAssemblyScope scope(&masm, kSizeOfPoolInBytes); 7885 __ place(&after_x); 7886 __ place(&after_w); 7887 __ place(&after_sx); 7888 __ place(&after_d); 7889 __ place(&after_s); 7890 } 7891 __ Bind(&end_of_pool_after); 7892 7893 END(); 7894 7895 RUN(); 7896 7897 ASSERT_EQUAL_64(0x1234567890abcdef, x2); 7898 ASSERT_EQUAL_64(0xfedcba09, x3); 7899 ASSERT_EQUAL_64(0xffffffff80000000, x5); 7900 ASSERT_EQUAL_FP64(1.234, d13); 7901 ASSERT_EQUAL_FP32(2.5, s25); 7902 7903 ASSERT_EQUAL_64(0x1234567890abcdef, x6); 7904 ASSERT_EQUAL_64(0xfedcba09, x7); 7905 ASSERT_EQUAL_64(0xffffffff80000000, x8); 7906 ASSERT_EQUAL_FP64(1.234, d14); 7907 ASSERT_EQUAL_FP32(2.5, s26); 7908 7909 TEARDOWN(); 7910 } 7911 7912 7913 TEST(add_sub_imm) { 7914 SETUP(); 7915 7916 START(); 7917 __ Mov(x0, 0x0); 7918 __ Mov(x1, 0x1111); 7919 __ Mov(x2, 0xffffffffffffffff); 7920 __ Mov(x3, 0x8000000000000000); 7921 7922 __ Add(x10, x0, Operand(0x123)); 7923 __ Add(x11, x1, Operand(0x122000)); 7924 __ Add(x12, x0, Operand(0xabc << 12)); 7925 __ Add(x13, x2, Operand(1)); 7926 7927 __ Add(w14, w0, Operand(0x123)); 7928 __ Add(w15, w1, Operand(0x122000)); 7929 __ Add(w16, w0, Operand(0xabc << 12)); 7930 __ Add(w17, w2, Operand(1)); 7931 7932 __ Sub(x20, x0, Operand(0x1)); 7933 __ Sub(x21, x1, Operand(0x111)); 7934 __ Sub(x22, x1, Operand(0x1 << 12)); 7935 __ Sub(x23, x3, Operand(1)); 7936 7937 __ Sub(w24, w0, Operand(0x1)); 7938 __ Sub(w25, w1, Operand(0x111)); 7939 __ Sub(w26, w1, Operand(0x1 << 12)); 7940 __ Sub(w27, w3, Operand(1)); 7941 END(); 7942 7943 RUN(); 7944 7945 ASSERT_EQUAL_64(0x123, x10); 7946 ASSERT_EQUAL_64(0x123111, x11); 7947 ASSERT_EQUAL_64(0xabc000, x12); 7948 ASSERT_EQUAL_64(0x0, x13); 7949 7950 ASSERT_EQUAL_32(0x123, w14); 7951 ASSERT_EQUAL_32(0x123111, w15); 7952 ASSERT_EQUAL_32(0xabc000, w16); 7953 ASSERT_EQUAL_32(0x0, w17); 7954 7955 ASSERT_EQUAL_64(0xffffffffffffffff, x20); 7956 ASSERT_EQUAL_64(0x1000, x21); 7957 ASSERT_EQUAL_64(0x111, x22); 7958 ASSERT_EQUAL_64(0x7fffffffffffffff, x23); 7959 7960 ASSERT_EQUAL_32(0xffffffff, w24); 7961 ASSERT_EQUAL_32(0x1000, w25); 7962 ASSERT_EQUAL_32(0x111, w26); 7963 ASSERT_EQUAL_32(0xffffffff, w27); 7964 7965 TEARDOWN(); 7966 } 7967 7968 7969 TEST(add_sub_wide_imm) { 7970 SETUP(); 7971 7972 START(); 7973 __ Mov(x0, 0x0); 7974 __ Mov(x1, 0x1); 7975 7976 __ Add(x10, x0, Operand(0x1234567890abcdef)); 7977 __ Add(x11, x1, Operand(0xffffffff)); 7978 7979 __ Add(w12, w0, Operand(0x12345678)); 7980 __ Add(w13, w1, Operand(0xffffffff)); 7981 7982 __ Add(w18, w0, Operand(kWMinInt)); 7983 __ Sub(w19, w0, Operand(kWMinInt)); 7984 7985 __ Sub(x20, x0, Operand(0x1234567890abcdef)); 7986 __ Sub(w21, w0, Operand(0x12345678)); 7987 7988 END(); 7989 7990 RUN(); 7991 7992 ASSERT_EQUAL_64(0x1234567890abcdef, x10); 7993 ASSERT_EQUAL_64(0x100000000, x11); 7994 7995 ASSERT_EQUAL_32(0x12345678, w12); 7996 ASSERT_EQUAL_64(0x0, x13); 7997 7998 ASSERT_EQUAL_32(kWMinInt, w18); 7999 ASSERT_EQUAL_32(kWMinInt, w19); 8000 8001 ASSERT_EQUAL_64(-0x1234567890abcdef, x20); 8002 ASSERT_EQUAL_32(-0x12345678, w21); 8003 8004 TEARDOWN(); 8005 } 8006 8007 8008 TEST(add_sub_shifted) { 8009 SETUP(); 8010 8011 START(); 8012 __ Mov(x0, 0); 8013 __ Mov(x1, 0x0123456789abcdef); 8014 __ Mov(x2, 0xfedcba9876543210); 8015 __ Mov(x3, 0xffffffffffffffff); 8016 8017 __ Add(x10, x1, Operand(x2)); 8018 __ Add(x11, x0, Operand(x1, LSL, 8)); 8019 __ Add(x12, x0, Operand(x1, LSR, 8)); 8020 __ Add(x13, x0, Operand(x1, ASR, 8)); 8021 __ Add(x14, x0, Operand(x2, ASR, 8)); 8022 __ Add(w15, 
w0, Operand(w1, ASR, 8)); 8023 __ Add(w18, w3, Operand(w1, ROR, 8)); 8024 __ Add(x19, x3, Operand(x1, ROR, 8)); 8025 8026 __ Sub(x20, x3, Operand(x2)); 8027 __ Sub(x21, x3, Operand(x1, LSL, 8)); 8028 __ Sub(x22, x3, Operand(x1, LSR, 8)); 8029 __ Sub(x23, x3, Operand(x1, ASR, 8)); 8030 __ Sub(x24, x3, Operand(x2, ASR, 8)); 8031 __ Sub(w25, w3, Operand(w1, ASR, 8)); 8032 __ Sub(w26, w3, Operand(w1, ROR, 8)); 8033 __ Sub(x27, x3, Operand(x1, ROR, 8)); 8034 END(); 8035 8036 RUN(); 8037 8038 ASSERT_EQUAL_64(0xffffffffffffffff, x10); 8039 ASSERT_EQUAL_64(0x23456789abcdef00, x11); 8040 ASSERT_EQUAL_64(0x000123456789abcd, x12); 8041 ASSERT_EQUAL_64(0x000123456789abcd, x13); 8042 ASSERT_EQUAL_64(0xfffedcba98765432, x14); 8043 ASSERT_EQUAL_64(0xff89abcd, x15); 8044 ASSERT_EQUAL_64(0xef89abcc, x18); 8045 ASSERT_EQUAL_64(0xef0123456789abcc, x19); 8046 8047 ASSERT_EQUAL_64(0x0123456789abcdef, x20); 8048 ASSERT_EQUAL_64(0xdcba9876543210ff, x21); 8049 ASSERT_EQUAL_64(0xfffedcba98765432, x22); 8050 ASSERT_EQUAL_64(0xfffedcba98765432, x23); 8051 ASSERT_EQUAL_64(0x000123456789abcd, x24); 8052 ASSERT_EQUAL_64(0x00765432, x25); 8053 ASSERT_EQUAL_64(0x10765432, x26); 8054 ASSERT_EQUAL_64(0x10fedcba98765432, x27); 8055 8056 TEARDOWN(); 8057 } 8058 8059 8060 TEST(add_sub_extended) { 8061 SETUP(); 8062 8063 START(); 8064 __ Mov(x0, 0); 8065 __ Mov(x1, 0x0123456789abcdef); 8066 __ Mov(x2, 0xfedcba9876543210); 8067 __ Mov(w3, 0x80); 8068 8069 __ Add(x10, x0, Operand(x1, UXTB, 0)); 8070 __ Add(x11, x0, Operand(x1, UXTB, 1)); 8071 __ Add(x12, x0, Operand(x1, UXTH, 2)); 8072 __ Add(x13, x0, Operand(x1, UXTW, 4)); 8073 8074 __ Add(x14, x0, Operand(x1, SXTB, 0)); 8075 __ Add(x15, x0, Operand(x1, SXTB, 1)); 8076 __ Add(x16, x0, Operand(x1, SXTH, 2)); 8077 __ Add(x17, x0, Operand(x1, SXTW, 3)); 8078 __ Add(x18, x0, Operand(x2, SXTB, 0)); 8079 __ Add(x19, x0, Operand(x2, SXTB, 1)); 8080 __ Add(x20, x0, Operand(x2, SXTH, 2)); 8081 __ Add(x21, x0, Operand(x2, SXTW, 3)); 8082 8083 __ Add(x22, x1, Operand(x2, SXTB, 1)); 8084 __ Sub(x23, x1, Operand(x2, SXTB, 1)); 8085 8086 __ Add(w24, w1, Operand(w2, UXTB, 2)); 8087 __ Add(w25, w0, Operand(w1, SXTB, 0)); 8088 __ Add(w26, w0, Operand(w1, SXTB, 1)); 8089 __ Add(w27, w2, Operand(w1, SXTW, 3)); 8090 8091 __ Add(w28, w0, Operand(w1, SXTW, 3)); 8092 __ Add(x29, x0, Operand(w1, SXTW, 3)); 8093 8094 __ Sub(x30, x0, Operand(w3, SXTB, 1)); 8095 END(); 8096 8097 RUN(); 8098 8099 ASSERT_EQUAL_64(0xef, x10); 8100 ASSERT_EQUAL_64(0x1de, x11); 8101 ASSERT_EQUAL_64(0x337bc, x12); 8102 ASSERT_EQUAL_64(0x89abcdef0, x13); 8103 8104 ASSERT_EQUAL_64(0xffffffffffffffef, x14); 8105 ASSERT_EQUAL_64(0xffffffffffffffde, x15); 8106 ASSERT_EQUAL_64(0xffffffffffff37bc, x16); 8107 ASSERT_EQUAL_64(0xfffffffc4d5e6f78, x17); 8108 ASSERT_EQUAL_64(0x10, x18); 8109 ASSERT_EQUAL_64(0x20, x19); 8110 ASSERT_EQUAL_64(0xc840, x20); 8111 ASSERT_EQUAL_64(0x3b2a19080, x21); 8112 8113 ASSERT_EQUAL_64(0x0123456789abce0f, x22); 8114 ASSERT_EQUAL_64(0x0123456789abcdcf, x23); 8115 8116 ASSERT_EQUAL_32(0x89abce2f, w24); 8117 ASSERT_EQUAL_32(0xffffffef, w25); 8118 ASSERT_EQUAL_32(0xffffffde, w26); 8119 ASSERT_EQUAL_32(0xc3b2a188, w27); 8120 8121 ASSERT_EQUAL_32(0x4d5e6f78, w28); 8122 ASSERT_EQUAL_64(0xfffffffc4d5e6f78, x29); 8123 8124 ASSERT_EQUAL_64(256, x30); 8125 8126 TEARDOWN(); 8127 } 8128 8129 8130 TEST(add_sub_negative) { 8131 SETUP(); 8132 8133 START(); 8134 __ Mov(x0, 0); 8135 __ Mov(x1, 4687); 8136 __ Mov(x2, 0x1122334455667788); 8137 __ Mov(w3, 0x11223344); 8138 __ Mov(w4, 400000); 8139 8140 __ Add(x10, x0, -42); 
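// -42 is not encodable as an Add immediate; the MacroAssembler is expected
// to materialise it as Sub(x10, x0, 42), and likewise for the rest of this
// block.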
8141 __ Add(x11, x1, -687); 8142 __ Add(x12, x2, -0x88); 8143 8144 __ Sub(x13, x0, -600); 8145 __ Sub(x14, x1, -313); 8146 __ Sub(x15, x2, -0x555); 8147 8148 __ Add(w19, w3, -0x344); 8149 __ Add(w20, w4, -2000); 8150 8151 __ Sub(w21, w3, -0xbc); 8152 __ Sub(w22, w4, -2000); 8153 END(); 8154 8155 RUN(); 8156 8157 ASSERT_EQUAL_64(-42, x10); 8158 ASSERT_EQUAL_64(4000, x11); 8159 ASSERT_EQUAL_64(0x1122334455667700, x12); 8160 8161 ASSERT_EQUAL_64(600, x13); 8162 ASSERT_EQUAL_64(5000, x14); 8163 ASSERT_EQUAL_64(0x1122334455667cdd, x15); 8164 8165 ASSERT_EQUAL_32(0x11223000, w19); 8166 ASSERT_EQUAL_32(398000, w20); 8167 8168 ASSERT_EQUAL_32(0x11223400, w21); 8169 ASSERT_EQUAL_32(402000, w22); 8170 8171 TEARDOWN(); 8172 } 8173 8174 8175 TEST(add_sub_zero) { 8176 SETUP(); 8177 8178 START(); 8179 __ Mov(x0, 0); 8180 __ Mov(x1, 0); 8181 __ Mov(x2, 0); 8182 8183 Label blob1; 8184 __ Bind(&blob1); 8185 __ Add(x0, x0, 0); 8186 __ Sub(x1, x1, 0); 8187 __ Sub(x2, x2, xzr); 8188 VIXL_CHECK(__ GetSizeOfCodeGeneratedSince(&blob1) == 0); 8189 8190 Label blob2; 8191 __ Bind(&blob2); 8192 __ Add(w3, w3, 0); 8193 VIXL_CHECK(__ GetSizeOfCodeGeneratedSince(&blob2) != 0); 8194 8195 Label blob3; 8196 __ Bind(&blob3); 8197 __ Sub(w3, w3, wzr); 8198 VIXL_CHECK(__ GetSizeOfCodeGeneratedSince(&blob3) != 0); 8199 8200 END(); 8201 8202 RUN(); 8203 8204 ASSERT_EQUAL_64(0, x0); 8205 ASSERT_EQUAL_64(0, x1); 8206 ASSERT_EQUAL_64(0, x2); 8207 8208 TEARDOWN(); 8209 } 8210 8211 8212 TEST(claim_drop_zero) { 8213 SETUP(); 8214 8215 START(); 8216 8217 Label start; 8218 __ Bind(&start); 8219 __ Claim(Operand(0)); 8220 __ Drop(Operand(0)); 8221 __ Claim(Operand(xzr)); 8222 __ Drop(Operand(xzr)); 8223 VIXL_CHECK(__ GetSizeOfCodeGeneratedSince(&start) == 0); 8224 8225 END(); 8226 8227 RUN(); 8228 8229 TEARDOWN(); 8230 } 8231 8232 8233 TEST(neg) { 8234 SETUP(); 8235 8236 START(); 8237 __ Mov(x0, 0xf123456789abcdef); 8238 8239 // Immediate. 8240 __ Neg(x1, 0x123); 8241 __ Neg(w2, 0x123); 8242 8243 // Shifted. 8244 __ Neg(x3, Operand(x0, LSL, 1)); 8245 __ Neg(w4, Operand(w0, LSL, 2)); 8246 __ Neg(x5, Operand(x0, LSR, 3)); 8247 __ Neg(w6, Operand(w0, LSR, 4)); 8248 __ Neg(x7, Operand(x0, ASR, 5)); 8249 __ Neg(w8, Operand(w0, ASR, 6)); 8250 8251 // Extended. 
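// For example, Neg(x10, Operand(x0, SXTB, 1)) computes
// 0 - (SXTB(0xef) << 1) = 0 - (-17 << 1) = 34 = 0x22, as asserted below.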
8252 __ Neg(w9, Operand(w0, UXTB)); 8253 __ Neg(x10, Operand(x0, SXTB, 1)); 8254 __ Neg(w11, Operand(w0, UXTH, 2)); 8255 __ Neg(x12, Operand(x0, SXTH, 3)); 8256 __ Neg(w13, Operand(w0, UXTW, 4)); 8257 __ Neg(x14, Operand(x0, SXTW, 4)); 8258 END(); 8259 8260 RUN(); 8261 8262 ASSERT_EQUAL_64(0xfffffffffffffedd, x1); 8263 ASSERT_EQUAL_64(0xfffffedd, x2); 8264 ASSERT_EQUAL_64(0x1db97530eca86422, x3); 8265 ASSERT_EQUAL_64(0xd950c844, x4); 8266 ASSERT_EQUAL_64(0xe1db97530eca8643, x5); 8267 ASSERT_EQUAL_64(0xf7654322, x6); 8268 ASSERT_EQUAL_64(0x0076e5d4c3b2a191, x7); 8269 ASSERT_EQUAL_64(0x01d950c9, x8); 8270 ASSERT_EQUAL_64(0xffffff11, x9); 8271 ASSERT_EQUAL_64(0x0000000000000022, x10); 8272 ASSERT_EQUAL_64(0xfffcc844, x11); 8273 ASSERT_EQUAL_64(0x0000000000019088, x12); 8274 ASSERT_EQUAL_64(0x65432110, x13); 8275 ASSERT_EQUAL_64(0x0000000765432110, x14); 8276 8277 TEARDOWN(); 8278 } 8279 8280 8281 template <typename T, typename Op> 8282 static void AdcsSbcsHelper( 8283 Op op, T left, T right, int carry, T expected, StatusFlags expected_flags) { 8284 int reg_size = sizeof(T) * 8; 8285 Register left_reg(0, reg_size); 8286 Register right_reg(1, reg_size); 8287 Register result_reg(2, reg_size); 8288 8289 SETUP(); 8290 START(); 8291 8292 __ Mov(left_reg, left); 8293 __ Mov(right_reg, right); 8294 __ Mov(x10, (carry ? CFlag : NoFlag)); 8295 8296 __ Msr(NZCV, x10); 8297 (masm.*op)(result_reg, left_reg, right_reg); 8298 8299 END(); 8300 RUN(); 8301 8302 ASSERT_EQUAL_64(left, left_reg.X()); 8303 ASSERT_EQUAL_64(right, right_reg.X()); 8304 ASSERT_EQUAL_64(expected, result_reg.X()); 8305 ASSERT_EQUAL_NZCV(expected_flags); 8306 8307 TEARDOWN(); 8308 } 8309 8310 8311 TEST(adcs_sbcs_x) { 8312 uint64_t inputs[] = { 8313 0x0000000000000000, 8314 0x0000000000000001, 8315 0x7ffffffffffffffe, 8316 0x7fffffffffffffff, 8317 0x8000000000000000, 8318 0x8000000000000001, 8319 0xfffffffffffffffe, 8320 0xffffffffffffffff, 8321 }; 8322 static const size_t input_count = sizeof(inputs) / sizeof(inputs[0]); 8323 8324 struct Expected { 8325 uint64_t carry0_result; 8326 StatusFlags carry0_flags; 8327 uint64_t carry1_result; 8328 StatusFlags carry1_flags; 8329 }; 8330 8331 static const Expected expected_adcs_x[input_count][input_count] = 8332 {{{0x0000000000000000, ZFlag, 0x0000000000000001, NoFlag}, 8333 {0x0000000000000001, NoFlag, 0x0000000000000002, NoFlag}, 8334 {0x7ffffffffffffffe, NoFlag, 0x7fffffffffffffff, NoFlag}, 8335 {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag}, 8336 {0x8000000000000000, NFlag, 0x8000000000000001, NFlag}, 8337 {0x8000000000000001, NFlag, 0x8000000000000002, NFlag}, 8338 {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag}, 8339 {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}}, 8340 {{0x0000000000000001, NoFlag, 0x0000000000000002, NoFlag}, 8341 {0x0000000000000002, NoFlag, 0x0000000000000003, NoFlag}, 8342 {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag}, 8343 {0x8000000000000000, NVFlag, 0x8000000000000001, NVFlag}, 8344 {0x8000000000000001, NFlag, 0x8000000000000002, NFlag}, 8345 {0x8000000000000002, NFlag, 0x8000000000000003, NFlag}, 8346 {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, 8347 {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag}}, 8348 {{0x7ffffffffffffffe, NoFlag, 0x7fffffffffffffff, NoFlag}, 8349 {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag}, 8350 {0xfffffffffffffffc, NVFlag, 0xfffffffffffffffd, NVFlag}, 8351 {0xfffffffffffffffd, NVFlag, 0xfffffffffffffffe, NVFlag}, 8352 {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag}, 
8353 {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, 8354 {0x7ffffffffffffffc, CFlag, 0x7ffffffffffffffd, CFlag}, 8355 {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag}}, 8356 {{0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag}, 8357 {0x8000000000000000, NVFlag, 0x8000000000000001, NVFlag}, 8358 {0xfffffffffffffffd, NVFlag, 0xfffffffffffffffe, NVFlag}, 8359 {0xfffffffffffffffe, NVFlag, 0xffffffffffffffff, NVFlag}, 8360 {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, 8361 {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag}, 8362 {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag}, 8363 {0x7ffffffffffffffe, CFlag, 0x7fffffffffffffff, CFlag}}, 8364 {{0x8000000000000000, NFlag, 0x8000000000000001, NFlag}, 8365 {0x8000000000000001, NFlag, 0x8000000000000002, NFlag}, 8366 {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag}, 8367 {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, 8368 {0x0000000000000000, ZCVFlag, 0x0000000000000001, CVFlag}, 8369 {0x0000000000000001, CVFlag, 0x0000000000000002, CVFlag}, 8370 {0x7ffffffffffffffe, CVFlag, 0x7fffffffffffffff, CVFlag}, 8371 {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag}}, 8372 {{0x8000000000000001, NFlag, 0x8000000000000002, NFlag}, 8373 {0x8000000000000002, NFlag, 0x8000000000000003, NFlag}, 8374 {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, 8375 {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag}, 8376 {0x0000000000000001, CVFlag, 0x0000000000000002, CVFlag}, 8377 {0x0000000000000002, CVFlag, 0x0000000000000003, CVFlag}, 8378 {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag}, 8379 {0x8000000000000000, NCFlag, 0x8000000000000001, NCFlag}}, 8380 {{0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag}, 8381 {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, 8382 {0x7ffffffffffffffc, CFlag, 0x7ffffffffffffffd, CFlag}, 8383 {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag}, 8384 {0x7ffffffffffffffe, CVFlag, 0x7fffffffffffffff, CVFlag}, 8385 {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag}, 8386 {0xfffffffffffffffc, NCFlag, 0xfffffffffffffffd, NCFlag}, 8387 {0xfffffffffffffffd, NCFlag, 0xfffffffffffffffe, NCFlag}}, 8388 {{0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, 8389 {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag}, 8390 {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag}, 8391 {0x7ffffffffffffffe, CFlag, 0x7fffffffffffffff, CFlag}, 8392 {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag}, 8393 {0x8000000000000000, NCFlag, 0x8000000000000001, NCFlag}, 8394 {0xfffffffffffffffd, NCFlag, 0xfffffffffffffffe, NCFlag}, 8395 {0xfffffffffffffffe, NCFlag, 0xffffffffffffffff, NCFlag}}}; 8396 8397 static const Expected expected_sbcs_x[input_count][input_count] = 8398 {{{0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, 8399 {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag}, 8400 {0x8000000000000001, NFlag, 0x8000000000000002, NFlag}, 8401 {0x8000000000000000, NFlag, 0x8000000000000001, NFlag}, 8402 {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag}, 8403 {0x7ffffffffffffffe, NoFlag, 0x7fffffffffffffff, NoFlag}, 8404 {0x0000000000000001, NoFlag, 0x0000000000000002, NoFlag}, 8405 {0x0000000000000000, ZFlag, 0x0000000000000001, NoFlag}}, 8406 {{0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag}, 8407 {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, 8408 {0x8000000000000002, NFlag, 0x8000000000000003, NFlag}, 8409 {0x8000000000000001, NFlag, 0x8000000000000002, NFlag}, 8410 {0x8000000000000000, NVFlag, 
0x8000000000000001, NVFlag}, 8411 {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag}, 8412 {0x0000000000000002, NoFlag, 0x0000000000000003, NoFlag}, 8413 {0x0000000000000001, NoFlag, 0x0000000000000002, NoFlag}}, 8414 {{0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag}, 8415 {0x7ffffffffffffffc, CFlag, 0x7ffffffffffffffd, CFlag}, 8416 {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, 8417 {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag}, 8418 {0xfffffffffffffffd, NVFlag, 0xfffffffffffffffe, NVFlag}, 8419 {0xfffffffffffffffc, NVFlag, 0xfffffffffffffffd, NVFlag}, 8420 {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag}, 8421 {0x7ffffffffffffffe, NoFlag, 0x7fffffffffffffff, NoFlag}}, 8422 {{0x7ffffffffffffffe, CFlag, 0x7fffffffffffffff, CFlag}, 8423 {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag}, 8424 {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag}, 8425 {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, 8426 {0xfffffffffffffffe, NVFlag, 0xffffffffffffffff, NVFlag}, 8427 {0xfffffffffffffffd, NVFlag, 0xfffffffffffffffe, NVFlag}, 8428 {0x8000000000000000, NVFlag, 0x8000000000000001, NVFlag}, 8429 {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag}}, 8430 {{0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag}, 8431 {0x7ffffffffffffffe, CVFlag, 0x7fffffffffffffff, CVFlag}, 8432 {0x0000000000000001, CVFlag, 0x0000000000000002, CVFlag}, 8433 {0x0000000000000000, ZCVFlag, 0x0000000000000001, CVFlag}, 8434 {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, 8435 {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag}, 8436 {0x8000000000000001, NFlag, 0x8000000000000002, NFlag}, 8437 {0x8000000000000000, NFlag, 0x8000000000000001, NFlag}}, 8438 {{0x8000000000000000, NCFlag, 0x8000000000000001, NCFlag}, 8439 {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag}, 8440 {0x0000000000000002, CVFlag, 0x0000000000000003, CVFlag}, 8441 {0x0000000000000001, CVFlag, 0x0000000000000002, CVFlag}, 8442 {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag}, 8443 {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, 8444 {0x8000000000000002, NFlag, 0x8000000000000003, NFlag}, 8445 {0x8000000000000001, NFlag, 0x8000000000000002, NFlag}}, 8446 {{0xfffffffffffffffd, NCFlag, 0xfffffffffffffffe, NCFlag}, 8447 {0xfffffffffffffffc, NCFlag, 0xfffffffffffffffd, NCFlag}, 8448 {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag}, 8449 {0x7ffffffffffffffe, CVFlag, 0x7fffffffffffffff, CVFlag}, 8450 {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag}, 8451 {0x7ffffffffffffffc, CFlag, 0x7ffffffffffffffd, CFlag}, 8452 {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, 8453 {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag}}, 8454 {{0xfffffffffffffffe, NCFlag, 0xffffffffffffffff, NCFlag}, 8455 {0xfffffffffffffffd, NCFlag, 0xfffffffffffffffe, NCFlag}, 8456 {0x8000000000000000, NCFlag, 0x8000000000000001, NCFlag}, 8457 {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag}, 8458 {0x7ffffffffffffffe, CFlag, 0x7fffffffffffffff, CFlag}, 8459 {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag}, 8460 {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag}, 8461 {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}}}; 8462 8463 for (size_t left = 0; left < input_count; left++) { 8464 for (size_t right = 0; right < input_count; right++) { 8465 const Expected& expected = expected_adcs_x[left][right]; 8466 AdcsSbcsHelper(&MacroAssembler::Adcs, 8467 inputs[left], 8468 inputs[right], 8469 0, 8470 expected.carry0_result, 8471 expected.carry0_flags); 8472 
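// Repeat with the carry flag set on entry.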
AdcsSbcsHelper(&MacroAssembler::Adcs, 8473 inputs[left], 8474 inputs[right], 8475 1, 8476 expected.carry1_result, 8477 expected.carry1_flags); 8478 } 8479 } 8480 8481 for (size_t left = 0; left < input_count; left++) { 8482 for (size_t right = 0; right < input_count; right++) { 8483 const Expected& expected = expected_sbcs_x[left][right]; 8484 AdcsSbcsHelper(&MacroAssembler::Sbcs, 8485 inputs[left], 8486 inputs[right], 8487 0, 8488 expected.carry0_result, 8489 expected.carry0_flags); 8490 AdcsSbcsHelper(&MacroAssembler::Sbcs, 8491 inputs[left], 8492 inputs[right], 8493 1, 8494 expected.carry1_result, 8495 expected.carry1_flags); 8496 } 8497 } 8498 } 8499 8500 8501 TEST(adcs_sbcs_w) { 8502 uint32_t inputs[] = { 8503 0x00000000, 8504 0x00000001, 8505 0x7ffffffe, 8506 0x7fffffff, 8507 0x80000000, 8508 0x80000001, 8509 0xfffffffe, 8510 0xffffffff, 8511 }; 8512 static const size_t input_count = sizeof(inputs) / sizeof(inputs[0]); 8513 8514 struct Expected { 8515 uint32_t carry0_result; 8516 StatusFlags carry0_flags; 8517 uint32_t carry1_result; 8518 StatusFlags carry1_flags; 8519 }; 8520 8521 static const Expected expected_adcs_w[input_count][input_count] = 8522 {{{0x00000000, ZFlag, 0x00000001, NoFlag}, 8523 {0x00000001, NoFlag, 0x00000002, NoFlag}, 8524 {0x7ffffffe, NoFlag, 0x7fffffff, NoFlag}, 8525 {0x7fffffff, NoFlag, 0x80000000, NVFlag}, 8526 {0x80000000, NFlag, 0x80000001, NFlag}, 8527 {0x80000001, NFlag, 0x80000002, NFlag}, 8528 {0xfffffffe, NFlag, 0xffffffff, NFlag}, 8529 {0xffffffff, NFlag, 0x00000000, ZCFlag}}, 8530 {{0x00000001, NoFlag, 0x00000002, NoFlag}, 8531 {0x00000002, NoFlag, 0x00000003, NoFlag}, 8532 {0x7fffffff, NoFlag, 0x80000000, NVFlag}, 8533 {0x80000000, NVFlag, 0x80000001, NVFlag}, 8534 {0x80000001, NFlag, 0x80000002, NFlag}, 8535 {0x80000002, NFlag, 0x80000003, NFlag}, 8536 {0xffffffff, NFlag, 0x00000000, ZCFlag}, 8537 {0x00000000, ZCFlag, 0x00000001, CFlag}}, 8538 {{0x7ffffffe, NoFlag, 0x7fffffff, NoFlag}, 8539 {0x7fffffff, NoFlag, 0x80000000, NVFlag}, 8540 {0xfffffffc, NVFlag, 0xfffffffd, NVFlag}, 8541 {0xfffffffd, NVFlag, 0xfffffffe, NVFlag}, 8542 {0xfffffffe, NFlag, 0xffffffff, NFlag}, 8543 {0xffffffff, NFlag, 0x00000000, ZCFlag}, 8544 {0x7ffffffc, CFlag, 0x7ffffffd, CFlag}, 8545 {0x7ffffffd, CFlag, 0x7ffffffe, CFlag}}, 8546 {{0x7fffffff, NoFlag, 0x80000000, NVFlag}, 8547 {0x80000000, NVFlag, 0x80000001, NVFlag}, 8548 {0xfffffffd, NVFlag, 0xfffffffe, NVFlag}, 8549 {0xfffffffe, NVFlag, 0xffffffff, NVFlag}, 8550 {0xffffffff, NFlag, 0x00000000, ZCFlag}, 8551 {0x00000000, ZCFlag, 0x00000001, CFlag}, 8552 {0x7ffffffd, CFlag, 0x7ffffffe, CFlag}, 8553 {0x7ffffffe, CFlag, 0x7fffffff, CFlag}}, 8554 {{0x80000000, NFlag, 0x80000001, NFlag}, 8555 {0x80000001, NFlag, 0x80000002, NFlag}, 8556 {0xfffffffe, NFlag, 0xffffffff, NFlag}, 8557 {0xffffffff, NFlag, 0x00000000, ZCFlag}, 8558 {0x00000000, ZCVFlag, 0x00000001, CVFlag}, 8559 {0x00000001, CVFlag, 0x00000002, CVFlag}, 8560 {0x7ffffffe, CVFlag, 0x7fffffff, CVFlag}, 8561 {0x7fffffff, CVFlag, 0x80000000, NCFlag}}, 8562 {{0x80000001, NFlag, 0x80000002, NFlag}, 8563 {0x80000002, NFlag, 0x80000003, NFlag}, 8564 {0xffffffff, NFlag, 0x00000000, ZCFlag}, 8565 {0x00000000, ZCFlag, 0x00000001, CFlag}, 8566 {0x00000001, CVFlag, 0x00000002, CVFlag}, 8567 {0x00000002, CVFlag, 0x00000003, CVFlag}, 8568 {0x7fffffff, CVFlag, 0x80000000, NCFlag}, 8569 {0x80000000, NCFlag, 0x80000001, NCFlag}}, 8570 {{0xfffffffe, NFlag, 0xffffffff, NFlag}, 8571 {0xffffffff, NFlag, 0x00000000, ZCFlag}, 8572 {0x7ffffffc, CFlag, 0x7ffffffd, CFlag}, 8573 
{0x7ffffffd, CFlag, 0x7ffffffe, CFlag}, 8574 {0x7ffffffe, CVFlag, 0x7fffffff, CVFlag}, 8575 {0x7fffffff, CVFlag, 0x80000000, NCFlag}, 8576 {0xfffffffc, NCFlag, 0xfffffffd, NCFlag}, 8577 {0xfffffffd, NCFlag, 0xfffffffe, NCFlag}}, 8578 {{0xffffffff, NFlag, 0x00000000, ZCFlag}, 8579 {0x00000000, ZCFlag, 0x00000001, CFlag}, 8580 {0x7ffffffd, CFlag, 0x7ffffffe, CFlag}, 8581 {0x7ffffffe, CFlag, 0x7fffffff, CFlag}, 8582 {0x7fffffff, CVFlag, 0x80000000, NCFlag}, 8583 {0x80000000, NCFlag, 0x80000001, NCFlag}, 8584 {0xfffffffd, NCFlag, 0xfffffffe, NCFlag}, 8585 {0xfffffffe, NCFlag, 0xffffffff, NCFlag}}}; 8586 8587 static const Expected expected_sbcs_w[input_count][input_count] = 8588 {{{0xffffffff, NFlag, 0x00000000, ZCFlag}, 8589 {0xfffffffe, NFlag, 0xffffffff, NFlag}, 8590 {0x80000001, NFlag, 0x80000002, NFlag}, 8591 {0x80000000, NFlag, 0x80000001, NFlag}, 8592 {0x7fffffff, NoFlag, 0x80000000, NVFlag}, 8593 {0x7ffffffe, NoFlag, 0x7fffffff, NoFlag}, 8594 {0x00000001, NoFlag, 0x00000002, NoFlag}, 8595 {0x00000000, ZFlag, 0x00000001, NoFlag}}, 8596 {{0x00000000, ZCFlag, 0x00000001, CFlag}, 8597 {0xffffffff, NFlag, 0x00000000, ZCFlag}, 8598 {0x80000002, NFlag, 0x80000003, NFlag}, 8599 {0x80000001, NFlag, 0x80000002, NFlag}, 8600 {0x80000000, NVFlag, 0x80000001, NVFlag}, 8601 {0x7fffffff, NoFlag, 0x80000000, NVFlag}, 8602 {0x00000002, NoFlag, 0x00000003, NoFlag}, 8603 {0x00000001, NoFlag, 0x00000002, NoFlag}}, 8604 {{0x7ffffffd, CFlag, 0x7ffffffe, CFlag}, 8605 {0x7ffffffc, CFlag, 0x7ffffffd, CFlag}, 8606 {0xffffffff, NFlag, 0x00000000, ZCFlag}, 8607 {0xfffffffe, NFlag, 0xffffffff, NFlag}, 8608 {0xfffffffd, NVFlag, 0xfffffffe, NVFlag}, 8609 {0xfffffffc, NVFlag, 0xfffffffd, NVFlag}, 8610 {0x7fffffff, NoFlag, 0x80000000, NVFlag}, 8611 {0x7ffffffe, NoFlag, 0x7fffffff, NoFlag}}, 8612 {{0x7ffffffe, CFlag, 0x7fffffff, CFlag}, 8613 {0x7ffffffd, CFlag, 0x7ffffffe, CFlag}, 8614 {0x00000000, ZCFlag, 0x00000001, CFlag}, 8615 {0xffffffff, NFlag, 0x00000000, ZCFlag}, 8616 {0xfffffffe, NVFlag, 0xffffffff, NVFlag}, 8617 {0xfffffffd, NVFlag, 0xfffffffe, NVFlag}, 8618 {0x80000000, NVFlag, 0x80000001, NVFlag}, 8619 {0x7fffffff, NoFlag, 0x80000000, NVFlag}}, 8620 {{0x7fffffff, CVFlag, 0x80000000, NCFlag}, 8621 {0x7ffffffe, CVFlag, 0x7fffffff, CVFlag}, 8622 {0x00000001, CVFlag, 0x00000002, CVFlag}, 8623 {0x00000000, ZCVFlag, 0x00000001, CVFlag}, 8624 {0xffffffff, NFlag, 0x00000000, ZCFlag}, 8625 {0xfffffffe, NFlag, 0xffffffff, NFlag}, 8626 {0x80000001, NFlag, 0x80000002, NFlag}, 8627 {0x80000000, NFlag, 0x80000001, NFlag}}, 8628 {{0x80000000, NCFlag, 0x80000001, NCFlag}, 8629 {0x7fffffff, CVFlag, 0x80000000, NCFlag}, 8630 {0x00000002, CVFlag, 0x00000003, CVFlag}, 8631 {0x00000001, CVFlag, 0x00000002, CVFlag}, 8632 {0x00000000, ZCFlag, 0x00000001, CFlag}, 8633 {0xffffffff, NFlag, 0x00000000, ZCFlag}, 8634 {0x80000002, NFlag, 0x80000003, NFlag}, 8635 {0x80000001, NFlag, 0x80000002, NFlag}}, 8636 {{0xfffffffd, NCFlag, 0xfffffffe, NCFlag}, 8637 {0xfffffffc, NCFlag, 0xfffffffd, NCFlag}, 8638 {0x7fffffff, CVFlag, 0x80000000, NCFlag}, 8639 {0x7ffffffe, CVFlag, 0x7fffffff, CVFlag}, 8640 {0x7ffffffd, CFlag, 0x7ffffffe, CFlag}, 8641 {0x7ffffffc, CFlag, 0x7ffffffd, CFlag}, 8642 {0xffffffff, NFlag, 0x00000000, ZCFlag}, 8643 {0xfffffffe, NFlag, 0xffffffff, NFlag}}, 8644 {{0xfffffffe, NCFlag, 0xffffffff, NCFlag}, 8645 {0xfffffffd, NCFlag, 0xfffffffe, NCFlag}, 8646 {0x80000000, NCFlag, 0x80000001, NCFlag}, 8647 {0x7fffffff, CVFlag, 0x80000000, NCFlag}, 8648 {0x7ffffffe, CFlag, 0x7fffffff, CFlag}, 8649 {0x7ffffffd, CFlag, 0x7ffffffe, 
CFlag}, 8650 {0x00000000, ZCFlag, 0x00000001, CFlag}, 8651 {0xffffffff, NFlag, 0x00000000, ZCFlag}}}; 8652 8653 for (size_t left = 0; left < input_count; left++) { 8654 for (size_t right = 0; right < input_count; right++) { 8655 const Expected& expected = expected_adcs_w[left][right]; 8656 AdcsSbcsHelper(&MacroAssembler::Adcs, 8657 inputs[left], 8658 inputs[right], 8659 0, 8660 expected.carry0_result, 8661 expected.carry0_flags); 8662 AdcsSbcsHelper(&MacroAssembler::Adcs, 8663 inputs[left], 8664 inputs[right], 8665 1, 8666 expected.carry1_result, 8667 expected.carry1_flags); 8668 } 8669 } 8670 8671 for (size_t left = 0; left < input_count; left++) { 8672 for (size_t right = 0; right < input_count; right++) { 8673 const Expected& expected = expected_sbcs_w[left][right]; 8674 AdcsSbcsHelper(&MacroAssembler::Sbcs, 8675 inputs[left], 8676 inputs[right], 8677 0, 8678 expected.carry0_result, 8679 expected.carry0_flags); 8680 AdcsSbcsHelper(&MacroAssembler::Sbcs, 8681 inputs[left], 8682 inputs[right], 8683 1, 8684 expected.carry1_result, 8685 expected.carry1_flags); 8686 } 8687 } 8688 } 8689 8690 8691 TEST(adc_sbc_shift) { 8692 SETUP(); 8693 8694 START(); 8695 __ Mov(x0, 0); 8696 __ Mov(x1, 1); 8697 __ Mov(x2, 0x0123456789abcdef); 8698 __ Mov(x3, 0xfedcba9876543210); 8699 __ Mov(x4, 0xffffffffffffffff); 8700 8701 // Clear the C flag. 8702 __ Adds(x0, x0, Operand(0)); 8703 8704 __ Adc(x5, x2, Operand(x3)); 8705 __ Adc(x6, x0, Operand(x1, LSL, 60)); 8706 __ Sbc(x7, x4, Operand(x3, LSR, 4)); 8707 __ Adc(x8, x2, Operand(x3, ASR, 4)); 8708 __ Adc(x9, x2, Operand(x3, ROR, 8)); 8709 8710 __ Adc(w10, w2, Operand(w3)); 8711 __ Adc(w11, w0, Operand(w1, LSL, 30)); 8712 __ Sbc(w12, w4, Operand(w3, LSR, 4)); 8713 __ Adc(w13, w2, Operand(w3, ASR, 4)); 8714 __ Adc(w14, w2, Operand(w3, ROR, 8)); 8715 8716 // Set the C flag. 8717 __ Cmp(w0, Operand(w0)); 8718 8719 __ Adc(x18, x2, Operand(x3)); 8720 __ Adc(x19, x0, Operand(x1, LSL, 60)); 8721 __ Sbc(x20, x4, Operand(x3, LSR, 4)); 8722 __ Adc(x21, x2, Operand(x3, ASR, 4)); 8723 __ Adc(x22, x2, Operand(x3, ROR, 8)); 8724 8725 __ Adc(w23, w2, Operand(w3)); 8726 __ Adc(w24, w0, Operand(w1, LSL, 30)); 8727 __ Sbc(w25, w4, Operand(w3, LSR, 4)); 8728 __ Adc(w26, w2, Operand(w3, ASR, 4)); 8729 __ Adc(w27, w2, Operand(w3, ROR, 8)); 8730 END(); 8731 8732 RUN(); 8733 8734 ASSERT_EQUAL_64(0xffffffffffffffff, x5); 8735 ASSERT_EQUAL_64(INT64_C(1) << 60, x6); 8736 ASSERT_EQUAL_64(0xf0123456789abcdd, x7); 8737 ASSERT_EQUAL_64(0x0111111111111110, x8); 8738 ASSERT_EQUAL_64(0x1222222222222221, x9); 8739 8740 ASSERT_EQUAL_32(0xffffffff, w10); 8741 ASSERT_EQUAL_32(INT32_C(1) << 30, w11); 8742 ASSERT_EQUAL_32(0xf89abcdd, w12); 8743 ASSERT_EQUAL_32(0x91111110, w13); 8744 ASSERT_EQUAL_32(0x9a222221, w14); 8745 8746 ASSERT_EQUAL_64(0xffffffffffffffff + 1, x18); 8747 ASSERT_EQUAL_64((INT64_C(1) << 60) + 1, x19); 8748 ASSERT_EQUAL_64(0xf0123456789abcdd + 1, x20); 8749 ASSERT_EQUAL_64(0x0111111111111110 + 1, x21); 8750 ASSERT_EQUAL_64(0x1222222222222221 + 1, x22); 8751 8752 ASSERT_EQUAL_32(0xffffffff + 1, w23); 8753 ASSERT_EQUAL_32((INT32_C(1) << 30) + 1, w24); 8754 ASSERT_EQUAL_32(0xf89abcdd + 1, w25); 8755 ASSERT_EQUAL_32(0x91111110 + 1, w26); 8756 ASSERT_EQUAL_32(0x9a222221 + 1, w27); 8757 8758 TEARDOWN(); 8759 } 8760 8761 8762 TEST(adc_sbc_extend) { 8763 SETUP(); 8764 8765 START(); 8766 // Clear the C flag. 
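// (Adding zero can never produce a carry-out, so Adds leaves C clear.)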
8767 __ Adds(x0, x0, Operand(0)); 8768 8769 __ Mov(x0, 0); 8770 __ Mov(x1, 1); 8771 __ Mov(x2, 0x0123456789abcdef); 8772 8773 __ Adc(x10, x1, Operand(w2, UXTB, 1)); 8774 __ Adc(x11, x1, Operand(x2, SXTH, 2)); 8775 __ Sbc(x12, x1, Operand(w2, UXTW, 4)); 8776 __ Adc(x13, x1, Operand(x2, UXTX, 4)); 8777 8778 __ Adc(w14, w1, Operand(w2, UXTB, 1)); 8779 __ Adc(w15, w1, Operand(w2, SXTH, 2)); 8780 __ Adc(w9, w1, Operand(w2, UXTW, 4)); 8781 8782 // Set the C flag. 8783 __ Cmp(w0, Operand(w0)); 8784 8785 __ Adc(x20, x1, Operand(w2, UXTB, 1)); 8786 __ Adc(x21, x1, Operand(x2, SXTH, 2)); 8787 __ Sbc(x22, x1, Operand(w2, UXTW, 4)); 8788 __ Adc(x23, x1, Operand(x2, UXTX, 4)); 8789 8790 __ Adc(w24, w1, Operand(w2, UXTB, 1)); 8791 __ Adc(w25, w1, Operand(w2, SXTH, 2)); 8792 __ Adc(w26, w1, Operand(w2, UXTW, 4)); 8793 END(); 8794 8795 RUN(); 8796 8797 ASSERT_EQUAL_64(0x1df, x10); 8798 ASSERT_EQUAL_64(0xffffffffffff37bd, x11); 8799 ASSERT_EQUAL_64(0xfffffff765432110, x12); 8800 ASSERT_EQUAL_64(0x123456789abcdef1, x13); 8801 8802 ASSERT_EQUAL_32(0x1df, w14); 8803 ASSERT_EQUAL_32(0xffff37bd, w15); 8804 ASSERT_EQUAL_32(0x9abcdef1, w9); 8805 8806 ASSERT_EQUAL_64(0x1df + 1, x20); 8807 ASSERT_EQUAL_64(0xffffffffffff37bd + 1, x21); 8808 ASSERT_EQUAL_64(0xfffffff765432110 + 1, x22); 8809 ASSERT_EQUAL_64(0x123456789abcdef1 + 1, x23); 8810 8811 ASSERT_EQUAL_32(0x1df + 1, w24); 8812 ASSERT_EQUAL_32(0xffff37bd + 1, w25); 8813 ASSERT_EQUAL_32(0x9abcdef1 + 1, w26); 8814 8815 // Check that adc correctly sets the condition flags. 8816 START(); 8817 __ Mov(x0, 0xff); 8818 __ Mov(x1, 0xffffffffffffffff); 8819 // Clear the C flag. 8820 __ Adds(x0, x0, Operand(0)); 8821 __ Adcs(x10, x0, Operand(x1, SXTX, 1)); 8822 END(); 8823 8824 RUN(); 8825 8826 ASSERT_EQUAL_NZCV(CFlag); 8827 8828 START(); 8829 __ Mov(x0, 0x7fffffffffffffff); 8830 __ Mov(x1, 1); 8831 // Clear the C flag. 8832 __ Adds(x0, x0, Operand(0)); 8833 __ Adcs(x10, x0, Operand(x1, UXTB, 2)); 8834 END(); 8835 8836 RUN(); 8837 8838 ASSERT_EQUAL_NZCV(NVFlag); 8839 8840 START(); 8841 __ Mov(x0, 0x7fffffffffffffff); 8842 // Clear the C flag. 8843 __ Adds(x0, x0, Operand(0)); 8844 __ Adcs(x10, x0, Operand(1)); 8845 END(); 8846 8847 RUN(); 8848 8849 ASSERT_EQUAL_NZCV(NVFlag); 8850 8851 TEARDOWN(); 8852 } 8853 8854 8855 TEST(adc_sbc_wide_imm) { 8856 SETUP(); 8857 8858 START(); 8859 __ Mov(x0, 0); 8860 8861 // Clear the C flag. 8862 __ Adds(x0, x0, Operand(0)); 8863 8864 __ Adc(x7, x0, Operand(0x1234567890abcdef)); 8865 __ Adc(w8, w0, Operand(0xffffffff)); 8866 __ Sbc(x9, x0, Operand(0x1234567890abcdef)); 8867 __ Sbc(w10, w0, Operand(0xffffffff)); 8868 __ Ngc(x11, Operand(0xffffffff00000000)); 8869 __ Ngc(w12, Operand(0xffff0000)); 8870 8871 // Set the C flag. 
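// Sbc computes rn + ~operand + C, so with C clear (above) x9 gets
// ~0x1234567890abcdef = 0xedcba9876f543210, while with C set (below) x20
// gets the exact negation, 0xedcba9876f543211.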
8872 __ Cmp(w0, Operand(w0)); 8873 8874 __ Adc(x18, x0, Operand(0x1234567890abcdef)); 8875 __ Adc(w19, w0, Operand(0xffffffff)); 8876 __ Sbc(x20, x0, Operand(0x1234567890abcdef)); 8877 __ Sbc(w21, w0, Operand(0xffffffff)); 8878 __ Ngc(x22, Operand(0xffffffff00000000)); 8879 __ Ngc(w23, Operand(0xffff0000)); 8880 END(); 8881 8882 RUN(); 8883 8884 ASSERT_EQUAL_64(0x1234567890abcdef, x7); 8885 ASSERT_EQUAL_64(0xffffffff, x8); 8886 ASSERT_EQUAL_64(0xedcba9876f543210, x9); 8887 ASSERT_EQUAL_64(0, x10); 8888 ASSERT_EQUAL_64(0xffffffff, x11); 8889 ASSERT_EQUAL_64(0xffff, x12); 8890 8891 ASSERT_EQUAL_64(0x1234567890abcdef + 1, x18); 8892 ASSERT_EQUAL_64(0, x19); 8893 ASSERT_EQUAL_64(0xedcba9876f543211, x20); 8894 ASSERT_EQUAL_64(1, x21); 8895 ASSERT_EQUAL_64(0x0000000100000000, x22); 8896 ASSERT_EQUAL_64(0x0000000000010000, x23); 8897 8898 TEARDOWN(); 8899 } 8900 8901 TEST(flags) { 8902 SETUP(); 8903 8904 START(); 8905 __ Mov(x0, 0); 8906 __ Mov(x1, 0x1111111111111111); 8907 __ Neg(x10, Operand(x0)); 8908 __ Neg(x11, Operand(x1)); 8909 __ Neg(w12, Operand(w1)); 8910 // Clear the C flag. 8911 __ Adds(x0, x0, Operand(0)); 8912 __ Ngc(x13, Operand(x0)); 8913 // Set the C flag. 8914 __ Cmp(x0, Operand(x0)); 8915 __ Ngc(w14, Operand(w0)); 8916 END(); 8917 8918 RUN(); 8919 8920 ASSERT_EQUAL_64(0, x10); 8921 ASSERT_EQUAL_64(-0x1111111111111111, x11); 8922 ASSERT_EQUAL_32(-0x11111111, w12); 8923 ASSERT_EQUAL_64(-1, x13); 8924 ASSERT_EQUAL_32(0, w14); 8925 8926 START(); 8927 __ Mov(x0, 0); 8928 __ Cmp(x0, Operand(x0)); 8929 END(); 8930 8931 RUN(); 8932 8933 ASSERT_EQUAL_NZCV(ZCFlag); 8934 8935 START(); 8936 __ Mov(w0, 0); 8937 __ Cmp(w0, Operand(w0)); 8938 END(); 8939 8940 RUN(); 8941 8942 ASSERT_EQUAL_NZCV(ZCFlag); 8943 8944 START(); 8945 __ Mov(x0, 0); 8946 __ Mov(x1, 0x1111111111111111); 8947 __ Cmp(x0, Operand(x1)); 8948 END(); 8949 8950 RUN(); 8951 8952 ASSERT_EQUAL_NZCV(NFlag); 8953 8954 START(); 8955 __ Mov(w0, 0); 8956 __ Mov(w1, 0x11111111); 8957 __ Cmp(w0, Operand(w1)); 8958 END(); 8959 8960 RUN(); 8961 8962 ASSERT_EQUAL_NZCV(NFlag); 8963 8964 START(); 8965 __ Mov(x1, 0x1111111111111111); 8966 __ Cmp(x1, Operand(0)); 8967 END(); 8968 8969 RUN(); 8970 8971 ASSERT_EQUAL_NZCV(CFlag); 8972 8973 START(); 8974 __ Mov(w1, 0x11111111); 8975 __ Cmp(w1, Operand(0)); 8976 END(); 8977 8978 RUN(); 8979 8980 ASSERT_EQUAL_NZCV(CFlag); 8981 8982 START(); 8983 __ Mov(x0, 1); 8984 __ Mov(x1, 0x7fffffffffffffff); 8985 __ Cmn(x1, Operand(x0)); 8986 END(); 8987 8988 RUN(); 8989 8990 ASSERT_EQUAL_NZCV(NVFlag); 8991 8992 START(); 8993 __ Mov(w0, 1); 8994 __ Mov(w1, 0x7fffffff); 8995 __ Cmn(w1, Operand(w0)); 8996 END(); 8997 8998 RUN(); 8999 9000 ASSERT_EQUAL_NZCV(NVFlag); 9001 9002 START(); 9003 __ Mov(x0, 1); 9004 __ Mov(x1, 0xffffffffffffffff); 9005 __ Cmn(x1, Operand(x0)); 9006 END(); 9007 9008 RUN(); 9009 9010 ASSERT_EQUAL_NZCV(ZCFlag); 9011 9012 START(); 9013 __ Mov(w0, 1); 9014 __ Mov(w1, 0xffffffff); 9015 __ Cmn(w1, Operand(w0)); 9016 END(); 9017 9018 RUN(); 9019 9020 ASSERT_EQUAL_NZCV(ZCFlag); 9021 9022 START(); 9023 __ Mov(w0, 0); 9024 __ Mov(w1, 1); 9025 // Clear the C flag. 9026 __ Adds(w0, w0, Operand(0)); 9027 __ Ngcs(w0, Operand(w1)); 9028 END(); 9029 9030 RUN(); 9031 9032 ASSERT_EQUAL_NZCV(NFlag); 9033 9034 START(); 9035 __ Mov(w0, 0); 9036 __ Mov(w1, 0); 9037 // Set the C flag. 
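// Ngcs(w0, w1) computes 0 - w1 - (1 - C); with C set and w1 == 0 the
// result is zero with no borrow, giving ZC.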
9038 __ Cmp(w0, Operand(w0)); 9039 __ Ngcs(w0, Operand(w1)); 9040 END(); 9041 9042 RUN(); 9043 9044 ASSERT_EQUAL_NZCV(ZCFlag); 9045 9046 TEARDOWN(); 9047 } 9048 9049 9050 TEST(cmp_shift) { 9051 SETUP(); 9052 9053 START(); 9054 __ Mov(x18, 0xf0000000); 9055 __ Mov(x19, 0xf000000010000000); 9056 __ Mov(x20, 0xf0000000f0000000); 9057 __ Mov(x21, 0x7800000078000000); 9058 __ Mov(x22, 0x3c0000003c000000); 9059 __ Mov(x23, 0x8000000780000000); 9060 __ Mov(x24, 0x0000000f00000000); 9061 __ Mov(x25, 0x00000003c0000000); 9062 __ Mov(x26, 0x8000000780000000); 9063 __ Mov(x27, 0xc0000003); 9064 9065 __ Cmp(w20, Operand(w21, LSL, 1)); 9066 __ Mrs(x0, NZCV); 9067 9068 __ Cmp(x20, Operand(x22, LSL, 2)); 9069 __ Mrs(x1, NZCV); 9070 9071 __ Cmp(w19, Operand(w23, LSR, 3)); 9072 __ Mrs(x2, NZCV); 9073 9074 __ Cmp(x18, Operand(x24, LSR, 4)); 9075 __ Mrs(x3, NZCV); 9076 9077 __ Cmp(w20, Operand(w25, ASR, 2)); 9078 __ Mrs(x4, NZCV); 9079 9080 __ Cmp(x20, Operand(x26, ASR, 3)); 9081 __ Mrs(x5, NZCV); 9082 9083 __ Cmp(w27, Operand(w22, ROR, 28)); 9084 __ Mrs(x6, NZCV); 9085 9086 __ Cmp(x20, Operand(x21, ROR, 31)); 9087 __ Mrs(x7, NZCV); 9088 END(); 9089 9090 RUN(); 9091 9092 ASSERT_EQUAL_32(ZCFlag, w0); 9093 ASSERT_EQUAL_32(ZCFlag, w1); 9094 ASSERT_EQUAL_32(ZCFlag, w2); 9095 ASSERT_EQUAL_32(ZCFlag, w3); 9096 ASSERT_EQUAL_32(ZCFlag, w4); 9097 ASSERT_EQUAL_32(ZCFlag, w5); 9098 ASSERT_EQUAL_32(ZCFlag, w6); 9099 ASSERT_EQUAL_32(ZCFlag, w7); 9100 9101 TEARDOWN(); 9102 } 9103 9104 9105 TEST(cmp_extend) { 9106 SETUP(); 9107 9108 START(); 9109 __ Mov(w20, 0x2); 9110 __ Mov(w21, 0x1); 9111 __ Mov(x22, 0xffffffffffffffff); 9112 __ Mov(x23, 0xff); 9113 __ Mov(x24, 0xfffffffffffffffe); 9114 __ Mov(x25, 0xffff); 9115 __ Mov(x26, 0xffffffff); 9116 9117 __ Cmp(w20, Operand(w21, LSL, 1)); 9118 __ Mrs(x0, NZCV); 9119 9120 __ Cmp(x22, Operand(x23, SXTB, 0)); 9121 __ Mrs(x1, NZCV); 9122 9123 __ Cmp(x24, Operand(x23, SXTB, 1)); 9124 __ Mrs(x2, NZCV); 9125 9126 __ Cmp(x24, Operand(x23, UXTB, 1)); 9127 __ Mrs(x3, NZCV); 9128 9129 __ Cmp(w22, Operand(w25, UXTH)); 9130 __ Mrs(x4, NZCV); 9131 9132 __ Cmp(x22, Operand(x25, SXTH)); 9133 __ Mrs(x5, NZCV); 9134 9135 __ Cmp(x22, Operand(x26, UXTW)); 9136 __ Mrs(x6, NZCV); 9137 9138 __ Cmp(x24, Operand(x26, SXTW, 1)); 9139 __ Mrs(x7, NZCV); 9140 END(); 9141 9142 RUN(); 9143 9144 ASSERT_EQUAL_32(ZCFlag, w0); 9145 ASSERT_EQUAL_32(ZCFlag, w1); 9146 ASSERT_EQUAL_32(ZCFlag, w2); 9147 ASSERT_EQUAL_32(NCFlag, w3); 9148 ASSERT_EQUAL_32(NCFlag, w4); 9149 ASSERT_EQUAL_32(ZCFlag, w5); 9150 ASSERT_EQUAL_32(NCFlag, w6); 9151 ASSERT_EQUAL_32(ZCFlag, w7); 9152 9153 TEARDOWN(); 9154 } 9155 9156 9157 TEST(ccmp) { 9158 SETUP(); 9159 9160 START(); 9161 __ Mov(w16, 0); 9162 __ Mov(w17, 1); 9163 __ Cmp(w16, w16); 9164 __ Ccmp(w16, w17, NCFlag, eq); 9165 __ Mrs(x0, NZCV); 9166 9167 __ Cmp(w16, w16); 9168 __ Ccmp(w16, w17, NCFlag, ne); 9169 __ Mrs(x1, NZCV); 9170 9171 __ Cmp(x16, x16); 9172 __ Ccmn(x16, 2, NZCVFlag, eq); 9173 __ Mrs(x2, NZCV); 9174 9175 __ Cmp(x16, x16); 9176 __ Ccmn(x16, 2, NZCVFlag, ne); 9177 __ Mrs(x3, NZCV); 9178 9179 // The MacroAssembler does not allow al as a condition. 9180 { 9181 ExactAssemblyScope scope(&masm, kInstructionSize); 9182 __ ccmp(x16, x16, NZCVFlag, al); 9183 } 9184 __ Mrs(x4, NZCV); 9185 9186 // The MacroAssembler does not allow nv as a condition. 
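// 'nv' (encoding 0b1111) does not mean 'never': it behaves as 'always',
// so this ccmp also performs the comparison, like the 'al' case above.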
9187 { 9188 ExactAssemblyScope scope(&masm, kInstructionSize); 9189 __ ccmp(x16, x16, NZCVFlag, nv); 9190 } 9191 __ Mrs(x5, NZCV); 9192 9193 END(); 9194 9195 RUN(); 9196 9197 ASSERT_EQUAL_32(NFlag, w0); 9198 ASSERT_EQUAL_32(NCFlag, w1); 9199 ASSERT_EQUAL_32(NoFlag, w2); 9200 ASSERT_EQUAL_32(NZCVFlag, w3); 9201 ASSERT_EQUAL_32(ZCFlag, w4); 9202 ASSERT_EQUAL_32(ZCFlag, w5); 9203 9204 TEARDOWN(); 9205 } 9206 9207 9208 TEST(ccmp_wide_imm) { 9209 SETUP(); 9210 9211 START(); 9212 __ Mov(w20, 0); 9213 9214 __ Cmp(w20, Operand(w20)); 9215 __ Ccmp(w20, Operand(0x12345678), NZCVFlag, eq); 9216 __ Mrs(x0, NZCV); 9217 9218 __ Cmp(w20, Operand(w20)); 9219 __ Ccmp(x20, Operand(0xffffffffffffffff), NZCVFlag, eq); 9220 __ Mrs(x1, NZCV); 9221 END(); 9222 9223 RUN(); 9224 9225 ASSERT_EQUAL_32(NFlag, w0); 9226 ASSERT_EQUAL_32(NoFlag, w1); 9227 9228 TEARDOWN(); 9229 } 9230 9231 9232 TEST(ccmp_shift_extend) { 9233 SETUP(); 9234 9235 START(); 9236 __ Mov(w20, 0x2); 9237 __ Mov(w21, 0x1); 9238 __ Mov(x22, 0xffffffffffffffff); 9239 __ Mov(x23, 0xff); 9240 __ Mov(x24, 0xfffffffffffffffe); 9241 9242 __ Cmp(w20, Operand(w20)); 9243 __ Ccmp(w20, Operand(w21, LSL, 1), NZCVFlag, eq); 9244 __ Mrs(x0, NZCV); 9245 9246 __ Cmp(w20, Operand(w20)); 9247 __ Ccmp(x22, Operand(x23, SXTB, 0), NZCVFlag, eq); 9248 __ Mrs(x1, NZCV); 9249 9250 __ Cmp(w20, Operand(w20)); 9251 __ Ccmp(x24, Operand(x23, SXTB, 1), NZCVFlag, eq); 9252 __ Mrs(x2, NZCV); 9253 9254 __ Cmp(w20, Operand(w20)); 9255 __ Ccmp(x24, Operand(x23, UXTB, 1), NZCVFlag, eq); 9256 __ Mrs(x3, NZCV); 9257 9258 __ Cmp(w20, Operand(w20)); 9259 __ Ccmp(x24, Operand(x23, UXTB, 1), NZCVFlag, ne); 9260 __ Mrs(x4, NZCV); 9261 END(); 9262 9263 RUN(); 9264 9265 ASSERT_EQUAL_32(ZCFlag, w0); 9266 ASSERT_EQUAL_32(ZCFlag, w1); 9267 ASSERT_EQUAL_32(ZCFlag, w2); 9268 ASSERT_EQUAL_32(NCFlag, w3); 9269 ASSERT_EQUAL_32(NZCVFlag, w4); 9270 9271 TEARDOWN(); 9272 } 9273 9274 9275 TEST(csel_reg) { 9276 SETUP(); 9277 9278 START(); 9279 __ Mov(x16, 0); 9280 __ Mov(x24, 0x0000000f0000000f); 9281 __ Mov(x25, 0x0000001f0000001f); 9282 9283 __ Cmp(w16, Operand(0)); 9284 __ Csel(w0, w24, w25, eq); 9285 __ Csel(w1, w24, w25, ne); 9286 __ Csinc(w2, w24, w25, mi); 9287 __ Csinc(w3, w24, w25, pl); 9288 9289 // The MacroAssembler does not allow al or nv as a condition. 9290 { 9291 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 9292 __ csel(w13, w24, w25, al); 9293 __ csel(x14, x24, x25, nv); 9294 } 9295 9296 __ Cmp(x16, Operand(1)); 9297 __ Csinv(x4, x24, x25, gt); 9298 __ Csinv(x5, x24, x25, le); 9299 __ Csneg(x6, x24, x25, hs); 9300 __ Csneg(x7, x24, x25, lo); 9301 9302 __ Cset(w8, ne); 9303 __ Csetm(w9, ne); 9304 __ Cinc(x10, x25, ne); 9305 __ Cinv(x11, x24, ne); 9306 __ Cneg(x12, x24, ne); 9307 9308 // The MacroAssembler does not allow al or nv as a condition. 
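// With an always-true condition, csel simply selects the first source
// register, as the checks on x15 and x17 below confirm.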
9309 { 9310 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 9311 __ csel(w15, w24, w25, al); 9312 __ csel(x17, x24, x25, nv); 9313 } 9314 9315 END(); 9316 9317 RUN(); 9318 9319 ASSERT_EQUAL_64(0x0000000f, x0); 9320 ASSERT_EQUAL_64(0x0000001f, x1); 9321 ASSERT_EQUAL_64(0x00000020, x2); 9322 ASSERT_EQUAL_64(0x0000000f, x3); 9323 ASSERT_EQUAL_64(0xffffffe0ffffffe0, x4); 9324 ASSERT_EQUAL_64(0x0000000f0000000f, x5); 9325 ASSERT_EQUAL_64(0xffffffe0ffffffe1, x6); 9326 ASSERT_EQUAL_64(0x0000000f0000000f, x7); 9327 ASSERT_EQUAL_64(0x00000001, x8); 9328 ASSERT_EQUAL_64(0xffffffff, x9); 9329 ASSERT_EQUAL_64(0x0000001f00000020, x10); 9330 ASSERT_EQUAL_64(0xfffffff0fffffff0, x11); 9331 ASSERT_EQUAL_64(0xfffffff0fffffff1, x12); 9332 ASSERT_EQUAL_64(0x0000000f, x13); 9333 ASSERT_EQUAL_64(0x0000000f0000000f, x14); 9334 ASSERT_EQUAL_64(0x0000000f, x15); 9335 ASSERT_EQUAL_64(0x0000000f0000000f, x17); 9336 9337 TEARDOWN(); 9338 } 9339 9340 9341 TEST(csel_imm) { 9342 SETUP(); 9343 9344 int values[] = {-123, -2, -1, 0, 1, 2, 123}; 9345 int n_values = sizeof(values) / sizeof(values[0]); 9346 9347 for (int i = 0; i < n_values; i++) { 9348 for (int j = 0; j < n_values; j++) { 9349 int left = values[i]; 9350 int right = values[j]; 9351 9352 START(); 9353 __ Mov(x10, 0); 9354 __ Cmp(x10, 0); 9355 __ Csel(w0, left, right, eq); 9356 __ Csel(w1, left, right, ne); 9357 __ Csel(x2, left, right, eq); 9358 __ Csel(x3, left, right, ne); 9359 9360 END(); 9361 9362 RUN(); 9363 9364 ASSERT_EQUAL_32(left, w0); 9365 ASSERT_EQUAL_32(right, w1); 9366 ASSERT_EQUAL_64(left, x2); 9367 ASSERT_EQUAL_64(right, x3); 9368 } 9369 } 9370 9371 TEARDOWN(); 9372 } 9373 9374 9375 TEST(csel_mixed) { 9376 SETUP(); 9377 9378 START(); 9379 __ Mov(x18, 0); 9380 __ Mov(x19, 0x80000000); 9381 __ Mov(x20, 0x8000000000000000); 9382 9383 __ Cmp(x18, Operand(0)); 9384 __ Csel(w0, w19, -2, ne); 9385 __ Csel(w1, w19, -1, ne); 9386 __ Csel(w2, w19, 0, ne); 9387 __ Csel(w3, w19, 1, ne); 9388 __ Csel(w4, w19, 2, ne); 9389 __ Csel(w5, w19, Operand(w19, ASR, 31), ne); 9390 __ Csel(w6, w19, Operand(w19, ROR, 1), ne); 9391 __ Csel(w7, w19, 3, eq); 9392 9393 __ Csel(x8, x20, -2, ne); 9394 __ Csel(x9, x20, -1, ne); 9395 __ Csel(x10, x20, 0, ne); 9396 __ Csel(x11, x20, 1, ne); 9397 __ Csel(x12, x20, 2, ne); 9398 __ Csel(x13, x20, Operand(x20, ASR, 63), ne); 9399 __ Csel(x14, x20, Operand(x20, ROR, 1), ne); 9400 __ Csel(x15, x20, 3, eq); 9401 9402 END(); 9403 9404 RUN(); 9405 9406 ASSERT_EQUAL_32(-2, w0); 9407 ASSERT_EQUAL_32(-1, w1); 9408 ASSERT_EQUAL_32(0, w2); 9409 ASSERT_EQUAL_32(1, w3); 9410 ASSERT_EQUAL_32(2, w4); 9411 ASSERT_EQUAL_32(-1, w5); 9412 ASSERT_EQUAL_32(0x40000000, w6); 9413 ASSERT_EQUAL_32(0x80000000, w7); 9414 9415 ASSERT_EQUAL_64(-2, x8); 9416 ASSERT_EQUAL_64(-1, x9); 9417 ASSERT_EQUAL_64(0, x10); 9418 ASSERT_EQUAL_64(1, x11); 9419 ASSERT_EQUAL_64(2, x12); 9420 ASSERT_EQUAL_64(-1, x13); 9421 ASSERT_EQUAL_64(0x4000000000000000, x14); 9422 ASSERT_EQUAL_64(0x8000000000000000, x15); 9423 9424 TEARDOWN(); 9425 } 9426 9427 9428 TEST(lslv) { 9429 SETUP(); 9430 9431 uint64_t value = 0x0123456789abcdef; 9432 int shift[] = {1, 3, 5, 9, 17, 33}; 9433 9434 START(); 9435 __ Mov(x0, value); 9436 __ Mov(w1, shift[0]); 9437 __ Mov(w2, shift[1]); 9438 __ Mov(w3, shift[2]); 9439 __ Mov(w4, shift[3]); 9440 __ Mov(w5, shift[4]); 9441 __ Mov(w6, shift[5]); 9442 9443 // The MacroAssembler does not allow zr as an argument. 
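  // (A shift by xzr is a shift by zero, since the shift amount is read from
  // the register modulo the register width; x0 should be left unchanged,
  // which the first assertion below checks.)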
9444 { 9445 ExactAssemblyScope scope(&masm, kInstructionSize); 9446 __ lslv(x0, x0, xzr); 9447 } 9448 9449 __ Lsl(x16, x0, x1); 9450 __ Lsl(x17, x0, x2); 9451 __ Lsl(x18, x0, x3); 9452 __ Lsl(x19, x0, x4); 9453 __ Lsl(x20, x0, x5); 9454 __ Lsl(x21, x0, x6); 9455 9456 __ Lsl(w22, w0, w1); 9457 __ Lsl(w23, w0, w2); 9458 __ Lsl(w24, w0, w3); 9459 __ Lsl(w25, w0, w4); 9460 __ Lsl(w26, w0, w5); 9461 __ Lsl(w27, w0, w6); 9462 END(); 9463 9464 RUN(); 9465 9466 ASSERT_EQUAL_64(value, x0); 9467 ASSERT_EQUAL_64(value << (shift[0] & 63), x16); 9468 ASSERT_EQUAL_64(value << (shift[1] & 63), x17); 9469 ASSERT_EQUAL_64(value << (shift[2] & 63), x18); 9470 ASSERT_EQUAL_64(value << (shift[3] & 63), x19); 9471 ASSERT_EQUAL_64(value << (shift[4] & 63), x20); 9472 ASSERT_EQUAL_64(value << (shift[5] & 63), x21); 9473 ASSERT_EQUAL_32(value << (shift[0] & 31), w22); 9474 ASSERT_EQUAL_32(value << (shift[1] & 31), w23); 9475 ASSERT_EQUAL_32(value << (shift[2] & 31), w24); 9476 ASSERT_EQUAL_32(value << (shift[3] & 31), w25); 9477 ASSERT_EQUAL_32(value << (shift[4] & 31), w26); 9478 ASSERT_EQUAL_32(value << (shift[5] & 31), w27); 9479 9480 TEARDOWN(); 9481 } 9482 9483 9484 TEST(lsrv) { 9485 SETUP(); 9486 9487 uint64_t value = 0x0123456789abcdef; 9488 int shift[] = {1, 3, 5, 9, 17, 33}; 9489 9490 START(); 9491 __ Mov(x0, value); 9492 __ Mov(w1, shift[0]); 9493 __ Mov(w2, shift[1]); 9494 __ Mov(w3, shift[2]); 9495 __ Mov(w4, shift[3]); 9496 __ Mov(w5, shift[4]); 9497 __ Mov(w6, shift[5]); 9498 9499 // The MacroAssembler does not allow zr as an argument. 9500 { 9501 ExactAssemblyScope scope(&masm, kInstructionSize); 9502 __ lsrv(x0, x0, xzr); 9503 } 9504 9505 __ Lsr(x16, x0, x1); 9506 __ Lsr(x17, x0, x2); 9507 __ Lsr(x18, x0, x3); 9508 __ Lsr(x19, x0, x4); 9509 __ Lsr(x20, x0, x5); 9510 __ Lsr(x21, x0, x6); 9511 9512 __ Lsr(w22, w0, w1); 9513 __ Lsr(w23, w0, w2); 9514 __ Lsr(w24, w0, w3); 9515 __ Lsr(w25, w0, w4); 9516 __ Lsr(w26, w0, w5); 9517 __ Lsr(w27, w0, w6); 9518 END(); 9519 9520 RUN(); 9521 9522 ASSERT_EQUAL_64(value, x0); 9523 ASSERT_EQUAL_64(value >> (shift[0] & 63), x16); 9524 ASSERT_EQUAL_64(value >> (shift[1] & 63), x17); 9525 ASSERT_EQUAL_64(value >> (shift[2] & 63), x18); 9526 ASSERT_EQUAL_64(value >> (shift[3] & 63), x19); 9527 ASSERT_EQUAL_64(value >> (shift[4] & 63), x20); 9528 ASSERT_EQUAL_64(value >> (shift[5] & 63), x21); 9529 9530 value &= 0xffffffff; 9531 ASSERT_EQUAL_32(value >> (shift[0] & 31), w22); 9532 ASSERT_EQUAL_32(value >> (shift[1] & 31), w23); 9533 ASSERT_EQUAL_32(value >> (shift[2] & 31), w24); 9534 ASSERT_EQUAL_32(value >> (shift[3] & 31), w25); 9535 ASSERT_EQUAL_32(value >> (shift[4] & 31), w26); 9536 ASSERT_EQUAL_32(value >> (shift[5] & 31), w27); 9537 9538 TEARDOWN(); 9539 } 9540 9541 9542 TEST(asrv) { 9543 SETUP(); 9544 9545 int64_t value = 0xfedcba98fedcba98; 9546 int shift[] = {1, 3, 5, 9, 17, 33}; 9547 9548 START(); 9549 __ Mov(x0, value); 9550 __ Mov(w1, shift[0]); 9551 __ Mov(w2, shift[1]); 9552 __ Mov(w3, shift[2]); 9553 __ Mov(w4, shift[3]); 9554 __ Mov(w5, shift[4]); 9555 __ Mov(w6, shift[5]); 9556 9557 // The MacroAssembler does not allow zr as an argument. 
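  // (As in the lslv test, shifting by xzr leaves x0 untouched. The input here
  // has its top bit set, so the arithmetic shifts below replicate the sign.)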
9558 { 9559 ExactAssemblyScope scope(&masm, kInstructionSize); 9560 __ asrv(x0, x0, xzr); 9561 } 9562 9563 __ Asr(x16, x0, x1); 9564 __ Asr(x17, x0, x2); 9565 __ Asr(x18, x0, x3); 9566 __ Asr(x19, x0, x4); 9567 __ Asr(x20, x0, x5); 9568 __ Asr(x21, x0, x6); 9569 9570 __ Asr(w22, w0, w1); 9571 __ Asr(w23, w0, w2); 9572 __ Asr(w24, w0, w3); 9573 __ Asr(w25, w0, w4); 9574 __ Asr(w26, w0, w5); 9575 __ Asr(w27, w0, w6); 9576 END(); 9577 9578 RUN(); 9579 9580 ASSERT_EQUAL_64(value, x0); 9581 ASSERT_EQUAL_64(value >> (shift[0] & 63), x16); 9582 ASSERT_EQUAL_64(value >> (shift[1] & 63), x17); 9583 ASSERT_EQUAL_64(value >> (shift[2] & 63), x18); 9584 ASSERT_EQUAL_64(value >> (shift[3] & 63), x19); 9585 ASSERT_EQUAL_64(value >> (shift[4] & 63), x20); 9586 ASSERT_EQUAL_64(value >> (shift[5] & 63), x21); 9587 9588 int32_t value32 = static_cast<int32_t>(value & 0xffffffff); 9589 ASSERT_EQUAL_32(value32 >> (shift[0] & 31), w22); 9590 ASSERT_EQUAL_32(value32 >> (shift[1] & 31), w23); 9591 ASSERT_EQUAL_32(value32 >> (shift[2] & 31), w24); 9592 ASSERT_EQUAL_32(value32 >> (shift[3] & 31), w25); 9593 ASSERT_EQUAL_32(value32 >> (shift[4] & 31), w26); 9594 ASSERT_EQUAL_32(value32 >> (shift[5] & 31), w27); 9595 9596 TEARDOWN(); 9597 } 9598 9599 9600 TEST(rorv) { 9601 SETUP(); 9602 9603 uint64_t value = 0x0123456789abcdef; 9604 int shift[] = {4, 8, 12, 16, 24, 36}; 9605 9606 START(); 9607 __ Mov(x0, value); 9608 __ Mov(w1, shift[0]); 9609 __ Mov(w2, shift[1]); 9610 __ Mov(w3, shift[2]); 9611 __ Mov(w4, shift[3]); 9612 __ Mov(w5, shift[4]); 9613 __ Mov(w6, shift[5]); 9614 9615 // The MacroAssembler does not allow zr as an argument. 9616 { 9617 ExactAssemblyScope scope(&masm, kInstructionSize); 9618 __ rorv(x0, x0, xzr); 9619 } 9620 9621 __ Ror(x16, x0, x1); 9622 __ Ror(x17, x0, x2); 9623 __ Ror(x18, x0, x3); 9624 __ Ror(x19, x0, x4); 9625 __ Ror(x20, x0, x5); 9626 __ Ror(x21, x0, x6); 9627 9628 __ Ror(w22, w0, w1); 9629 __ Ror(w23, w0, w2); 9630 __ Ror(w24, w0, w3); 9631 __ Ror(w25, w0, w4); 9632 __ Ror(w26, w0, w5); 9633 __ Ror(w27, w0, w6); 9634 END(); 9635 9636 RUN(); 9637 9638 ASSERT_EQUAL_64(value, x0); 9639 ASSERT_EQUAL_64(0xf0123456789abcde, x16); 9640 ASSERT_EQUAL_64(0xef0123456789abcd, x17); 9641 ASSERT_EQUAL_64(0xdef0123456789abc, x18); 9642 ASSERT_EQUAL_64(0xcdef0123456789ab, x19); 9643 ASSERT_EQUAL_64(0xabcdef0123456789, x20); 9644 ASSERT_EQUAL_64(0x789abcdef0123456, x21); 9645 ASSERT_EQUAL_32(0xf89abcde, w22); 9646 ASSERT_EQUAL_32(0xef89abcd, w23); 9647 ASSERT_EQUAL_32(0xdef89abc, w24); 9648 ASSERT_EQUAL_32(0xcdef89ab, w25); 9649 ASSERT_EQUAL_32(0xabcdef89, w26); 9650 ASSERT_EQUAL_32(0xf89abcde, w27); 9651 9652 TEARDOWN(); 9653 } 9654 9655 9656 TEST(bfm) { 9657 SETUP(); 9658 9659 START(); 9660 __ Mov(x1, 0x0123456789abcdef); 9661 9662 __ Mov(x10, 0x8888888888888888); 9663 __ Mov(x11, 0x8888888888888888); 9664 __ Mov(x12, 0x8888888888888888); 9665 __ Mov(x13, 0x8888888888888888); 9666 __ Mov(w20, 0x88888888); 9667 __ Mov(w21, 0x88888888); 9668 9669 __ Bfm(x10, x1, 16, 31); 9670 __ Bfm(x11, x1, 32, 15); 9671 9672 __ Bfm(w20, w1, 16, 23); 9673 __ Bfm(w21, w1, 24, 15); 9674 9675 // Aliases. 
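  // (Bfi inserts the low 8 bits of x1 at bit 16 of x12; Bfxil extracts the
  // 8 bits at bit 16 of x1 into the low bits of x13. Both preserve the
  // remaining destination bits.)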
9676 __ Bfi(x12, x1, 16, 8); 9677 __ Bfxil(x13, x1, 16, 8); 9678 END(); 9679 9680 RUN(); 9681 9682 9683 ASSERT_EQUAL_64(0x88888888888889ab, x10); 9684 ASSERT_EQUAL_64(0x8888cdef88888888, x11); 9685 9686 ASSERT_EQUAL_32(0x888888ab, w20); 9687 ASSERT_EQUAL_32(0x88cdef88, w21); 9688 9689 ASSERT_EQUAL_64(0x8888888888ef8888, x12); 9690 ASSERT_EQUAL_64(0x88888888888888ab, x13); 9691 9692 TEARDOWN(); 9693 } 9694 9695 9696 TEST(sbfm) { 9697 SETUP(); 9698 9699 START(); 9700 __ Mov(x1, 0x0123456789abcdef); 9701 __ Mov(x2, 0xfedcba9876543210); 9702 9703 __ Sbfm(x10, x1, 16, 31); 9704 __ Sbfm(x11, x1, 32, 15); 9705 __ Sbfm(x12, x1, 32, 47); 9706 __ Sbfm(x13, x1, 48, 35); 9707 9708 __ Sbfm(w14, w1, 16, 23); 9709 __ Sbfm(w15, w1, 24, 15); 9710 __ Sbfm(w16, w2, 16, 23); 9711 __ Sbfm(w17, w2, 24, 15); 9712 9713 // Aliases. 9714 __ Asr(x18, x1, 32); 9715 __ Asr(x19, x2, 32); 9716 __ Sbfiz(x20, x1, 8, 16); 9717 __ Sbfiz(x21, x2, 8, 16); 9718 __ Sbfx(x22, x1, 8, 16); 9719 __ Sbfx(x23, x2, 8, 16); 9720 __ Sxtb(x24, w1); 9721 __ Sxtb(x25, x2); 9722 __ Sxth(x26, w1); 9723 __ Sxth(x27, x2); 9724 __ Sxtw(x28, w1); 9725 __ Sxtw(x29, x2); 9726 END(); 9727 9728 RUN(); 9729 9730 9731 ASSERT_EQUAL_64(0xffffffffffff89ab, x10); 9732 ASSERT_EQUAL_64(0xffffcdef00000000, x11); 9733 ASSERT_EQUAL_64(0x0000000000004567, x12); 9734 ASSERT_EQUAL_64(0x000789abcdef0000, x13); 9735 9736 ASSERT_EQUAL_32(0xffffffab, w14); 9737 ASSERT_EQUAL_32(0xffcdef00, w15); 9738 ASSERT_EQUAL_32(0x00000054, w16); 9739 ASSERT_EQUAL_32(0x00321000, w17); 9740 9741 ASSERT_EQUAL_64(0x0000000001234567, x18); 9742 ASSERT_EQUAL_64(0xfffffffffedcba98, x19); 9743 ASSERT_EQUAL_64(0xffffffffffcdef00, x20); 9744 ASSERT_EQUAL_64(0x0000000000321000, x21); 9745 ASSERT_EQUAL_64(0xffffffffffffabcd, x22); 9746 ASSERT_EQUAL_64(0x0000000000005432, x23); 9747 ASSERT_EQUAL_64(0xffffffffffffffef, x24); 9748 ASSERT_EQUAL_64(0x0000000000000010, x25); 9749 ASSERT_EQUAL_64(0xffffffffffffcdef, x26); 9750 ASSERT_EQUAL_64(0x0000000000003210, x27); 9751 ASSERT_EQUAL_64(0xffffffff89abcdef, x28); 9752 ASSERT_EQUAL_64(0x0000000076543210, x29); 9753 9754 TEARDOWN(); 9755 } 9756 9757 9758 TEST(ubfm) { 9759 SETUP(); 9760 9761 START(); 9762 __ Mov(x1, 0x0123456789abcdef); 9763 __ Mov(x2, 0xfedcba9876543210); 9764 9765 __ Mov(x10, 0x8888888888888888); 9766 __ Mov(x11, 0x8888888888888888); 9767 9768 __ Ubfm(x10, x1, 16, 31); 9769 __ Ubfm(x11, x1, 32, 15); 9770 __ Ubfm(x12, x1, 32, 47); 9771 __ Ubfm(x13, x1, 48, 35); 9772 9773 __ Ubfm(w25, w1, 16, 23); 9774 __ Ubfm(w26, w1, 24, 15); 9775 __ Ubfm(w27, w2, 16, 23); 9776 __ Ubfm(w28, w2, 24, 15); 9777 9778 // Aliases 9779 __ Lsl(x15, x1, 63); 9780 __ Lsl(x16, x1, 0); 9781 __ Lsr(x17, x1, 32); 9782 __ Ubfiz(x18, x1, 8, 16); 9783 __ Ubfx(x19, x1, 8, 16); 9784 __ Uxtb(x20, x1); 9785 __ Uxth(x21, x1); 9786 __ Uxtw(x22, x1); 9787 END(); 9788 9789 RUN(); 9790 9791 ASSERT_EQUAL_64(0x00000000000089ab, x10); 9792 ASSERT_EQUAL_64(0x0000cdef00000000, x11); 9793 ASSERT_EQUAL_64(0x0000000000004567, x12); 9794 ASSERT_EQUAL_64(0x000789abcdef0000, x13); 9795 9796 ASSERT_EQUAL_32(0x000000ab, w25); 9797 ASSERT_EQUAL_32(0x00cdef00, w26); 9798 ASSERT_EQUAL_32(0x00000054, w27); 9799 ASSERT_EQUAL_32(0x00321000, w28); 9800 9801 ASSERT_EQUAL_64(0x8000000000000000, x15); 9802 ASSERT_EQUAL_64(0x0123456789abcdef, x16); 9803 ASSERT_EQUAL_64(0x0000000001234567, x17); 9804 ASSERT_EQUAL_64(0x0000000000cdef00, x18); 9805 ASSERT_EQUAL_64(0x000000000000abcd, x19); 9806 ASSERT_EQUAL_64(0x00000000000000ef, x20); 9807 ASSERT_EQUAL_64(0x000000000000cdef, x21); 9808 
ASSERT_EQUAL_64(0x0000000089abcdef, x22); 9809 9810 TEARDOWN(); 9811 } 9812 9813 9814 TEST(extr) { 9815 SETUP(); 9816 9817 START(); 9818 __ Mov(x1, 0x0123456789abcdef); 9819 __ Mov(x2, 0xfedcba9876543210); 9820 9821 __ Extr(w10, w1, w2, 0); 9822 __ Extr(w11, w1, w2, 1); 9823 __ Extr(x12, x2, x1, 2); 9824 9825 __ Ror(w13, w1, 0); 9826 __ Ror(w14, w2, 17); 9827 __ Ror(w15, w1, 31); 9828 __ Ror(x18, x2, 0); 9829 __ Ror(x19, x2, 1); 9830 __ Ror(x20, x1, 63); 9831 END(); 9832 9833 RUN(); 9834 9835 ASSERT_EQUAL_64(0x76543210, x10); 9836 ASSERT_EQUAL_64(0xbb2a1908, x11); 9837 ASSERT_EQUAL_64(0x0048d159e26af37b, x12); 9838 ASSERT_EQUAL_64(0x89abcdef, x13); 9839 ASSERT_EQUAL_64(0x19083b2a, x14); 9840 ASSERT_EQUAL_64(0x13579bdf, x15); 9841 ASSERT_EQUAL_64(0xfedcba9876543210, x18); 9842 ASSERT_EQUAL_64(0x7f6e5d4c3b2a1908, x19); 9843 ASSERT_EQUAL_64(0x02468acf13579bde, x20); 9844 9845 TEARDOWN(); 9846 } 9847 9848 9849 TEST(fmov_imm) { 9850 SETUP(); 9851 9852 START(); 9853 __ Fmov(s11, 1.0); 9854 __ Fmov(d22, -13.0); 9855 __ Fmov(s1, 255.0); 9856 __ Fmov(d2, 12.34567); 9857 __ Fmov(s3, 0.0); 9858 __ Fmov(d4, 0.0); 9859 __ Fmov(s5, kFP32PositiveInfinity); 9860 __ Fmov(d6, kFP64NegativeInfinity); 9861 END(); 9862 9863 RUN(); 9864 9865 ASSERT_EQUAL_FP32(1.0, s11); 9866 ASSERT_EQUAL_FP64(-13.0, d22); 9867 ASSERT_EQUAL_FP32(255.0, s1); 9868 ASSERT_EQUAL_FP64(12.34567, d2); 9869 ASSERT_EQUAL_FP32(0.0, s3); 9870 ASSERT_EQUAL_FP64(0.0, d4); 9871 ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s5); 9872 ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d6); 9873 9874 TEARDOWN(); 9875 } 9876 9877 9878 TEST(fmov_reg) { 9879 SETUP(); 9880 9881 START(); 9882 __ Fmov(s20, 1.0); 9883 __ Fmov(w10, s20); 9884 __ Fmov(s30, w10); 9885 __ Fmov(s5, s20); 9886 __ Fmov(d1, -13.0); 9887 __ Fmov(x1, d1); 9888 __ Fmov(d2, x1); 9889 __ Fmov(d4, d1); 9890 __ Fmov(d6, RawbitsToDouble(0x0123456789abcdef)); 9891 __ Fmov(s6, s6); 9892 9893 __ Fmov(d0, 0.0); 9894 __ Fmov(v0.D(), 1, x1); 9895 __ Fmov(x2, v0.D(), 1); 9896 9897 END(); 9898 9899 RUN(); 9900 9901 ASSERT_EQUAL_32(FloatToRawbits(1.0), w10); 9902 ASSERT_EQUAL_FP32(1.0, s30); 9903 ASSERT_EQUAL_FP32(1.0, s5); 9904 ASSERT_EQUAL_64(DoubleToRawbits(-13.0), x1); 9905 ASSERT_EQUAL_FP64(-13.0, d2); 9906 ASSERT_EQUAL_FP64(-13.0, d4); 9907 ASSERT_EQUAL_FP32(RawbitsToFloat(0x89abcdef), s6); 9908 ASSERT_EQUAL_128(DoubleToRawbits(-13.0), 0x0000000000000000, q0); 9909 ASSERT_EQUAL_64(DoubleToRawbits(-13.0), x2); 9910 TEARDOWN(); 9911 } 9912 9913 9914 TEST(fadd) { 9915 SETUP(); 9916 9917 START(); 9918 __ Fmov(s14, -0.0f); 9919 __ Fmov(s15, kFP32PositiveInfinity); 9920 __ Fmov(s16, kFP32NegativeInfinity); 9921 __ Fmov(s17, 3.25f); 9922 __ Fmov(s18, 1.0f); 9923 __ Fmov(s19, 0.0f); 9924 9925 __ Fmov(d26, -0.0); 9926 __ Fmov(d27, kFP64PositiveInfinity); 9927 __ Fmov(d28, kFP64NegativeInfinity); 9928 __ Fmov(d29, 0.0); 9929 __ Fmov(d30, -2.0); 9930 __ Fmov(d31, 2.25); 9931 9932 __ Fadd(s0, s17, s18); 9933 __ Fadd(s1, s18, s19); 9934 __ Fadd(s2, s14, s18); 9935 __ Fadd(s3, s15, s18); 9936 __ Fadd(s4, s16, s18); 9937 __ Fadd(s5, s15, s16); 9938 __ Fadd(s6, s16, s15); 9939 9940 __ Fadd(d7, d30, d31); 9941 __ Fadd(d8, d29, d31); 9942 __ Fadd(d9, d26, d31); 9943 __ Fadd(d10, d27, d31); 9944 __ Fadd(d11, d28, d31); 9945 __ Fadd(d12, d27, d28); 9946 __ Fadd(d13, d28, d27); 9947 END(); 9948 9949 RUN(); 9950 9951 ASSERT_EQUAL_FP32(4.25, s0); 9952 ASSERT_EQUAL_FP32(1.0, s1); 9953 ASSERT_EQUAL_FP32(1.0, s2); 9954 ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s3); 9955 ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s4); 9956 
ASSERT_EQUAL_FP32(kFP32DefaultNaN, s5); 9957 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s6); 9958 ASSERT_EQUAL_FP64(0.25, d7); 9959 ASSERT_EQUAL_FP64(2.25, d8); 9960 ASSERT_EQUAL_FP64(2.25, d9); 9961 ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d10); 9962 ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d11); 9963 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d12); 9964 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d13); 9965 9966 TEARDOWN(); 9967 } 9968 9969 9970 TEST(fsub) { 9971 SETUP(); 9972 9973 START(); 9974 __ Fmov(s14, -0.0f); 9975 __ Fmov(s15, kFP32PositiveInfinity); 9976 __ Fmov(s16, kFP32NegativeInfinity); 9977 __ Fmov(s17, 3.25f); 9978 __ Fmov(s18, 1.0f); 9979 __ Fmov(s19, 0.0f); 9980 9981 __ Fmov(d26, -0.0); 9982 __ Fmov(d27, kFP64PositiveInfinity); 9983 __ Fmov(d28, kFP64NegativeInfinity); 9984 __ Fmov(d29, 0.0); 9985 __ Fmov(d30, -2.0); 9986 __ Fmov(d31, 2.25); 9987 9988 __ Fsub(s0, s17, s18); 9989 __ Fsub(s1, s18, s19); 9990 __ Fsub(s2, s14, s18); 9991 __ Fsub(s3, s18, s15); 9992 __ Fsub(s4, s18, s16); 9993 __ Fsub(s5, s15, s15); 9994 __ Fsub(s6, s16, s16); 9995 9996 __ Fsub(d7, d30, d31); 9997 __ Fsub(d8, d29, d31); 9998 __ Fsub(d9, d26, d31); 9999 __ Fsub(d10, d31, d27); 10000 __ Fsub(d11, d31, d28); 10001 __ Fsub(d12, d27, d27); 10002 __ Fsub(d13, d28, d28); 10003 END(); 10004 10005 RUN(); 10006 10007 ASSERT_EQUAL_FP32(2.25, s0); 10008 ASSERT_EQUAL_FP32(1.0, s1); 10009 ASSERT_EQUAL_FP32(-1.0, s2); 10010 ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s3); 10011 ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s4); 10012 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s5); 10013 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s6); 10014 ASSERT_EQUAL_FP64(-4.25, d7); 10015 ASSERT_EQUAL_FP64(-2.25, d8); 10016 ASSERT_EQUAL_FP64(-2.25, d9); 10017 ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d10); 10018 ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d11); 10019 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d12); 10020 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d13); 10021 10022 TEARDOWN(); 10023 } 10024 10025 10026 TEST(fmul) { 10027 SETUP(); 10028 10029 START(); 10030 __ Fmov(s14, -0.0f); 10031 __ Fmov(s15, kFP32PositiveInfinity); 10032 __ Fmov(s16, kFP32NegativeInfinity); 10033 __ Fmov(s17, 3.25f); 10034 __ Fmov(s18, 2.0f); 10035 __ Fmov(s19, 0.0f); 10036 __ Fmov(s20, -2.0f); 10037 10038 __ Fmov(d26, -0.0); 10039 __ Fmov(d27, kFP64PositiveInfinity); 10040 __ Fmov(d28, kFP64NegativeInfinity); 10041 __ Fmov(d29, 0.0); 10042 __ Fmov(d30, -2.0); 10043 __ Fmov(d31, 2.25); 10044 10045 __ Fmul(s0, s17, s18); 10046 __ Fmul(s1, s18, s19); 10047 __ Fmul(s2, s14, s14); 10048 __ Fmul(s3, s15, s20); 10049 __ Fmul(s4, s16, s20); 10050 __ Fmul(s5, s15, s19); 10051 __ Fmul(s6, s19, s16); 10052 10053 __ Fmul(d7, d30, d31); 10054 __ Fmul(d8, d29, d31); 10055 __ Fmul(d9, d26, d26); 10056 __ Fmul(d10, d27, d30); 10057 __ Fmul(d11, d28, d30); 10058 __ Fmul(d12, d27, d29); 10059 __ Fmul(d13, d29, d28); 10060 END(); 10061 10062 RUN(); 10063 10064 ASSERT_EQUAL_FP32(6.5, s0); 10065 ASSERT_EQUAL_FP32(0.0, s1); 10066 ASSERT_EQUAL_FP32(0.0, s2); 10067 ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s3); 10068 ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s4); 10069 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s5); 10070 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s6); 10071 ASSERT_EQUAL_FP64(-4.5, d7); 10072 ASSERT_EQUAL_FP64(0.0, d8); 10073 ASSERT_EQUAL_FP64(0.0, d9); 10074 ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d10); 10075 ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d11); 10076 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d12); 10077 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d13); 10078 10079 TEARDOWN(); 10080 } 10081 10082 10083 static void 
FmaddFmsubHelper(double n, 10084 double m, 10085 double a, 10086 double fmadd, 10087 double fmsub, 10088 double fnmadd, 10089 double fnmsub) { 10090 SETUP(); 10091 START(); 10092 10093 __ Fmov(d0, n); 10094 __ Fmov(d1, m); 10095 __ Fmov(d2, a); 10096 __ Fmadd(d28, d0, d1, d2); 10097 __ Fmsub(d29, d0, d1, d2); 10098 __ Fnmadd(d30, d0, d1, d2); 10099 __ Fnmsub(d31, d0, d1, d2); 10100 10101 END(); 10102 RUN(); 10103 10104 ASSERT_EQUAL_FP64(fmadd, d28); 10105 ASSERT_EQUAL_FP64(fmsub, d29); 10106 ASSERT_EQUAL_FP64(fnmadd, d30); 10107 ASSERT_EQUAL_FP64(fnmsub, d31); 10108 10109 TEARDOWN(); 10110 } 10111 10112 10113 TEST(fmadd_fmsub_double) { 10114 // It's hard to check the result of fused operations because the only way to 10115 // calculate the result is using fma, which is what the simulator uses anyway. 10116 10117 // Basic operation. 10118 FmaddFmsubHelper(1.0, 2.0, 3.0, 5.0, 1.0, -5.0, -1.0); 10119 FmaddFmsubHelper(-1.0, 2.0, 3.0, 1.0, 5.0, -1.0, -5.0); 10120 10121 // Check the sign of exact zeroes. 10122 // n m a fmadd fmsub fnmadd fnmsub 10123 FmaddFmsubHelper(-0.0, +0.0, -0.0, -0.0, +0.0, +0.0, +0.0); 10124 FmaddFmsubHelper(+0.0, +0.0, -0.0, +0.0, -0.0, +0.0, +0.0); 10125 FmaddFmsubHelper(+0.0, +0.0, +0.0, +0.0, +0.0, -0.0, +0.0); 10126 FmaddFmsubHelper(-0.0, +0.0, +0.0, +0.0, +0.0, +0.0, -0.0); 10127 FmaddFmsubHelper(+0.0, -0.0, -0.0, -0.0, +0.0, +0.0, +0.0); 10128 FmaddFmsubHelper(-0.0, -0.0, -0.0, +0.0, -0.0, +0.0, +0.0); 10129 FmaddFmsubHelper(-0.0, -0.0, +0.0, +0.0, +0.0, -0.0, +0.0); 10130 FmaddFmsubHelper(+0.0, -0.0, +0.0, +0.0, +0.0, +0.0, -0.0); 10131 10132 // Check NaN generation. 10133 FmaddFmsubHelper(kFP64PositiveInfinity, 10134 0.0, 10135 42.0, 10136 kFP64DefaultNaN, 10137 kFP64DefaultNaN, 10138 kFP64DefaultNaN, 10139 kFP64DefaultNaN); 10140 FmaddFmsubHelper(0.0, 10141 kFP64PositiveInfinity, 10142 42.0, 10143 kFP64DefaultNaN, 10144 kFP64DefaultNaN, 10145 kFP64DefaultNaN, 10146 kFP64DefaultNaN); 10147 FmaddFmsubHelper(kFP64PositiveInfinity, 10148 1.0, 10149 kFP64PositiveInfinity, 10150 kFP64PositiveInfinity, // inf + ( inf * 1) = inf 10151 kFP64DefaultNaN, // inf + (-inf * 1) = NaN 10152 kFP64NegativeInfinity, // -inf + (-inf * 1) = -inf 10153 kFP64DefaultNaN); // -inf + ( inf * 1) = NaN 10154 FmaddFmsubHelper(kFP64NegativeInfinity, 10155 1.0, 10156 kFP64PositiveInfinity, 10157 kFP64DefaultNaN, // inf + (-inf * 1) = NaN 10158 kFP64PositiveInfinity, // inf + ( inf * 1) = inf 10159 kFP64DefaultNaN, // -inf + ( inf * 1) = NaN 10160 kFP64NegativeInfinity); // -inf + (-inf * 1) = -inf 10161 } 10162 10163 10164 static void FmaddFmsubHelper(float n, 10165 float m, 10166 float a, 10167 float fmadd, 10168 float fmsub, 10169 float fnmadd, 10170 float fnmsub) { 10171 SETUP(); 10172 START(); 10173 10174 __ Fmov(s0, n); 10175 __ Fmov(s1, m); 10176 __ Fmov(s2, a); 10177 __ Fmadd(s28, s0, s1, s2); 10178 __ Fmsub(s29, s0, s1, s2); 10179 __ Fnmadd(s30, s0, s1, s2); 10180 __ Fnmsub(s31, s0, s1, s2); 10181 10182 END(); 10183 RUN(); 10184 10185 ASSERT_EQUAL_FP32(fmadd, s28); 10186 ASSERT_EQUAL_FP32(fmsub, s29); 10187 ASSERT_EQUAL_FP32(fnmadd, s30); 10188 ASSERT_EQUAL_FP32(fnmsub, s31); 10189 10190 TEARDOWN(); 10191 } 10192 10193 10194 TEST(fmadd_fmsub_float) { 10195 // It's hard to check the result of fused operations because the only way to 10196 // calculate the result is using fma, which is what the simulator uses anyway. 10197 10198 // Basic operation. 
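  // (The expected values follow the A64 definitions: fmadd = a + n*m,
  // fmsub = a - n*m, fnmadd = -a - n*m and fnmsub = -a + n*m; for n=1, m=2,
  // a=3 this gives 5, 1, -5 and -1.)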
10199   FmaddFmsubHelper(1.0f, 2.0f, 3.0f, 5.0f, 1.0f, -5.0f, -1.0f);
10200   FmaddFmsubHelper(-1.0f, 2.0f, 3.0f, 1.0f, 5.0f, -1.0f, -5.0f);
10201
10202   // Check the sign of exact zeroes.
10203   //               n      m      a      fmadd  fmsub  fnmadd fnmsub
10204   FmaddFmsubHelper(-0.0f, +0.0f, -0.0f, -0.0f, +0.0f, +0.0f, +0.0f);
10205   FmaddFmsubHelper(+0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, +0.0f);
10206   FmaddFmsubHelper(+0.0f, +0.0f, +0.0f, +0.0f, +0.0f, -0.0f, +0.0f);
10207   FmaddFmsubHelper(-0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, -0.0f);
10208   FmaddFmsubHelper(+0.0f, -0.0f, -0.0f, -0.0f, +0.0f, +0.0f, +0.0f);
10209   FmaddFmsubHelper(-0.0f, -0.0f, -0.0f, +0.0f, -0.0f, +0.0f, +0.0f);
10210   FmaddFmsubHelper(-0.0f, -0.0f, +0.0f, +0.0f, +0.0f, -0.0f, +0.0f);
10211   FmaddFmsubHelper(+0.0f, -0.0f, +0.0f, +0.0f, +0.0f, +0.0f, -0.0f);
10212
10213   // Check NaN generation.
10214   FmaddFmsubHelper(kFP32PositiveInfinity,
10215                    0.0f,
10216                    42.0f,
10217                    kFP32DefaultNaN,
10218                    kFP32DefaultNaN,
10219                    kFP32DefaultNaN,
10220                    kFP32DefaultNaN);
10221   FmaddFmsubHelper(0.0f,
10222                    kFP32PositiveInfinity,
10223                    42.0f,
10224                    kFP32DefaultNaN,
10225                    kFP32DefaultNaN,
10226                    kFP32DefaultNaN,
10227                    kFP32DefaultNaN);
10228   FmaddFmsubHelper(kFP32PositiveInfinity,
10229                    1.0f,
10230                    kFP32PositiveInfinity,
10231                    kFP32PositiveInfinity,  //  inf + ( inf * 1) =  inf
10232                    kFP32DefaultNaN,        //  inf + (-inf * 1) = NaN
10233                    kFP32NegativeInfinity,  // -inf + (-inf * 1) = -inf
10234                    kFP32DefaultNaN);       // -inf + ( inf * 1) = NaN
10235   FmaddFmsubHelper(kFP32NegativeInfinity,
10236                    1.0f,
10237                    kFP32PositiveInfinity,
10238                    kFP32DefaultNaN,         //  inf + (-inf * 1) = NaN
10239                    kFP32PositiveInfinity,   //  inf + ( inf * 1) =  inf
10240                    kFP32DefaultNaN,         // -inf + ( inf * 1) = NaN
10241                    kFP32NegativeInfinity);  // -inf + (-inf * 1) = -inf
10242 }
10243
10244
10245 TEST(fmadd_fmsub_double_nans) {
10246   // Make sure that NaN propagation works correctly.
10247   double s1 = RawbitsToDouble(0x7ff5555511111111);
10248   double s2 = RawbitsToDouble(0x7ff5555522222222);
10249   double sa = RawbitsToDouble(0x7ff55555aaaaaaaa);
10250   double q1 = RawbitsToDouble(0x7ffaaaaa11111111);
10251   double q2 = RawbitsToDouble(0x7ffaaaaa22222222);
10252   double qa = RawbitsToDouble(0x7ffaaaaaaaaaaaaa);
10253   VIXL_ASSERT(IsSignallingNaN(s1));
10254   VIXL_ASSERT(IsSignallingNaN(s2));
10255   VIXL_ASSERT(IsSignallingNaN(sa));
10256   VIXL_ASSERT(IsQuietNaN(q1));
10257   VIXL_ASSERT(IsQuietNaN(q2));
10258   VIXL_ASSERT(IsQuietNaN(qa));
10259
10260   // The input NaNs after passing through ProcessNaN.
10261   double s1_proc = RawbitsToDouble(0x7ffd555511111111);
10262   double s2_proc = RawbitsToDouble(0x7ffd555522222222);
10263   double sa_proc = RawbitsToDouble(0x7ffd5555aaaaaaaa);
10264   double q1_proc = q1;
10265   double q2_proc = q2;
10266   double qa_proc = qa;
10267   VIXL_ASSERT(IsQuietNaN(s1_proc));
10268   VIXL_ASSERT(IsQuietNaN(s2_proc));
10269   VIXL_ASSERT(IsQuietNaN(sa_proc));
10270   VIXL_ASSERT(IsQuietNaN(q1_proc));
10271   VIXL_ASSERT(IsQuietNaN(q2_proc));
10272   VIXL_ASSERT(IsQuietNaN(qa_proc));
10273
10274   // Negated NaNs, as ARMv8 hardware would produce them.
10275   double s1_proc_neg = RawbitsToDouble(0xfffd555511111111);
10276   double sa_proc_neg = RawbitsToDouble(0xfffd5555aaaaaaaa);
10277   double q1_proc_neg = RawbitsToDouble(0xfffaaaaa11111111);
10278   double qa_proc_neg = RawbitsToDouble(0xfffaaaaaaaaaaaaa);
10279   VIXL_ASSERT(IsQuietNaN(s1_proc_neg));
10280   VIXL_ASSERT(IsQuietNaN(sa_proc_neg));
10281   VIXL_ASSERT(IsQuietNaN(q1_proc_neg));
10282   VIXL_ASSERT(IsQuietNaN(qa_proc_neg));
10283
10284   // Quiet NaNs are propagated.
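  // (The *_neg expectations mirror the operand negations: a NaN in n is
  // negated where the product is negated (fmsub, fnmadd), a NaN in a is
  // negated where a is negated (fnmadd, fnmsub), and a NaN in m is never
  // negated.)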
10285 FmaddFmsubHelper(q1, 0, 0, q1_proc, q1_proc_neg, q1_proc_neg, q1_proc); 10286 FmaddFmsubHelper(0, q2, 0, q2_proc, q2_proc, q2_proc, q2_proc); 10287 FmaddFmsubHelper(0, 0, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg); 10288 FmaddFmsubHelper(q1, q2, 0, q1_proc, q1_proc_neg, q1_proc_neg, q1_proc); 10289 FmaddFmsubHelper(0, q2, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg); 10290 FmaddFmsubHelper(q1, 0, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg); 10291 FmaddFmsubHelper(q1, q2, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg); 10292 10293 // Signalling NaNs are propagated, and made quiet. 10294 FmaddFmsubHelper(s1, 0, 0, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc); 10295 FmaddFmsubHelper(0, s2, 0, s2_proc, s2_proc, s2_proc, s2_proc); 10296 FmaddFmsubHelper(0, 0, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg); 10297 FmaddFmsubHelper(s1, s2, 0, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc); 10298 FmaddFmsubHelper(0, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg); 10299 FmaddFmsubHelper(s1, 0, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg); 10300 FmaddFmsubHelper(s1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg); 10301 10302 // Signalling NaNs take precedence over quiet NaNs. 10303 FmaddFmsubHelper(s1, q2, qa, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc); 10304 FmaddFmsubHelper(q1, s2, qa, s2_proc, s2_proc, s2_proc, s2_proc); 10305 FmaddFmsubHelper(q1, q2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg); 10306 FmaddFmsubHelper(s1, s2, qa, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc); 10307 FmaddFmsubHelper(q1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg); 10308 FmaddFmsubHelper(s1, q2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg); 10309 FmaddFmsubHelper(s1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg); 10310 10311 // A NaN generated by the intermediate op1 * op2 overrides a quiet NaN in a. 10312 FmaddFmsubHelper(0, 10313 kFP64PositiveInfinity, 10314 qa, 10315 kFP64DefaultNaN, 10316 kFP64DefaultNaN, 10317 kFP64DefaultNaN, 10318 kFP64DefaultNaN); 10319 FmaddFmsubHelper(kFP64PositiveInfinity, 10320 0, 10321 qa, 10322 kFP64DefaultNaN, 10323 kFP64DefaultNaN, 10324 kFP64DefaultNaN, 10325 kFP64DefaultNaN); 10326 FmaddFmsubHelper(0, 10327 kFP64NegativeInfinity, 10328 qa, 10329 kFP64DefaultNaN, 10330 kFP64DefaultNaN, 10331 kFP64DefaultNaN, 10332 kFP64DefaultNaN); 10333 FmaddFmsubHelper(kFP64NegativeInfinity, 10334 0, 10335 qa, 10336 kFP64DefaultNaN, 10337 kFP64DefaultNaN, 10338 kFP64DefaultNaN, 10339 kFP64DefaultNaN); 10340 } 10341 10342 10343 TEST(fmadd_fmsub_float_nans) { 10344 // Make sure that NaN propagation works correctly. 10345 float s1 = RawbitsToFloat(0x7f951111); 10346 float s2 = RawbitsToFloat(0x7f952222); 10347 float sa = RawbitsToFloat(0x7f95aaaa); 10348 float q1 = RawbitsToFloat(0x7fea1111); 10349 float q2 = RawbitsToFloat(0x7fea2222); 10350 float qa = RawbitsToFloat(0x7feaaaaa); 10351 VIXL_ASSERT(IsSignallingNaN(s1)); 10352 VIXL_ASSERT(IsSignallingNaN(s2)); 10353 VIXL_ASSERT(IsSignallingNaN(sa)); 10354 VIXL_ASSERT(IsQuietNaN(q1)); 10355 VIXL_ASSERT(IsQuietNaN(q2)); 10356 VIXL_ASSERT(IsQuietNaN(qa)); 10357 10358 // The input NaNs after passing through ProcessNaN. 
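  // (ProcessNaN sets the quiet bit, bit 22 for single precision, while
  // preserving the payload, so 0x7f951111 becomes 0x7fd51111; quiet NaNs
  // pass through unchanged.)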
10359   float s1_proc = RawbitsToFloat(0x7fd51111);
10360   float s2_proc = RawbitsToFloat(0x7fd52222);
10361   float sa_proc = RawbitsToFloat(0x7fd5aaaa);
10362   float q1_proc = q1;
10363   float q2_proc = q2;
10364   float qa_proc = qa;
10365   VIXL_ASSERT(IsQuietNaN(s1_proc));
10366   VIXL_ASSERT(IsQuietNaN(s2_proc));
10367   VIXL_ASSERT(IsQuietNaN(sa_proc));
10368   VIXL_ASSERT(IsQuietNaN(q1_proc));
10369   VIXL_ASSERT(IsQuietNaN(q2_proc));
10370   VIXL_ASSERT(IsQuietNaN(qa_proc));
10371
10372   // Negated NaNs, as ARMv8 hardware would produce them.
10373   float s1_proc_neg = RawbitsToFloat(0xffd51111);
10374   float sa_proc_neg = RawbitsToFloat(0xffd5aaaa);
10375   float q1_proc_neg = RawbitsToFloat(0xffea1111);
10376   float qa_proc_neg = RawbitsToFloat(0xffeaaaaa);
10377   VIXL_ASSERT(IsQuietNaN(s1_proc_neg));
10378   VIXL_ASSERT(IsQuietNaN(sa_proc_neg));
10379   VIXL_ASSERT(IsQuietNaN(q1_proc_neg));
10380   VIXL_ASSERT(IsQuietNaN(qa_proc_neg));
10381
10382   // Quiet NaNs are propagated.
10383   FmaddFmsubHelper(q1, 0, 0, q1_proc, q1_proc_neg, q1_proc_neg, q1_proc);
10384   FmaddFmsubHelper(0, q2, 0, q2_proc, q2_proc, q2_proc, q2_proc);
10385   FmaddFmsubHelper(0, 0, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);
10386   FmaddFmsubHelper(q1, q2, 0, q1_proc, q1_proc_neg, q1_proc_neg, q1_proc);
10387   FmaddFmsubHelper(0, q2, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);
10388   FmaddFmsubHelper(q1, 0, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);
10389   FmaddFmsubHelper(q1, q2, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);
10390
10391   // Signalling NaNs are propagated, and made quiet.
10392   FmaddFmsubHelper(s1, 0, 0, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
10393   FmaddFmsubHelper(0, s2, 0, s2_proc, s2_proc, s2_proc, s2_proc);
10394   FmaddFmsubHelper(0, 0, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
10395   FmaddFmsubHelper(s1, s2, 0, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
10396   FmaddFmsubHelper(0, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
10397   FmaddFmsubHelper(s1, 0, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
10398   FmaddFmsubHelper(s1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
10399
10400   // Signalling NaNs take precedence over quiet NaNs.
10401   FmaddFmsubHelper(s1, q2, qa, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
10402   FmaddFmsubHelper(q1, s2, qa, s2_proc, s2_proc, s2_proc, s2_proc);
10403   FmaddFmsubHelper(q1, q2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
10404   FmaddFmsubHelper(s1, s2, qa, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
10405   FmaddFmsubHelper(q1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
10406   FmaddFmsubHelper(s1, q2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
10407   FmaddFmsubHelper(s1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
10408
10409   // A NaN generated by the intermediate op1 * op2 overrides a quiet NaN in a.
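  // (0.0 * infinity is an invalid operation, so the multiply itself produces
  // the default NaN, which wins over the quiet NaN supplied in a.)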
10410 FmaddFmsubHelper(0, 10411 kFP32PositiveInfinity, 10412 qa, 10413 kFP32DefaultNaN, 10414 kFP32DefaultNaN, 10415 kFP32DefaultNaN, 10416 kFP32DefaultNaN); 10417 FmaddFmsubHelper(kFP32PositiveInfinity, 10418 0, 10419 qa, 10420 kFP32DefaultNaN, 10421 kFP32DefaultNaN, 10422 kFP32DefaultNaN, 10423 kFP32DefaultNaN); 10424 FmaddFmsubHelper(0, 10425 kFP32NegativeInfinity, 10426 qa, 10427 kFP32DefaultNaN, 10428 kFP32DefaultNaN, 10429 kFP32DefaultNaN, 10430 kFP32DefaultNaN); 10431 FmaddFmsubHelper(kFP32NegativeInfinity, 10432 0, 10433 qa, 10434 kFP32DefaultNaN, 10435 kFP32DefaultNaN, 10436 kFP32DefaultNaN, 10437 kFP32DefaultNaN); 10438 } 10439 10440 10441 TEST(fdiv) { 10442 SETUP(); 10443 10444 START(); 10445 __ Fmov(s14, -0.0f); 10446 __ Fmov(s15, kFP32PositiveInfinity); 10447 __ Fmov(s16, kFP32NegativeInfinity); 10448 __ Fmov(s17, 3.25f); 10449 __ Fmov(s18, 2.0f); 10450 __ Fmov(s19, 2.0f); 10451 __ Fmov(s20, -2.0f); 10452 10453 __ Fmov(d26, -0.0); 10454 __ Fmov(d27, kFP64PositiveInfinity); 10455 __ Fmov(d28, kFP64NegativeInfinity); 10456 __ Fmov(d29, 0.0); 10457 __ Fmov(d30, -2.0); 10458 __ Fmov(d31, 2.25); 10459 10460 __ Fdiv(s0, s17, s18); 10461 __ Fdiv(s1, s18, s19); 10462 __ Fdiv(s2, s14, s18); 10463 __ Fdiv(s3, s18, s15); 10464 __ Fdiv(s4, s18, s16); 10465 __ Fdiv(s5, s15, s16); 10466 __ Fdiv(s6, s14, s14); 10467 10468 __ Fdiv(d7, d31, d30); 10469 __ Fdiv(d8, d29, d31); 10470 __ Fdiv(d9, d26, d31); 10471 __ Fdiv(d10, d31, d27); 10472 __ Fdiv(d11, d31, d28); 10473 __ Fdiv(d12, d28, d27); 10474 __ Fdiv(d13, d29, d29); 10475 END(); 10476 10477 RUN(); 10478 10479 ASSERT_EQUAL_FP32(1.625f, s0); 10480 ASSERT_EQUAL_FP32(1.0f, s1); 10481 ASSERT_EQUAL_FP32(-0.0f, s2); 10482 ASSERT_EQUAL_FP32(0.0f, s3); 10483 ASSERT_EQUAL_FP32(-0.0f, s4); 10484 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s5); 10485 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s6); 10486 ASSERT_EQUAL_FP64(-1.125, d7); 10487 ASSERT_EQUAL_FP64(0.0, d8); 10488 ASSERT_EQUAL_FP64(-0.0, d9); 10489 ASSERT_EQUAL_FP64(0.0, d10); 10490 ASSERT_EQUAL_FP64(-0.0, d11); 10491 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d12); 10492 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d13); 10493 10494 TEARDOWN(); 10495 } 10496 10497 10498 static float MinMaxHelper(float n, 10499 float m, 10500 bool min, 10501 float quiet_nan_substitute = 0.0) { 10502 const uint64_t kFP32QuietNaNMask = 0x00400000; 10503 uint32_t raw_n = FloatToRawbits(n); 10504 uint32_t raw_m = FloatToRawbits(m); 10505 10506 if (std::isnan(n) && ((raw_n & kFP32QuietNaNMask) == 0)) { 10507 // n is signalling NaN. 10508 return RawbitsToFloat(raw_n | kFP32QuietNaNMask); 10509 } else if (std::isnan(m) && ((raw_m & kFP32QuietNaNMask) == 0)) { 10510 // m is signalling NaN. 10511 return RawbitsToFloat(raw_m | kFP32QuietNaNMask); 10512 } else if (quiet_nan_substitute == 0.0) { 10513 if (std::isnan(n)) { 10514 // n is quiet NaN. 10515 return n; 10516 } else if (std::isnan(m)) { 10517 // m is quiet NaN. 10518 return m; 10519 } 10520 } else { 10521 // Substitute n or m if one is quiet, but not both. 10522 if (std::isnan(n) && !std::isnan(m)) { 10523 // n is quiet NaN: replace with substitute. 10524 n = quiet_nan_substitute; 10525 } else if (!std::isnan(n) && std::isnan(m)) { 10526 // m is quiet NaN: replace with substitute. 10527 m = quiet_nan_substitute; 10528 } 10529 } 10530 10531 if ((n == 0.0) && (m == 0.0) && (copysign(1.0, n) != copysign(1.0, m))) { 10532 return min ? -0.0 : 0.0; 10533 } 10534 10535 return min ? 
fminf(n, m) : fmaxf(n, m); 10536 } 10537 10538 10539 static double MinMaxHelper(double n, 10540 double m, 10541 bool min, 10542 double quiet_nan_substitute = 0.0) { 10543 const uint64_t kFP64QuietNaNMask = 0x0008000000000000; 10544 uint64_t raw_n = DoubleToRawbits(n); 10545 uint64_t raw_m = DoubleToRawbits(m); 10546 10547 if (std::isnan(n) && ((raw_n & kFP64QuietNaNMask) == 0)) { 10548 // n is signalling NaN. 10549 return RawbitsToDouble(raw_n | kFP64QuietNaNMask); 10550 } else if (std::isnan(m) && ((raw_m & kFP64QuietNaNMask) == 0)) { 10551 // m is signalling NaN. 10552 return RawbitsToDouble(raw_m | kFP64QuietNaNMask); 10553 } else if (quiet_nan_substitute == 0.0) { 10554 if (std::isnan(n)) { 10555 // n is quiet NaN. 10556 return n; 10557 } else if (std::isnan(m)) { 10558 // m is quiet NaN. 10559 return m; 10560 } 10561 } else { 10562 // Substitute n or m if one is quiet, but not both. 10563 if (std::isnan(n) && !std::isnan(m)) { 10564 // n is quiet NaN: replace with substitute. 10565 n = quiet_nan_substitute; 10566 } else if (!std::isnan(n) && std::isnan(m)) { 10567 // m is quiet NaN: replace with substitute. 10568 m = quiet_nan_substitute; 10569 } 10570 } 10571 10572 if ((n == 0.0) && (m == 0.0) && (copysign(1.0, n) != copysign(1.0, m))) { 10573 return min ? -0.0 : 0.0; 10574 } 10575 10576 return min ? fmin(n, m) : fmax(n, m); 10577 } 10578 10579 10580 static void FminFmaxDoubleHelper( 10581 double n, double m, double min, double max, double minnm, double maxnm) { 10582 SETUP(); 10583 10584 START(); 10585 __ Fmov(d0, n); 10586 __ Fmov(d1, m); 10587 __ Fmin(d28, d0, d1); 10588 __ Fmax(d29, d0, d1); 10589 __ Fminnm(d30, d0, d1); 10590 __ Fmaxnm(d31, d0, d1); 10591 END(); 10592 10593 RUN(); 10594 10595 ASSERT_EQUAL_FP64(min, d28); 10596 ASSERT_EQUAL_FP64(max, d29); 10597 ASSERT_EQUAL_FP64(minnm, d30); 10598 ASSERT_EQUAL_FP64(maxnm, d31); 10599 10600 TEARDOWN(); 10601 } 10602 10603 10604 TEST(fmax_fmin_d) { 10605 // Use non-standard NaNs to check that the payload bits are preserved. 10606 double snan = RawbitsToDouble(0x7ff5555512345678); 10607 double qnan = RawbitsToDouble(0x7ffaaaaa87654321); 10608 10609 double snan_processed = RawbitsToDouble(0x7ffd555512345678); 10610 double qnan_processed = qnan; 10611 10612 VIXL_ASSERT(IsSignallingNaN(snan)); 10613 VIXL_ASSERT(IsQuietNaN(qnan)); 10614 VIXL_ASSERT(IsQuietNaN(snan_processed)); 10615 VIXL_ASSERT(IsQuietNaN(qnan_processed)); 10616 10617 // Bootstrap tests. 10618 FminFmaxDoubleHelper(0, 0, 0, 0, 0, 0); 10619 FminFmaxDoubleHelper(0, 1, 0, 1, 0, 1); 10620 FminFmaxDoubleHelper(kFP64PositiveInfinity, 10621 kFP64NegativeInfinity, 10622 kFP64NegativeInfinity, 10623 kFP64PositiveInfinity, 10624 kFP64NegativeInfinity, 10625 kFP64PositiveInfinity); 10626 FminFmaxDoubleHelper(snan, 10627 0, 10628 snan_processed, 10629 snan_processed, 10630 snan_processed, 10631 snan_processed); 10632 FminFmaxDoubleHelper(0, 10633 snan, 10634 snan_processed, 10635 snan_processed, 10636 snan_processed, 10637 snan_processed); 10638 FminFmaxDoubleHelper(qnan, 0, qnan_processed, qnan_processed, 0, 0); 10639 FminFmaxDoubleHelper(0, qnan, qnan_processed, qnan_processed, 0, 0); 10640 FminFmaxDoubleHelper(qnan, 10641 snan, 10642 snan_processed, 10643 snan_processed, 10644 snan_processed, 10645 snan_processed); 10646 FminFmaxDoubleHelper(snan, 10647 qnan, 10648 snan_processed, 10649 snan_processed, 10650 snan_processed, 10651 snan_processed); 10652 10653 // Iterate over all combinations of inputs. 
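  // (MinMaxHelper models the instruction semantics: signalling NaNs are
  // quietened and propagated by all four instructions, while fminnm and
  // fmaxnm treat a lone quiet NaN as missing. The helper models this by
  // substituting +infinity for fminnm and -infinity for fmaxnm, so the other
  // operand always wins.)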
10654 double inputs[] = {DBL_MAX, 10655 DBL_MIN, 10656 1.0, 10657 0.0, 10658 -DBL_MAX, 10659 -DBL_MIN, 10660 -1.0, 10661 -0.0, 10662 kFP64PositiveInfinity, 10663 kFP64NegativeInfinity, 10664 kFP64QuietNaN, 10665 kFP64SignallingNaN}; 10666 10667 const int count = sizeof(inputs) / sizeof(inputs[0]); 10668 10669 for (int in = 0; in < count; in++) { 10670 double n = inputs[in]; 10671 for (int im = 0; im < count; im++) { 10672 double m = inputs[im]; 10673 FminFmaxDoubleHelper(n, 10674 m, 10675 MinMaxHelper(n, m, true), 10676 MinMaxHelper(n, m, false), 10677 MinMaxHelper(n, m, true, kFP64PositiveInfinity), 10678 MinMaxHelper(n, m, false, kFP64NegativeInfinity)); 10679 } 10680 } 10681 } 10682 10683 10684 static void FminFmaxFloatHelper( 10685 float n, float m, float min, float max, float minnm, float maxnm) { 10686 SETUP(); 10687 10688 START(); 10689 __ Fmov(s0, n); 10690 __ Fmov(s1, m); 10691 __ Fmin(s28, s0, s1); 10692 __ Fmax(s29, s0, s1); 10693 __ Fminnm(s30, s0, s1); 10694 __ Fmaxnm(s31, s0, s1); 10695 END(); 10696 10697 RUN(); 10698 10699 ASSERT_EQUAL_FP32(min, s28); 10700 ASSERT_EQUAL_FP32(max, s29); 10701 ASSERT_EQUAL_FP32(minnm, s30); 10702 ASSERT_EQUAL_FP32(maxnm, s31); 10703 10704 TEARDOWN(); 10705 } 10706 10707 10708 TEST(fmax_fmin_s) { 10709 // Use non-standard NaNs to check that the payload bits are preserved. 10710 float snan = RawbitsToFloat(0x7f951234); 10711 float qnan = RawbitsToFloat(0x7fea8765); 10712 10713 float snan_processed = RawbitsToFloat(0x7fd51234); 10714 float qnan_processed = qnan; 10715 10716 VIXL_ASSERT(IsSignallingNaN(snan)); 10717 VIXL_ASSERT(IsQuietNaN(qnan)); 10718 VIXL_ASSERT(IsQuietNaN(snan_processed)); 10719 VIXL_ASSERT(IsQuietNaN(qnan_processed)); 10720 10721 // Bootstrap tests. 10722 FminFmaxFloatHelper(0, 0, 0, 0, 0, 0); 10723 FminFmaxFloatHelper(0, 1, 0, 1, 0, 1); 10724 FminFmaxFloatHelper(kFP32PositiveInfinity, 10725 kFP32NegativeInfinity, 10726 kFP32NegativeInfinity, 10727 kFP32PositiveInfinity, 10728 kFP32NegativeInfinity, 10729 kFP32PositiveInfinity); 10730 FminFmaxFloatHelper(snan, 10731 0, 10732 snan_processed, 10733 snan_processed, 10734 snan_processed, 10735 snan_processed); 10736 FminFmaxFloatHelper(0, 10737 snan, 10738 snan_processed, 10739 snan_processed, 10740 snan_processed, 10741 snan_processed); 10742 FminFmaxFloatHelper(qnan, 0, qnan_processed, qnan_processed, 0, 0); 10743 FminFmaxFloatHelper(0, qnan, qnan_processed, qnan_processed, 0, 0); 10744 FminFmaxFloatHelper(qnan, 10745 snan, 10746 snan_processed, 10747 snan_processed, 10748 snan_processed, 10749 snan_processed); 10750 FminFmaxFloatHelper(snan, 10751 qnan, 10752 snan_processed, 10753 snan_processed, 10754 snan_processed, 10755 snan_processed); 10756 10757 // Iterate over all combinations of inputs. 
10758   float inputs[] = {FLT_MAX,
10759                     FLT_MIN,
10760                     1.0,
10761                     0.0,
10762                     -FLT_MAX,
10763                     -FLT_MIN,
10764                     -1.0,
10765                     -0.0,
10766                     kFP32PositiveInfinity,
10767                     kFP32NegativeInfinity,
10768                     kFP32QuietNaN,
10769                     kFP32SignallingNaN};
10770
10771   const int count = sizeof(inputs) / sizeof(inputs[0]);
10772
10773   for (int in = 0; in < count; in++) {
10774     float n = inputs[in];
10775     for (int im = 0; im < count; im++) {
10776       float m = inputs[im];
10777       FminFmaxFloatHelper(n,
10778                           m,
10779                           MinMaxHelper(n, m, true),
10780                           MinMaxHelper(n, m, false),
10781                           MinMaxHelper(n, m, true, kFP32PositiveInfinity),
10782                           MinMaxHelper(n, m, false, kFP32NegativeInfinity));
10783     }
10784   }
10785 }
10786
10787
10788 TEST(fccmp) {
10789   SETUP();
10790
10791   START();
10792   __ Fmov(s16, 0.0);
10793   __ Fmov(s17, 0.5);
10794   __ Fmov(d18, -0.5);
10795   __ Fmov(d19, -1.0);
10796   __ Mov(x20, 0);
10797   __ Mov(x21, 0x7ff0000000000001);  // Double precision NaN.
10798   __ Fmov(d21, x21);
10799   __ Mov(w22, 0x7f800001);  // Single precision NaN.
10800   __ Fmov(s22, w22);
10801
10802   __ Cmp(x20, 0);
10803   __ Fccmp(s16, s16, NoFlag, eq);
10804   __ Mrs(x0, NZCV);
10805
10806   __ Cmp(x20, 0);
10807   __ Fccmp(s16, s16, VFlag, ne);
10808   __ Mrs(x1, NZCV);
10809
10810   __ Cmp(x20, 0);
10811   __ Fccmp(s16, s17, CFlag, ge);
10812   __ Mrs(x2, NZCV);
10813
10814   __ Cmp(x20, 0);
10815   __ Fccmp(s16, s17, CVFlag, lt);
10816   __ Mrs(x3, NZCV);
10817
10818   __ Cmp(x20, 0);
10819   __ Fccmp(d18, d18, ZFlag, le);
10820   __ Mrs(x4, NZCV);
10821
10822   __ Cmp(x20, 0);
10823   __ Fccmp(d18, d18, ZVFlag, gt);
10824   __ Mrs(x5, NZCV);
10825
10826   __ Cmp(x20, 0);
10827   __ Fccmp(d18, d19, ZCVFlag, ls);
10828   __ Mrs(x6, NZCV);
10829
10830   __ Cmp(x20, 0);
10831   __ Fccmp(d18, d19, NFlag, hi);
10832   __ Mrs(x7, NZCV);
10833
10834   // The MacroAssembler does not allow al or nv as a condition.
10835   {
10836     ExactAssemblyScope scope(&masm, kInstructionSize);
10837     __ fccmp(s16, s16, NFlag, al);
10838   }
10839   __ Mrs(x8, NZCV);
10840
10841   {
10842     ExactAssemblyScope scope(&masm, kInstructionSize);
10843     __ fccmp(d18, d18, NFlag, nv);
10844   }
10845   __ Mrs(x9, NZCV);
10846
10847   __ Cmp(x20, 0);
10848   __ Fccmpe(s16, s16, NoFlag, eq);
10849   __ Mrs(x10, NZCV);
10850
10851   __ Cmp(x20, 0);
10852   __ Fccmpe(d18, d19, ZCVFlag, ls);
10853   __ Mrs(x11, NZCV);
10854
10855   __ Cmp(x20, 0);
10856   __ Fccmpe(d21, d21, NoFlag, eq);
10857   __ Mrs(x12, NZCV);
10858
10859   __ Cmp(x20, 0);
10860   __ Fccmpe(s22, s22, NoFlag, eq);
10861   __ Mrs(x13, NZCV);
10862   END();
10863
10864   RUN();
10865
10866   ASSERT_EQUAL_32(ZCFlag, w0);
10867   ASSERT_EQUAL_32(VFlag, w1);
10868   ASSERT_EQUAL_32(NFlag, w2);
10869   ASSERT_EQUAL_32(CVFlag, w3);
10870   ASSERT_EQUAL_32(ZCFlag, w4);
10871   ASSERT_EQUAL_32(ZVFlag, w5);
10872   ASSERT_EQUAL_32(CFlag, w6);
10873   ASSERT_EQUAL_32(NFlag, w7);
10874   ASSERT_EQUAL_32(ZCFlag, w8);
10875   ASSERT_EQUAL_32(ZCFlag, w9);
10876   ASSERT_EQUAL_32(ZCFlag, w10);
10877   ASSERT_EQUAL_32(CFlag, w11);
10878   ASSERT_EQUAL_32(CVFlag, w12);
10879   ASSERT_EQUAL_32(CVFlag, w13);
10880
10881   TEARDOWN();
10882 }
10883
10884
10885 TEST(fcmp) {
10886   SETUP();
10887
10888   START();
10889
10890   // Some of these tests require a floating-point scratch register assigned to
10891   // the macro assembler, but most do not.
10892   {
10893     UseScratchRegisterScope temps(&masm);
10894     temps.ExcludeAll();
10895     temps.Include(ip0, ip1);
10896
10897     __ Fmov(s8, 0.0);
10898     __ Fmov(s9, 0.5);
10899     __ Mov(w18, 0x7f800001);  // Single precision NaN.
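    // (Exponent all ones, a non-zero fraction and the quiet bit, bit 22,
    // clear: a signalling NaN, which fcmp reports as unordered, i.e. C and V
    // set.)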
10900     __ Fmov(s18, w18);
10901
10902     __ Fcmp(s8, s8);
10903     __ Mrs(x0, NZCV);
10904     __ Fcmp(s8, s9);
10905     __ Mrs(x1, NZCV);
10906     __ Fcmp(s9, s8);
10907     __ Mrs(x2, NZCV);
10908     __ Fcmp(s8, s18);
10909     __ Mrs(x3, NZCV);
10910     __ Fcmp(s18, s18);
10911     __ Mrs(x4, NZCV);
10912     __ Fcmp(s8, 0.0);
10913     __ Mrs(x5, NZCV);
10914     temps.Include(d0);
10915     __ Fcmp(s8, 255.0);
10916     temps.Exclude(d0);
10917     __ Mrs(x6, NZCV);
10918
10919     __ Fmov(d19, 0.0);
10920     __ Fmov(d20, 0.5);
10921     __ Mov(x21, 0x7ff0000000000001);  // Double precision NaN.
10922     __ Fmov(d21, x21);
10923
10924     __ Fcmp(d19, d19);
10925     __ Mrs(x10, NZCV);
10926     __ Fcmp(d19, d20);
10927     __ Mrs(x11, NZCV);
10928     __ Fcmp(d20, d19);
10929     __ Mrs(x12, NZCV);
10930     __ Fcmp(d19, d21);
10931     __ Mrs(x13, NZCV);
10932     __ Fcmp(d21, d21);
10933     __ Mrs(x14, NZCV);
10934     __ Fcmp(d19, 0.0);
10935     __ Mrs(x15, NZCV);
10936     temps.Include(d0);
10937     __ Fcmp(d19, 12.3456);
10938     temps.Exclude(d0);
10939     __ Mrs(x16, NZCV);
10940
10941     __ Fcmpe(s8, s8);
10942     __ Mrs(x22, NZCV);
10943     __ Fcmpe(s8, 0.0);
10944     __ Mrs(x23, NZCV);
10945     __ Fcmpe(d19, d19);
10946     __ Mrs(x24, NZCV);
10947     __ Fcmpe(d19, 0.0);
10948     __ Mrs(x25, NZCV);
10949     __ Fcmpe(s18, s18);
10950     __ Mrs(x26, NZCV);
10951     __ Fcmpe(d21, d21);
10952     __ Mrs(x27, NZCV);
10953   }
10954
10955   END();
10956
10957   RUN();
10958
10959   ASSERT_EQUAL_32(ZCFlag, w0);
10960   ASSERT_EQUAL_32(NFlag, w1);
10961   ASSERT_EQUAL_32(CFlag, w2);
10962   ASSERT_EQUAL_32(CVFlag, w3);
10963   ASSERT_EQUAL_32(CVFlag, w4);
10964   ASSERT_EQUAL_32(ZCFlag, w5);
10965   ASSERT_EQUAL_32(NFlag, w6);
10966   ASSERT_EQUAL_32(ZCFlag, w10);
10967   ASSERT_EQUAL_32(NFlag, w11);
10968   ASSERT_EQUAL_32(CFlag, w12);
10969   ASSERT_EQUAL_32(CVFlag, w13);
10970   ASSERT_EQUAL_32(CVFlag, w14);
10971   ASSERT_EQUAL_32(ZCFlag, w15);
10972   ASSERT_EQUAL_32(NFlag, w16);
10973   ASSERT_EQUAL_32(ZCFlag, w22);
10974   ASSERT_EQUAL_32(ZCFlag, w23);
10975   ASSERT_EQUAL_32(ZCFlag, w24);
10976   ASSERT_EQUAL_32(ZCFlag, w25);
10977   ASSERT_EQUAL_32(CVFlag, w26);
10978   ASSERT_EQUAL_32(CVFlag, w27);
10979
10980   TEARDOWN();
10981 }
10982
10983
10984 TEST(fcsel) {
10985   SETUP();
10986
10987   START();
10988   __ Mov(x16, 0);
10989   __ Fmov(s16, 1.0);
10990   __ Fmov(s17, 2.0);
10991   __ Fmov(d18, 3.0);
10992   __ Fmov(d19, 4.0);
10993
10994   __ Cmp(x16, 0);
10995   __ Fcsel(s0, s16, s17, eq);
10996   __ Fcsel(s1, s16, s17, ne);
10997   __ Fcsel(d2, d18, d19, eq);
10998   __ Fcsel(d3, d18, d19, ne);
10999   // The MacroAssembler does not allow al or nv as a condition.
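  // (As al and nv always pass, these fcsel forms select the first source
  // register; s4 and d5 below should equal s16 and d18.)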
11000 { 11001 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 11002 __ fcsel(s4, s16, s17, al); 11003 __ fcsel(d5, d18, d19, nv); 11004 } 11005 END(); 11006 11007 RUN(); 11008 11009 ASSERT_EQUAL_FP32(1.0, s0); 11010 ASSERT_EQUAL_FP32(2.0, s1); 11011 ASSERT_EQUAL_FP64(3.0, d2); 11012 ASSERT_EQUAL_FP64(4.0, d3); 11013 ASSERT_EQUAL_FP32(1.0, s4); 11014 ASSERT_EQUAL_FP64(3.0, d5); 11015 11016 TEARDOWN(); 11017 } 11018 11019 11020 TEST(fneg) { 11021 SETUP(); 11022 11023 START(); 11024 __ Fmov(s16, 1.0); 11025 __ Fmov(s17, 0.0); 11026 __ Fmov(s18, kFP32PositiveInfinity); 11027 __ Fmov(d19, 1.0); 11028 __ Fmov(d20, 0.0); 11029 __ Fmov(d21, kFP64PositiveInfinity); 11030 11031 __ Fneg(s0, s16); 11032 __ Fneg(s1, s0); 11033 __ Fneg(s2, s17); 11034 __ Fneg(s3, s2); 11035 __ Fneg(s4, s18); 11036 __ Fneg(s5, s4); 11037 __ Fneg(d6, d19); 11038 __ Fneg(d7, d6); 11039 __ Fneg(d8, d20); 11040 __ Fneg(d9, d8); 11041 __ Fneg(d10, d21); 11042 __ Fneg(d11, d10); 11043 END(); 11044 11045 RUN(); 11046 11047 ASSERT_EQUAL_FP32(-1.0, s0); 11048 ASSERT_EQUAL_FP32(1.0, s1); 11049 ASSERT_EQUAL_FP32(-0.0, s2); 11050 ASSERT_EQUAL_FP32(0.0, s3); 11051 ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s4); 11052 ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s5); 11053 ASSERT_EQUAL_FP64(-1.0, d6); 11054 ASSERT_EQUAL_FP64(1.0, d7); 11055 ASSERT_EQUAL_FP64(-0.0, d8); 11056 ASSERT_EQUAL_FP64(0.0, d9); 11057 ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d10); 11058 ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d11); 11059 11060 TEARDOWN(); 11061 } 11062 11063 11064 TEST(fabs) { 11065 SETUP(); 11066 11067 START(); 11068 __ Fmov(s16, -1.0); 11069 __ Fmov(s17, -0.0); 11070 __ Fmov(s18, kFP32NegativeInfinity); 11071 __ Fmov(d19, -1.0); 11072 __ Fmov(d20, -0.0); 11073 __ Fmov(d21, kFP64NegativeInfinity); 11074 11075 __ Fabs(s0, s16); 11076 __ Fabs(s1, s0); 11077 __ Fabs(s2, s17); 11078 __ Fabs(s3, s18); 11079 __ Fabs(d4, d19); 11080 __ Fabs(d5, d4); 11081 __ Fabs(d6, d20); 11082 __ Fabs(d7, d21); 11083 END(); 11084 11085 RUN(); 11086 11087 ASSERT_EQUAL_FP32(1.0, s0); 11088 ASSERT_EQUAL_FP32(1.0, s1); 11089 ASSERT_EQUAL_FP32(0.0, s2); 11090 ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s3); 11091 ASSERT_EQUAL_FP64(1.0, d4); 11092 ASSERT_EQUAL_FP64(1.0, d5); 11093 ASSERT_EQUAL_FP64(0.0, d6); 11094 ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d7); 11095 11096 TEARDOWN(); 11097 } 11098 11099 11100 TEST(fsqrt) { 11101 SETUP(); 11102 11103 START(); 11104 __ Fmov(s16, 0.0); 11105 __ Fmov(s17, 1.0); 11106 __ Fmov(s18, 0.25); 11107 __ Fmov(s19, 65536.0); 11108 __ Fmov(s20, -0.0); 11109 __ Fmov(s21, kFP32PositiveInfinity); 11110 __ Fmov(s22, -1.0); 11111 __ Fmov(d23, 0.0); 11112 __ Fmov(d24, 1.0); 11113 __ Fmov(d25, 0.25); 11114 __ Fmov(d26, 4294967296.0); 11115 __ Fmov(d27, -0.0); 11116 __ Fmov(d28, kFP64PositiveInfinity); 11117 __ Fmov(d29, -1.0); 11118 11119 __ Fsqrt(s0, s16); 11120 __ Fsqrt(s1, s17); 11121 __ Fsqrt(s2, s18); 11122 __ Fsqrt(s3, s19); 11123 __ Fsqrt(s4, s20); 11124 __ Fsqrt(s5, s21); 11125 __ Fsqrt(s6, s22); 11126 __ Fsqrt(d7, d23); 11127 __ Fsqrt(d8, d24); 11128 __ Fsqrt(d9, d25); 11129 __ Fsqrt(d10, d26); 11130 __ Fsqrt(d11, d27); 11131 __ Fsqrt(d12, d28); 11132 __ Fsqrt(d13, d29); 11133 END(); 11134 11135 RUN(); 11136 11137 ASSERT_EQUAL_FP32(0.0, s0); 11138 ASSERT_EQUAL_FP32(1.0, s1); 11139 ASSERT_EQUAL_FP32(0.5, s2); 11140 ASSERT_EQUAL_FP32(256.0, s3); 11141 ASSERT_EQUAL_FP32(-0.0, s4); 11142 ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s5); 11143 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s6); 11144 ASSERT_EQUAL_FP64(0.0, d7); 11145 ASSERT_EQUAL_FP64(1.0, 
d8); 11146 ASSERT_EQUAL_FP64(0.5, d9); 11147 ASSERT_EQUAL_FP64(65536.0, d10); 11148 ASSERT_EQUAL_FP64(-0.0, d11); 11149 ASSERT_EQUAL_FP64(kFP32PositiveInfinity, d12); 11150 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d13); 11151 11152 TEARDOWN(); 11153 } 11154 11155 11156 TEST(frinta) { 11157 SETUP(); 11158 11159 START(); 11160 __ Fmov(s16, 1.0); 11161 __ Fmov(s17, 1.1); 11162 __ Fmov(s18, 1.5); 11163 __ Fmov(s19, 1.9); 11164 __ Fmov(s20, 2.5); 11165 __ Fmov(s21, -1.5); 11166 __ Fmov(s22, -2.5); 11167 __ Fmov(s23, kFP32PositiveInfinity); 11168 __ Fmov(s24, kFP32NegativeInfinity); 11169 __ Fmov(s25, 0.0); 11170 __ Fmov(s26, -0.0); 11171 __ Fmov(s27, -0.2); 11172 11173 __ Frinta(s0, s16); 11174 __ Frinta(s1, s17); 11175 __ Frinta(s2, s18); 11176 __ Frinta(s3, s19); 11177 __ Frinta(s4, s20); 11178 __ Frinta(s5, s21); 11179 __ Frinta(s6, s22); 11180 __ Frinta(s7, s23); 11181 __ Frinta(s8, s24); 11182 __ Frinta(s9, s25); 11183 __ Frinta(s10, s26); 11184 __ Frinta(s11, s27); 11185 11186 __ Fmov(d16, 1.0); 11187 __ Fmov(d17, 1.1); 11188 __ Fmov(d18, 1.5); 11189 __ Fmov(d19, 1.9); 11190 __ Fmov(d20, 2.5); 11191 __ Fmov(d21, -1.5); 11192 __ Fmov(d22, -2.5); 11193 __ Fmov(d23, kFP32PositiveInfinity); 11194 __ Fmov(d24, kFP32NegativeInfinity); 11195 __ Fmov(d25, 0.0); 11196 __ Fmov(d26, -0.0); 11197 __ Fmov(d27, -0.2); 11198 11199 __ Frinta(d12, d16); 11200 __ Frinta(d13, d17); 11201 __ Frinta(d14, d18); 11202 __ Frinta(d15, d19); 11203 __ Frinta(d16, d20); 11204 __ Frinta(d17, d21); 11205 __ Frinta(d18, d22); 11206 __ Frinta(d19, d23); 11207 __ Frinta(d20, d24); 11208 __ Frinta(d21, d25); 11209 __ Frinta(d22, d26); 11210 __ Frinta(d23, d27); 11211 END(); 11212 11213 RUN(); 11214 11215 ASSERT_EQUAL_FP32(1.0, s0); 11216 ASSERT_EQUAL_FP32(1.0, s1); 11217 ASSERT_EQUAL_FP32(2.0, s2); 11218 ASSERT_EQUAL_FP32(2.0, s3); 11219 ASSERT_EQUAL_FP32(3.0, s4); 11220 ASSERT_EQUAL_FP32(-2.0, s5); 11221 ASSERT_EQUAL_FP32(-3.0, s6); 11222 ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7); 11223 ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8); 11224 ASSERT_EQUAL_FP32(0.0, s9); 11225 ASSERT_EQUAL_FP32(-0.0, s10); 11226 ASSERT_EQUAL_FP32(-0.0, s11); 11227 ASSERT_EQUAL_FP64(1.0, d12); 11228 ASSERT_EQUAL_FP64(1.0, d13); 11229 ASSERT_EQUAL_FP64(2.0, d14); 11230 ASSERT_EQUAL_FP64(2.0, d15); 11231 ASSERT_EQUAL_FP64(3.0, d16); 11232 ASSERT_EQUAL_FP64(-2.0, d17); 11233 ASSERT_EQUAL_FP64(-3.0, d18); 11234 ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d19); 11235 ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d20); 11236 ASSERT_EQUAL_FP64(0.0, d21); 11237 ASSERT_EQUAL_FP64(-0.0, d22); 11238 ASSERT_EQUAL_FP64(-0.0, d23); 11239 11240 TEARDOWN(); 11241 } 11242 11243 11244 TEST(frinti) { 11245 // VIXL only supports the round-to-nearest FPCR mode, so this test has the 11246 // same results as frintn. 
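  // (frinti rounds using the rounding mode held in FPCR, while frintn always
  // rounds to nearest with ties to even; hence 2.5 rounds to 2.0 here, where
  // frinta's ties-away-from-zero gives 3.0.)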
11247 SETUP(); 11248 11249 START(); 11250 __ Fmov(s16, 1.0); 11251 __ Fmov(s17, 1.1); 11252 __ Fmov(s18, 1.5); 11253 __ Fmov(s19, 1.9); 11254 __ Fmov(s20, 2.5); 11255 __ Fmov(s21, -1.5); 11256 __ Fmov(s22, -2.5); 11257 __ Fmov(s23, kFP32PositiveInfinity); 11258 __ Fmov(s24, kFP32NegativeInfinity); 11259 __ Fmov(s25, 0.0); 11260 __ Fmov(s26, -0.0); 11261 __ Fmov(s27, -0.2); 11262 11263 __ Frinti(s0, s16); 11264 __ Frinti(s1, s17); 11265 __ Frinti(s2, s18); 11266 __ Frinti(s3, s19); 11267 __ Frinti(s4, s20); 11268 __ Frinti(s5, s21); 11269 __ Frinti(s6, s22); 11270 __ Frinti(s7, s23); 11271 __ Frinti(s8, s24); 11272 __ Frinti(s9, s25); 11273 __ Frinti(s10, s26); 11274 __ Frinti(s11, s27); 11275 11276 __ Fmov(d16, 1.0); 11277 __ Fmov(d17, 1.1); 11278 __ Fmov(d18, 1.5); 11279 __ Fmov(d19, 1.9); 11280 __ Fmov(d20, 2.5); 11281 __ Fmov(d21, -1.5); 11282 __ Fmov(d22, -2.5); 11283 __ Fmov(d23, kFP32PositiveInfinity); 11284 __ Fmov(d24, kFP32NegativeInfinity); 11285 __ Fmov(d25, 0.0); 11286 __ Fmov(d26, -0.0); 11287 __ Fmov(d27, -0.2); 11288 11289 __ Frinti(d12, d16); 11290 __ Frinti(d13, d17); 11291 __ Frinti(d14, d18); 11292 __ Frinti(d15, d19); 11293 __ Frinti(d16, d20); 11294 __ Frinti(d17, d21); 11295 __ Frinti(d18, d22); 11296 __ Frinti(d19, d23); 11297 __ Frinti(d20, d24); 11298 __ Frinti(d21, d25); 11299 __ Frinti(d22, d26); 11300 __ Frinti(d23, d27); 11301 END(); 11302 11303 RUN(); 11304 11305 ASSERT_EQUAL_FP32(1.0, s0); 11306 ASSERT_EQUAL_FP32(1.0, s1); 11307 ASSERT_EQUAL_FP32(2.0, s2); 11308 ASSERT_EQUAL_FP32(2.0, s3); 11309 ASSERT_EQUAL_FP32(2.0, s4); 11310 ASSERT_EQUAL_FP32(-2.0, s5); 11311 ASSERT_EQUAL_FP32(-2.0, s6); 11312 ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7); 11313 ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8); 11314 ASSERT_EQUAL_FP32(0.0, s9); 11315 ASSERT_EQUAL_FP32(-0.0, s10); 11316 ASSERT_EQUAL_FP32(-0.0, s11); 11317 ASSERT_EQUAL_FP64(1.0, d12); 11318 ASSERT_EQUAL_FP64(1.0, d13); 11319 ASSERT_EQUAL_FP64(2.0, d14); 11320 ASSERT_EQUAL_FP64(2.0, d15); 11321 ASSERT_EQUAL_FP64(2.0, d16); 11322 ASSERT_EQUAL_FP64(-2.0, d17); 11323 ASSERT_EQUAL_FP64(-2.0, d18); 11324 ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d19); 11325 ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d20); 11326 ASSERT_EQUAL_FP64(0.0, d21); 11327 ASSERT_EQUAL_FP64(-0.0, d22); 11328 ASSERT_EQUAL_FP64(-0.0, d23); 11329 11330 TEARDOWN(); 11331 } 11332 11333 11334 TEST(frintm) { 11335 SETUP(); 11336 11337 START(); 11338 __ Fmov(s16, 1.0); 11339 __ Fmov(s17, 1.1); 11340 __ Fmov(s18, 1.5); 11341 __ Fmov(s19, 1.9); 11342 __ Fmov(s20, 2.5); 11343 __ Fmov(s21, -1.5); 11344 __ Fmov(s22, -2.5); 11345 __ Fmov(s23, kFP32PositiveInfinity); 11346 __ Fmov(s24, kFP32NegativeInfinity); 11347 __ Fmov(s25, 0.0); 11348 __ Fmov(s26, -0.0); 11349 __ Fmov(s27, -0.2); 11350 11351 __ Frintm(s0, s16); 11352 __ Frintm(s1, s17); 11353 __ Frintm(s2, s18); 11354 __ Frintm(s3, s19); 11355 __ Frintm(s4, s20); 11356 __ Frintm(s5, s21); 11357 __ Frintm(s6, s22); 11358 __ Frintm(s7, s23); 11359 __ Frintm(s8, s24); 11360 __ Frintm(s9, s25); 11361 __ Frintm(s10, s26); 11362 __ Frintm(s11, s27); 11363 11364 __ Fmov(d16, 1.0); 11365 __ Fmov(d17, 1.1); 11366 __ Fmov(d18, 1.5); 11367 __ Fmov(d19, 1.9); 11368 __ Fmov(d20, 2.5); 11369 __ Fmov(d21, -1.5); 11370 __ Fmov(d22, -2.5); 11371 __ Fmov(d23, kFP32PositiveInfinity); 11372 __ Fmov(d24, kFP32NegativeInfinity); 11373 __ Fmov(d25, 0.0); 11374 __ Fmov(d26, -0.0); 11375 __ Fmov(d27, -0.2); 11376 11377 __ Frintm(d12, d16); 11378 __ Frintm(d13, d17); 11379 __ Frintm(d14, d18); 11380 __ Frintm(d15, d19); 11381 __ 
  __ Frintm(d17, d21);
  __ Frintm(d18, d22);
  __ Frintm(d19, d23);
  __ Frintm(d20, d24);
  __ Frintm(d21, d25);
  __ Frintm(d22, d26);
  __ Frintm(d23, d27);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s0);
  ASSERT_EQUAL_FP32(1.0, s1);
  ASSERT_EQUAL_FP32(1.0, s2);
  ASSERT_EQUAL_FP32(1.0, s3);
  ASSERT_EQUAL_FP32(2.0, s4);
  ASSERT_EQUAL_FP32(-2.0, s5);
  ASSERT_EQUAL_FP32(-3.0, s6);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8);
  ASSERT_EQUAL_FP32(0.0, s9);
  ASSERT_EQUAL_FP32(-0.0, s10);
  ASSERT_EQUAL_FP32(-1.0, s11);
  ASSERT_EQUAL_FP64(1.0, d12);
  ASSERT_EQUAL_FP64(1.0, d13);
  ASSERT_EQUAL_FP64(1.0, d14);
  ASSERT_EQUAL_FP64(1.0, d15);
  ASSERT_EQUAL_FP64(2.0, d16);
  ASSERT_EQUAL_FP64(-2.0, d17);
  ASSERT_EQUAL_FP64(-3.0, d18);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d19);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d20);
  ASSERT_EQUAL_FP64(0.0, d21);
  ASSERT_EQUAL_FP64(-0.0, d22);
  ASSERT_EQUAL_FP64(-1.0, d23);

  TEARDOWN();
}


TEST(frintn) {
  SETUP();

  START();
  __ Fmov(s16, 1.0);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, 1.9);
  __ Fmov(s20, 2.5);
  __ Fmov(s21, -1.5);
  __ Fmov(s22, -2.5);
  __ Fmov(s23, kFP32PositiveInfinity);
  __ Fmov(s24, kFP32NegativeInfinity);
  __ Fmov(s25, 0.0);
  __ Fmov(s26, -0.0);
  __ Fmov(s27, -0.2);

  __ Frintn(s0, s16);
  __ Frintn(s1, s17);
  __ Frintn(s2, s18);
  __ Frintn(s3, s19);
  __ Frintn(s4, s20);
  __ Frintn(s5, s21);
  __ Frintn(s6, s22);
  __ Frintn(s7, s23);
  __ Frintn(s8, s24);
  __ Frintn(s9, s25);
  __ Frintn(s10, s26);
  __ Frintn(s11, s27);

  __ Fmov(d16, 1.0);
  __ Fmov(d17, 1.1);
  __ Fmov(d18, 1.5);
  __ Fmov(d19, 1.9);
  __ Fmov(d20, 2.5);
  __ Fmov(d21, -1.5);
  __ Fmov(d22, -2.5);
  __ Fmov(d23, kFP32PositiveInfinity);
  __ Fmov(d24, kFP32NegativeInfinity);
  __ Fmov(d25, 0.0);
  __ Fmov(d26, -0.0);
  __ Fmov(d27, -0.2);

  __ Frintn(d12, d16);
  __ Frintn(d13, d17);
  __ Frintn(d14, d18);
  __ Frintn(d15, d19);
  __ Frintn(d16, d20);
  __ Frintn(d17, d21);
  __ Frintn(d18, d22);
  __ Frintn(d19, d23);
  __ Frintn(d20, d24);
  __ Frintn(d21, d25);
  __ Frintn(d22, d26);
  __ Frintn(d23, d27);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s0);
  ASSERT_EQUAL_FP32(1.0, s1);
  ASSERT_EQUAL_FP32(2.0, s2);
  ASSERT_EQUAL_FP32(2.0, s3);
  ASSERT_EQUAL_FP32(2.0, s4);
  ASSERT_EQUAL_FP32(-2.0, s5);
  ASSERT_EQUAL_FP32(-2.0, s6);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8);
  ASSERT_EQUAL_FP32(0.0, s9);
  ASSERT_EQUAL_FP32(-0.0, s10);
  ASSERT_EQUAL_FP32(-0.0, s11);
  ASSERT_EQUAL_FP64(1.0, d12);
  ASSERT_EQUAL_FP64(1.0, d13);
  ASSERT_EQUAL_FP64(2.0, d14);
  ASSERT_EQUAL_FP64(2.0, d15);
  ASSERT_EQUAL_FP64(2.0, d16);
  ASSERT_EQUAL_FP64(-2.0, d17);
  ASSERT_EQUAL_FP64(-2.0, d18);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d19);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d20);
  ASSERT_EQUAL_FP64(0.0, d21);
  ASSERT_EQUAL_FP64(-0.0, d22);
  ASSERT_EQUAL_FP64(-0.0, d23);

  TEARDOWN();
}


TEST(frintp) {
  SETUP();

  START();
  __ Fmov(s16, 1.0);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, 1.9);
  __ Fmov(s20, 2.5);
  __ Fmov(s21, -1.5);
  __ Fmov(s22, -2.5);
  __ Fmov(s23, kFP32PositiveInfinity);
  __ Fmov(s24, kFP32NegativeInfinity);
  __ Fmov(s25, 0.0);
  __ Fmov(s26, -0.0);
  __ Fmov(s27, -0.2);

  __ Frintp(s0, s16);
  __ Frintp(s1, s17);
  __ Frintp(s2, s18);
  __ Frintp(s3, s19);
  __ Frintp(s4, s20);
  __ Frintp(s5, s21);
  __ Frintp(s6, s22);
  __ Frintp(s7, s23);
  __ Frintp(s8, s24);
  __ Frintp(s9, s25);
  __ Frintp(s10, s26);
  __ Frintp(s11, s27);

  __ Fmov(d16, 1.0);
  __ Fmov(d17, 1.1);
  __ Fmov(d18, 1.5);
  __ Fmov(d19, 1.9);
  __ Fmov(d20, 2.5);
  __ Fmov(d21, -1.5);
  __ Fmov(d22, -2.5);
  __ Fmov(d23, kFP32PositiveInfinity);
  __ Fmov(d24, kFP32NegativeInfinity);
  __ Fmov(d25, 0.0);
  __ Fmov(d26, -0.0);
  __ Fmov(d27, -0.2);

  __ Frintp(d12, d16);
  __ Frintp(d13, d17);
  __ Frintp(d14, d18);
  __ Frintp(d15, d19);
  __ Frintp(d16, d20);
  __ Frintp(d17, d21);
  __ Frintp(d18, d22);
  __ Frintp(d19, d23);
  __ Frintp(d20, d24);
  __ Frintp(d21, d25);
  __ Frintp(d22, d26);
  __ Frintp(d23, d27);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s0);
  ASSERT_EQUAL_FP32(2.0, s1);
  ASSERT_EQUAL_FP32(2.0, s2);
  ASSERT_EQUAL_FP32(2.0, s3);
  ASSERT_EQUAL_FP32(3.0, s4);
  ASSERT_EQUAL_FP32(-1.0, s5);
  ASSERT_EQUAL_FP32(-2.0, s6);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8);
  ASSERT_EQUAL_FP32(0.0, s9);
  ASSERT_EQUAL_FP32(-0.0, s10);
  ASSERT_EQUAL_FP32(-0.0, s11);
  ASSERT_EQUAL_FP64(1.0, d12);
  ASSERT_EQUAL_FP64(2.0, d13);
  ASSERT_EQUAL_FP64(2.0, d14);
  ASSERT_EQUAL_FP64(2.0, d15);
  ASSERT_EQUAL_FP64(3.0, d16);
  ASSERT_EQUAL_FP64(-1.0, d17);
  ASSERT_EQUAL_FP64(-2.0, d18);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d19);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d20);
  ASSERT_EQUAL_FP64(0.0, d21);
  ASSERT_EQUAL_FP64(-0.0, d22);
  ASSERT_EQUAL_FP64(-0.0, d23);

  TEARDOWN();
}


TEST(frintx) {
  // VIXL only supports the round-to-nearest FPCR mode, and it doesn't support
  // FP exceptions, so this test has the same results as frintn (and frinti).
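  // On hardware, frintx would additionally raise the Inexact exception for
  // results such as 1.1 -> 1.0; since FP exceptions are not modelled, only
  // the rounded values are checked here.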
  SETUP();

  START();
  __ Fmov(s16, 1.0);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, 1.9);
  __ Fmov(s20, 2.5);
  __ Fmov(s21, -1.5);
  __ Fmov(s22, -2.5);
  __ Fmov(s23, kFP32PositiveInfinity);
  __ Fmov(s24, kFP32NegativeInfinity);
  __ Fmov(s25, 0.0);
  __ Fmov(s26, -0.0);
  __ Fmov(s27, -0.2);

  __ Frintx(s0, s16);
  __ Frintx(s1, s17);
  __ Frintx(s2, s18);
  __ Frintx(s3, s19);
  __ Frintx(s4, s20);
  __ Frintx(s5, s21);
  __ Frintx(s6, s22);
  __ Frintx(s7, s23);
  __ Frintx(s8, s24);
  __ Frintx(s9, s25);
  __ Frintx(s10, s26);
  __ Frintx(s11, s27);

  __ Fmov(d16, 1.0);
  __ Fmov(d17, 1.1);
  __ Fmov(d18, 1.5);
  __ Fmov(d19, 1.9);
  __ Fmov(d20, 2.5);
  __ Fmov(d21, -1.5);
  __ Fmov(d22, -2.5);
  __ Fmov(d23, kFP32PositiveInfinity);
  __ Fmov(d24, kFP32NegativeInfinity);
  __ Fmov(d25, 0.0);
  __ Fmov(d26, -0.0);
  __ Fmov(d27, -0.2);

  __ Frintx(d12, d16);
  __ Frintx(d13, d17);
  __ Frintx(d14, d18);
  __ Frintx(d15, d19);
  __ Frintx(d16, d20);
  __ Frintx(d17, d21);
  __ Frintx(d18, d22);
  __ Frintx(d19, d23);
  __ Frintx(d20, d24);
  __ Frintx(d21, d25);
  __ Frintx(d22, d26);
  __ Frintx(d23, d27);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s0);
  ASSERT_EQUAL_FP32(1.0, s1);
  ASSERT_EQUAL_FP32(2.0, s2);
  ASSERT_EQUAL_FP32(2.0, s3);
  ASSERT_EQUAL_FP32(2.0, s4);
  ASSERT_EQUAL_FP32(-2.0, s5);
  ASSERT_EQUAL_FP32(-2.0, s6);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8);
  ASSERT_EQUAL_FP32(0.0, s9);
  ASSERT_EQUAL_FP32(-0.0, s10);
  ASSERT_EQUAL_FP32(-0.0, s11);
  ASSERT_EQUAL_FP64(1.0, d12);
  ASSERT_EQUAL_FP64(1.0, d13);
  ASSERT_EQUAL_FP64(2.0, d14);
  ASSERT_EQUAL_FP64(2.0, d15);
  ASSERT_EQUAL_FP64(2.0, d16);
  ASSERT_EQUAL_FP64(-2.0, d17);
  ASSERT_EQUAL_FP64(-2.0, d18);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d19);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d20);
  ASSERT_EQUAL_FP64(0.0, d21);
  ASSERT_EQUAL_FP64(-0.0, d22);
  ASSERT_EQUAL_FP64(-0.0, d23);

  TEARDOWN();
}


TEST(frintz) {
  SETUP();

  START();
  __ Fmov(s16, 1.0);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, 1.9);
  __ Fmov(s20, 2.5);
  __ Fmov(s21, -1.5);
  __ Fmov(s22, -2.5);
  __ Fmov(s23, kFP32PositiveInfinity);
  __ Fmov(s24, kFP32NegativeInfinity);
  __ Fmov(s25, 0.0);
  __ Fmov(s26, -0.0);

  __ Frintz(s0, s16);
  __ Frintz(s1, s17);
  __ Frintz(s2, s18);
  __ Frintz(s3, s19);
  __ Frintz(s4, s20);
  __ Frintz(s5, s21);
  __ Frintz(s6, s22);
  __ Frintz(s7, s23);
  __ Frintz(s8, s24);
  __ Frintz(s9, s25);
  __ Frintz(s10, s26);

  __ Fmov(d16, 1.0);
  __ Fmov(d17, 1.1);
  __ Fmov(d18, 1.5);
  __ Fmov(d19, 1.9);
  __ Fmov(d20, 2.5);
  __ Fmov(d21, -1.5);
  __ Fmov(d22, -2.5);
  __ Fmov(d23, kFP32PositiveInfinity);
  __ Fmov(d24, kFP32NegativeInfinity);
  __ Fmov(d25, 0.0);
  __ Fmov(d26, -0.0);

  __ Frintz(d11, d16);
  __ Frintz(d12, d17);
  __ Frintz(d13, d18);
  __ Frintz(d14, d19);
  __ Frintz(d15, d20);
  __ Frintz(d16, d21);
  __ Frintz(d17, d22);
  __ Frintz(d18, d23);
  __ Frintz(d19, d24);
  __ Frintz(d20, d25);
  __ Frintz(d21, d26);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s0);
  ASSERT_EQUAL_FP32(1.0, s1);
  ASSERT_EQUAL_FP32(1.0, s2);
  ASSERT_EQUAL_FP32(1.0, s3);
  ASSERT_EQUAL_FP32(2.0, s4);
  ASSERT_EQUAL_FP32(-1.0, s5);
  ASSERT_EQUAL_FP32(-2.0, s6);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8);
  ASSERT_EQUAL_FP32(0.0, s9);
  ASSERT_EQUAL_FP32(-0.0, s10);
  ASSERT_EQUAL_FP64(1.0, d11);
  ASSERT_EQUAL_FP64(1.0, d12);
  ASSERT_EQUAL_FP64(1.0, d13);
  ASSERT_EQUAL_FP64(1.0, d14);
  ASSERT_EQUAL_FP64(2.0, d15);
  ASSERT_EQUAL_FP64(-1.0, d16);
  ASSERT_EQUAL_FP64(-2.0, d17);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d18);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d19);
  ASSERT_EQUAL_FP64(0.0, d20);
  ASSERT_EQUAL_FP64(-0.0, d21);

  TEARDOWN();
}


TEST(fcvt_ds) {
  SETUP();

  START();
  __ Fmov(s16, 1.0);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, 1.9);
  __ Fmov(s20, 2.5);
  __ Fmov(s21, -1.5);
  __ Fmov(s22, -2.5);
  __ Fmov(s23, kFP32PositiveInfinity);
  __ Fmov(s24, kFP32NegativeInfinity);
  __ Fmov(s25, 0.0);
  __ Fmov(s26, -0.0);
  __ Fmov(s27, FLT_MAX);
  __ Fmov(s28, FLT_MIN);
  __ Fmov(s29, RawbitsToFloat(0x7fc12345));  // Quiet NaN.
  __ Fmov(s30, RawbitsToFloat(0x7f812345));  // Signalling NaN.

  __ Fcvt(d0, s16);
  __ Fcvt(d1, s17);
  __ Fcvt(d2, s18);
  __ Fcvt(d3, s19);
  __ Fcvt(d4, s20);
  __ Fcvt(d5, s21);
  __ Fcvt(d6, s22);
  __ Fcvt(d7, s23);
  __ Fcvt(d8, s24);
  __ Fcvt(d9, s25);
  __ Fcvt(d10, s26);
  __ Fcvt(d11, s27);
  __ Fcvt(d12, s28);
  __ Fcvt(d13, s29);
  __ Fcvt(d14, s30);
  END();

  RUN();

  ASSERT_EQUAL_FP64(1.0f, d0);
  ASSERT_EQUAL_FP64(1.1f, d1);
  ASSERT_EQUAL_FP64(1.5f, d2);
  ASSERT_EQUAL_FP64(1.9f, d3);
  ASSERT_EQUAL_FP64(2.5f, d4);
  ASSERT_EQUAL_FP64(-1.5f, d5);
  ASSERT_EQUAL_FP64(-2.5f, d6);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d7);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d8);
  ASSERT_EQUAL_FP64(0.0f, d9);
  ASSERT_EQUAL_FP64(-0.0f, d10);
  ASSERT_EQUAL_FP64(FLT_MAX, d11);
  ASSERT_EQUAL_FP64(FLT_MIN, d12);

  // Check that the NaN payload is preserved according to AArch64 conversion
  // rules:
  // - The sign bit is preserved.
  // - The top bit of the mantissa is forced to 1 (making it a quiet NaN).
  // - The remaining mantissa bits are copied until they run out.
  // - The low-order bits that haven't already been assigned are set to 0.
  ASSERT_EQUAL_FP64(RawbitsToDouble(0x7ff82468a0000000), d13);
  ASSERT_EQUAL_FP64(RawbitsToDouble(0x7ff82468a0000000), d14);

  TEARDOWN();
}


TEST(fcvt_sd) {
  // Test simple conversions here. Complex behaviour (such as rounding
  // specifics) is tested in the simulator tests.
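  // As a worked example of the NaN rules checked at the end of this test:
  // narrowing the quiet NaN 0x7ff82468a0000000 keeps the sign bit, keeps the
  // top (quiet) fraction bit, and truncates the fraction from 52 to 23 bits,
  // 0x82468a0000000 >> 29 = 0x412345, giving the float 0x7fc12345.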

  SETUP();

  START();
  __ Fmov(d16, 1.0);
  __ Fmov(d17, 1.1);
  __ Fmov(d18, 1.5);
  __ Fmov(d19, 1.9);
  __ Fmov(d20, 2.5);
  __ Fmov(d21, -1.5);
  __ Fmov(d22, -2.5);
  __ Fmov(d23, kFP32PositiveInfinity);
  __ Fmov(d24, kFP32NegativeInfinity);
  __ Fmov(d25, 0.0);
  __ Fmov(d26, -0.0);
  __ Fmov(d27, FLT_MAX);
  __ Fmov(d28, FLT_MIN);
  __ Fmov(d29, RawbitsToDouble(0x7ff82468a0000000));  // Quiet NaN.
  __ Fmov(d30, RawbitsToDouble(0x7ff02468a0000000));  // Signalling NaN.

  __ Fcvt(s0, d16);
  __ Fcvt(s1, d17);
  __ Fcvt(s2, d18);
  __ Fcvt(s3, d19);
  __ Fcvt(s4, d20);
  __ Fcvt(s5, d21);
  __ Fcvt(s6, d22);
  __ Fcvt(s7, d23);
  __ Fcvt(s8, d24);
  __ Fcvt(s9, d25);
  __ Fcvt(s10, d26);
  __ Fcvt(s11, d27);
  __ Fcvt(s12, d28);
  __ Fcvt(s13, d29);
  __ Fcvt(s14, d30);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0f, s0);
  ASSERT_EQUAL_FP32(1.1f, s1);
  ASSERT_EQUAL_FP32(1.5f, s2);
  ASSERT_EQUAL_FP32(1.9f, s3);
  ASSERT_EQUAL_FP32(2.5f, s4);
  ASSERT_EQUAL_FP32(-1.5f, s5);
  ASSERT_EQUAL_FP32(-2.5f, s6);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8);
  ASSERT_EQUAL_FP32(0.0f, s9);
  ASSERT_EQUAL_FP32(-0.0f, s10);
  ASSERT_EQUAL_FP32(FLT_MAX, s11);
  ASSERT_EQUAL_FP32(FLT_MIN, s12);

  // Check that the NaN payload is preserved according to AArch64 conversion
  // rules:
  // - The sign bit is preserved.
  // - The top bit of the mantissa is forced to 1 (making it a quiet NaN).
  // - The remaining mantissa bits are copied until they run out.
  // - The low-order bits that haven't already been assigned are set to 0.
  ASSERT_EQUAL_FP32(RawbitsToFloat(0x7fc12345), s13);
  ASSERT_EQUAL_FP32(RawbitsToFloat(0x7fc12345), s14);

  TEARDOWN();
}


TEST(fcvt_half) {
  SETUP();

  START();
  Label done;
  {
    // Check all exact conversions from half to float and back.
    Label ok, fail;
    __ Mov(w0, 0);
    for (int i = 0; i < 0xffff; i += 3) {
      if ((i & 0x7c00) == 0x7c00) continue;
      __ Mov(w1, i);
      __ Fmov(s1, w1);
      __ Fcvt(s2, h1);
      __ Fcvt(h2, s2);
      __ Fmov(w2, s2);
      __ Cmp(w1, w2);
      __ B(&fail, ne);
    }
    __ B(&ok);
    __ Bind(&fail);
    __ Mov(w0, 1);
    __ B(&done);
    __ Bind(&ok);
  }
  {
    // Check all exact conversions from half to double and back.
    Label ok, fail;
    for (int i = 0; i < 0xffff; i += 3) {
      if ((i & 0x7c00) == 0x7c00) continue;
      __ Mov(w1, i);
      __ Fmov(s1, w1);
      __ Fcvt(d2, h1);
      __ Fcvt(h2, d2);
      __ Mov(w2, v2.S(), 0);
      __ Cmp(w1, w2);
      __ B(&fail, ne);
    }
    __ B(&ok);
    __ Bind(&fail);
    __ Mov(w0, 2);
    __ Bind(&ok);
  }
  __ Bind(&done);

  // Check some other interesting values.
  __ Fmov(s0, kFP32PositiveInfinity);
  __ Fmov(s1, kFP32NegativeInfinity);
  __ Fmov(s2, 65504);       // Max half precision.
  __ Fmov(s3, 6.10352e-5);  // Min positive normal.
  __ Fmov(s4, 6.09756e-5);  // Max subnormal.
  __ Fmov(s5, 5.96046e-8);  // Min positive subnormal.
  __ Fmov(s6, 5e-9);        // Not representable -> zero.
  __ Fmov(s7, -0.0);
  __ Fcvt(h0, s0);
  __ Fcvt(h1, s1);
  __ Fcvt(h2, s2);
  __ Fcvt(h3, s3);
  __ Fcvt(h4, s4);
  __ Fcvt(h5, s5);
  __ Fcvt(h6, s6);
  __ Fcvt(h7, s7);

  __ Fmov(d20, kFP64PositiveInfinity);
  __ Fmov(d21, kFP64NegativeInfinity);
  __ Fmov(d22, 65504);       // Max half precision.
  __ Fmov(d23, 6.10352e-5);  // Min positive normal.
  __ Fmov(d24, 6.09756e-5);  // Max subnormal.
  __ Fmov(d25, 5.96046e-8);  // Min positive subnormal.
  __ Fmov(d26, 5e-9);        // Not representable -> zero.
  __ Fmov(d27, -0.0);
  __ Fcvt(h20, d20);
  __ Fcvt(h21, d21);
  __ Fcvt(h22, d22);
  __ Fcvt(h23, d23);
  __ Fcvt(h24, d24);
  __ Fcvt(h25, d25);
  __ Fcvt(h26, d26);
  __ Fcvt(h27, d27);
  END();

  RUN();

  ASSERT_EQUAL_32(0, w0);  // 1 => float failed, 2 => double failed.
  ASSERT_EQUAL_128(0, kFP16PositiveInfinity, q0);
  ASSERT_EQUAL_128(0, kFP16NegativeInfinity, q1);
  ASSERT_EQUAL_128(0, 0x7bff, q2);
  ASSERT_EQUAL_128(0, 0x0400, q3);
  ASSERT_EQUAL_128(0, 0x03ff, q4);
  ASSERT_EQUAL_128(0, 0x0001, q5);
  ASSERT_EQUAL_128(0, 0, q6);
  ASSERT_EQUAL_128(0, 0x8000, q7);
  ASSERT_EQUAL_128(0, kFP16PositiveInfinity, q20);
  ASSERT_EQUAL_128(0, kFP16NegativeInfinity, q21);
  ASSERT_EQUAL_128(0, 0x7bff, q22);
  ASSERT_EQUAL_128(0, 0x0400, q23);
  ASSERT_EQUAL_128(0, 0x03ff, q24);
  ASSERT_EQUAL_128(0, 0x0001, q25);
  ASSERT_EQUAL_128(0, 0, q26);
  ASSERT_EQUAL_128(0, 0x8000, q27);
  TEARDOWN();
}


TEST(fcvtas) {
  SETUP();

  START();
  __ Fmov(s0, 1.0);
  __ Fmov(s1, 1.1);
  __ Fmov(s2, 2.5);
  __ Fmov(s3, -2.5);
  __ Fmov(s4, kFP32PositiveInfinity);
  __ Fmov(s5, kFP32NegativeInfinity);
  __ Fmov(s6, 0x7fffff80);  // Largest float < INT32_MAX.
  __ Fneg(s7, s6);          // Smallest float > INT32_MIN.
  __ Fmov(d8, 1.0);
  __ Fmov(d9, 1.1);
  __ Fmov(d10, 2.5);
  __ Fmov(d11, -2.5);
  __ Fmov(d12, kFP64PositiveInfinity);
  __ Fmov(d13, kFP64NegativeInfinity);
  __ Fmov(d14, kWMaxInt - 1);
  __ Fmov(d15, kWMinInt + 1);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 2.5);
  __ Fmov(s19, -2.5);
  __ Fmov(s20, kFP32PositiveInfinity);
  __ Fmov(s21, kFP32NegativeInfinity);
  __ Fmov(s22, 0x7fffff8000000000);  // Largest float < INT64_MAX.
  __ Fneg(s23, s22);                 // Smallest float > INT64_MIN.
  __ Fmov(d24, 1.1);
  __ Fmov(d25, 2.5);
  __ Fmov(d26, -2.5);
  __ Fmov(d27, kFP64PositiveInfinity);
  __ Fmov(d28, kFP64NegativeInfinity);
  __ Fmov(d29, 0x7ffffffffffffc00);  // Largest double < INT64_MAX.
  __ Fneg(d30, d29);                 // Smallest double > INT64_MIN.
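  // FCVTAS rounds to nearest with ties away from zero, so 2.5 and -2.5
  // convert to 3 and -3 below, and out-of-range inputs (including the
  // infinities) saturate to the INT32/INT64 limits.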

  __ Fcvtas(w0, s0);
  __ Fcvtas(w1, s1);
  __ Fcvtas(w2, s2);
  __ Fcvtas(w3, s3);
  __ Fcvtas(w4, s4);
  __ Fcvtas(w5, s5);
  __ Fcvtas(w6, s6);
  __ Fcvtas(w7, s7);
  __ Fcvtas(w8, d8);
  __ Fcvtas(w9, d9);
  __ Fcvtas(w10, d10);
  __ Fcvtas(w11, d11);
  __ Fcvtas(w12, d12);
  __ Fcvtas(w13, d13);
  __ Fcvtas(w14, d14);
  __ Fcvtas(w15, d15);
  __ Fcvtas(x17, s17);
  __ Fcvtas(x18, s18);
  __ Fcvtas(x19, s19);
  __ Fcvtas(x20, s20);
  __ Fcvtas(x21, s21);
  __ Fcvtas(x22, s22);
  __ Fcvtas(x23, s23);
  __ Fcvtas(x24, d24);
  __ Fcvtas(x25, d25);
  __ Fcvtas(x26, d26);
  __ Fcvtas(x27, d27);
  __ Fcvtas(x28, d28);
  __ Fcvtas(x29, d29);
  __ Fcvtas(x30, d30);
  END();

  RUN();

  ASSERT_EQUAL_64(1, x0);
  ASSERT_EQUAL_64(1, x1);
  ASSERT_EQUAL_64(3, x2);
  ASSERT_EQUAL_64(0xfffffffd, x3);
  ASSERT_EQUAL_64(0x7fffffff, x4);
  ASSERT_EQUAL_64(0x80000000, x5);
  ASSERT_EQUAL_64(0x7fffff80, x6);
  ASSERT_EQUAL_64(0x80000080, x7);
  ASSERT_EQUAL_64(1, x8);
  ASSERT_EQUAL_64(1, x9);
  ASSERT_EQUAL_64(3, x10);
  ASSERT_EQUAL_64(0xfffffffd, x11);
  ASSERT_EQUAL_64(0x7fffffff, x12);
  ASSERT_EQUAL_64(0x80000000, x13);
  ASSERT_EQUAL_64(0x7ffffffe, x14);
  ASSERT_EQUAL_64(0x80000001, x15);
  ASSERT_EQUAL_64(1, x17);
  ASSERT_EQUAL_64(3, x18);
  ASSERT_EQUAL_64(0xfffffffffffffffd, x19);
  ASSERT_EQUAL_64(0x7fffffffffffffff, x20);
  ASSERT_EQUAL_64(0x8000000000000000, x21);
  ASSERT_EQUAL_64(0x7fffff8000000000, x22);
  ASSERT_EQUAL_64(0x8000008000000000, x23);
  ASSERT_EQUAL_64(1, x24);
  ASSERT_EQUAL_64(3, x25);
  ASSERT_EQUAL_64(0xfffffffffffffffd, x26);
  ASSERT_EQUAL_64(0x7fffffffffffffff, x27);
  ASSERT_EQUAL_64(0x8000000000000000, x28);
  ASSERT_EQUAL_64(0x7ffffffffffffc00, x29);
  ASSERT_EQUAL_64(0x8000000000000400, x30);

  TEARDOWN();
}


TEST(fcvtau) {
  SETUP();

  START();
  __ Fmov(s0, 1.0);
  __ Fmov(s1, 1.1);
  __ Fmov(s2, 2.5);
  __ Fmov(s3, -2.5);
  __ Fmov(s4, kFP32PositiveInfinity);
  __ Fmov(s5, kFP32NegativeInfinity);
  __ Fmov(s6, 0xffffff00);  // Largest float < UINT32_MAX.
  __ Fmov(d8, 1.0);
  __ Fmov(d9, 1.1);
  __ Fmov(d10, 2.5);
  __ Fmov(d11, -2.5);
  __ Fmov(d12, kFP64PositiveInfinity);
  __ Fmov(d13, kFP64NegativeInfinity);
  __ Fmov(d14, 0xfffffffe);
  __ Fmov(s16, 1.0);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 2.5);
  __ Fmov(s19, -2.5);
  __ Fmov(s20, kFP32PositiveInfinity);
  __ Fmov(s21, kFP32NegativeInfinity);
  __ Fmov(s22, 0xffffff0000000000);  // Largest float < UINT64_MAX.
  __ Fmov(d24, 1.1);
  __ Fmov(d25, 2.5);
  __ Fmov(d26, -2.5);
  __ Fmov(d27, kFP64PositiveInfinity);
  __ Fmov(d28, kFP64NegativeInfinity);
  __ Fmov(d29, 0xfffffffffffff800);  // Largest double < UINT64_MAX.
  __ Fmov(s30, 0x100000000);

  __ Fcvtau(w0, s0);
  __ Fcvtau(w1, s1);
  __ Fcvtau(w2, s2);
  __ Fcvtau(w3, s3);
  __ Fcvtau(w4, s4);
  __ Fcvtau(w5, s5);
  __ Fcvtau(w6, s6);
  __ Fcvtau(w8, d8);
  __ Fcvtau(w9, d9);
  __ Fcvtau(w10, d10);
  __ Fcvtau(w11, d11);
  __ Fcvtau(w12, d12);
  __ Fcvtau(w13, d13);
  __ Fcvtau(w14, d14);
  __ Fcvtau(w15, d15);
  __ Fcvtau(x16, s16);
  __ Fcvtau(x17, s17);
  __ Fcvtau(x18, s18);
  __ Fcvtau(x19, s19);
  __ Fcvtau(x20, s20);
  __ Fcvtau(x21, s21);
  __ Fcvtau(x22, s22);
  __ Fcvtau(x24, d24);
  __ Fcvtau(x25, d25);
  __ Fcvtau(x26, d26);
  __ Fcvtau(x27, d27);
  __ Fcvtau(x28, d28);
  __ Fcvtau(x29, d29);
  __ Fcvtau(w30, s30);
  END();

  RUN();

  ASSERT_EQUAL_64(1, x0);
  ASSERT_EQUAL_64(1, x1);
  ASSERT_EQUAL_64(3, x2);
  ASSERT_EQUAL_64(0, x3);
  ASSERT_EQUAL_64(0xffffffff, x4);
  ASSERT_EQUAL_64(0, x5);
  ASSERT_EQUAL_64(0xffffff00, x6);
  ASSERT_EQUAL_64(1, x8);
  ASSERT_EQUAL_64(1, x9);
  ASSERT_EQUAL_64(3, x10);
  ASSERT_EQUAL_64(0, x11);
  ASSERT_EQUAL_64(0xffffffff, x12);
  ASSERT_EQUAL_64(0, x13);
  ASSERT_EQUAL_64(0xfffffffe, x14);
  ASSERT_EQUAL_64(1, x16);
  ASSERT_EQUAL_64(1, x17);
  ASSERT_EQUAL_64(3, x18);
  ASSERT_EQUAL_64(0, x19);
  ASSERT_EQUAL_64(0xffffffffffffffff, x20);
  ASSERT_EQUAL_64(0, x21);
  ASSERT_EQUAL_64(0xffffff0000000000, x22);
  ASSERT_EQUAL_64(1, x24);
  ASSERT_EQUAL_64(3, x25);
  ASSERT_EQUAL_64(0, x26);
  ASSERT_EQUAL_64(0xffffffffffffffff, x27);
  ASSERT_EQUAL_64(0, x28);
  ASSERT_EQUAL_64(0xfffffffffffff800, x29);
  ASSERT_EQUAL_64(0xffffffff, x30);

  TEARDOWN();
}


TEST(fcvtms) {
  SETUP();

  START();
  __ Fmov(s0, 1.0);
  __ Fmov(s1, 1.1);
  __ Fmov(s2, 1.5);
  __ Fmov(s3, -1.5);
  __ Fmov(s4, kFP32PositiveInfinity);
  __ Fmov(s5, kFP32NegativeInfinity);
  __ Fmov(s6, 0x7fffff80);  // Largest float < INT32_MAX.
  __ Fneg(s7, s6);          // Smallest float > INT32_MIN.
  __ Fmov(d8, 1.0);
  __ Fmov(d9, 1.1);
  __ Fmov(d10, 1.5);
  __ Fmov(d11, -1.5);
  __ Fmov(d12, kFP64PositiveInfinity);
  __ Fmov(d13, kFP64NegativeInfinity);
  __ Fmov(d14, kWMaxInt - 1);
  __ Fmov(d15, kWMinInt + 1);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, -1.5);
  __ Fmov(s20, kFP32PositiveInfinity);
  __ Fmov(s21, kFP32NegativeInfinity);
  __ Fmov(s22, 0x7fffff8000000000);  // Largest float < INT64_MAX.
  __ Fneg(s23, s22);                 // Smallest float > INT64_MIN.
  __ Fmov(d24, 1.1);
  __ Fmov(d25, 1.5);
  __ Fmov(d26, -1.5);
  __ Fmov(d27, kFP64PositiveInfinity);
  __ Fmov(d28, kFP64NegativeInfinity);
  __ Fmov(d29, 0x7ffffffffffffc00);  // Largest double < INT64_MAX.
  __ Fneg(d30, d29);                 // Smallest double > INT64_MIN.
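  // FCVTMS rounds toward minus infinity, so 1.5 converts to 1 while -1.5
  // converts to -2 below.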

  __ Fcvtms(w0, s0);
  __ Fcvtms(w1, s1);
  __ Fcvtms(w2, s2);
  __ Fcvtms(w3, s3);
  __ Fcvtms(w4, s4);
  __ Fcvtms(w5, s5);
  __ Fcvtms(w6, s6);
  __ Fcvtms(w7, s7);
  __ Fcvtms(w8, d8);
  __ Fcvtms(w9, d9);
  __ Fcvtms(w10, d10);
  __ Fcvtms(w11, d11);
  __ Fcvtms(w12, d12);
  __ Fcvtms(w13, d13);
  __ Fcvtms(w14, d14);
  __ Fcvtms(w15, d15);
  __ Fcvtms(x17, s17);
  __ Fcvtms(x18, s18);
  __ Fcvtms(x19, s19);
  __ Fcvtms(x20, s20);
  __ Fcvtms(x21, s21);
  __ Fcvtms(x22, s22);
  __ Fcvtms(x23, s23);
  __ Fcvtms(x24, d24);
  __ Fcvtms(x25, d25);
  __ Fcvtms(x26, d26);
  __ Fcvtms(x27, d27);
  __ Fcvtms(x28, d28);
  __ Fcvtms(x29, d29);
  __ Fcvtms(x30, d30);
  END();

  RUN();

  ASSERT_EQUAL_64(1, x0);
  ASSERT_EQUAL_64(1, x1);
  ASSERT_EQUAL_64(1, x2);
  ASSERT_EQUAL_64(0xfffffffe, x3);
  ASSERT_EQUAL_64(0x7fffffff, x4);
  ASSERT_EQUAL_64(0x80000000, x5);
  ASSERT_EQUAL_64(0x7fffff80, x6);
  ASSERT_EQUAL_64(0x80000080, x7);
  ASSERT_EQUAL_64(1, x8);
  ASSERT_EQUAL_64(1, x9);
  ASSERT_EQUAL_64(1, x10);
  ASSERT_EQUAL_64(0xfffffffe, x11);
  ASSERT_EQUAL_64(0x7fffffff, x12);
  ASSERT_EQUAL_64(0x80000000, x13);
  ASSERT_EQUAL_64(0x7ffffffe, x14);
  ASSERT_EQUAL_64(0x80000001, x15);
  ASSERT_EQUAL_64(1, x17);
  ASSERT_EQUAL_64(1, x18);
  ASSERT_EQUAL_64(0xfffffffffffffffe, x19);
  ASSERT_EQUAL_64(0x7fffffffffffffff, x20);
  ASSERT_EQUAL_64(0x8000000000000000, x21);
  ASSERT_EQUAL_64(0x7fffff8000000000, x22);
  ASSERT_EQUAL_64(0x8000008000000000, x23);
  ASSERT_EQUAL_64(1, x24);
  ASSERT_EQUAL_64(1, x25);
  ASSERT_EQUAL_64(0xfffffffffffffffe, x26);
  ASSERT_EQUAL_64(0x7fffffffffffffff, x27);
  ASSERT_EQUAL_64(0x8000000000000000, x28);
  ASSERT_EQUAL_64(0x7ffffffffffffc00, x29);
  ASSERT_EQUAL_64(0x8000000000000400, x30);

  TEARDOWN();
}


TEST(fcvtmu) {
  SETUP();

  START();
  __ Fmov(s0, 1.0);
  __ Fmov(s1, 1.1);
  __ Fmov(s2, 1.5);
  __ Fmov(s3, -1.5);
  __ Fmov(s4, kFP32PositiveInfinity);
  __ Fmov(s5, kFP32NegativeInfinity);
  __ Fmov(s6, 0x7fffff80);  // Largest float < INT32_MAX.
  __ Fneg(s7, s6);          // Smallest float > INT32_MIN.
  __ Fmov(d8, 1.0);
  __ Fmov(d9, 1.1);
  __ Fmov(d10, 1.5);
  __ Fmov(d11, -1.5);
  __ Fmov(d12, kFP64PositiveInfinity);
  __ Fmov(d13, kFP64NegativeInfinity);
  __ Fmov(d14, kWMaxInt - 1);
  __ Fmov(d15, kWMinInt + 1);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, -1.5);
  __ Fmov(s20, kFP32PositiveInfinity);
  __ Fmov(s21, kFP32NegativeInfinity);
  __ Fmov(s22, 0x7fffff8000000000);  // Largest float < INT64_MAX.
  __ Fneg(s23, s22);                 // Smallest float > INT64_MIN.
  __ Fmov(d24, 1.1);
  __ Fmov(d25, 1.5);
  __ Fmov(d26, -1.5);
  __ Fmov(d27, kFP64PositiveInfinity);
  __ Fmov(d28, kFP64NegativeInfinity);
  __ Fmov(d29, 0x7ffffffffffffc00);  // Largest double < INT64_MAX.
  __ Fneg(d30, d29);                 // Smallest double > INT64_MIN.
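  // FCVTMU also rounds toward minus infinity, but converts to unsigned, so
  // every negative input (including -infinity) clamps to 0 below.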

  __ Fcvtmu(w0, s0);
  __ Fcvtmu(w1, s1);
  __ Fcvtmu(w2, s2);
  __ Fcvtmu(w3, s3);
  __ Fcvtmu(w4, s4);
  __ Fcvtmu(w5, s5);
  __ Fcvtmu(w6, s6);
  __ Fcvtmu(w7, s7);
  __ Fcvtmu(w8, d8);
  __ Fcvtmu(w9, d9);
  __ Fcvtmu(w10, d10);
  __ Fcvtmu(w11, d11);
  __ Fcvtmu(w12, d12);
  __ Fcvtmu(w13, d13);
  __ Fcvtmu(w14, d14);
  __ Fcvtmu(x17, s17);
  __ Fcvtmu(x18, s18);
  __ Fcvtmu(x19, s19);
  __ Fcvtmu(x20, s20);
  __ Fcvtmu(x21, s21);
  __ Fcvtmu(x22, s22);
  __ Fcvtmu(x23, s23);
  __ Fcvtmu(x24, d24);
  __ Fcvtmu(x25, d25);
  __ Fcvtmu(x26, d26);
  __ Fcvtmu(x27, d27);
  __ Fcvtmu(x28, d28);
  __ Fcvtmu(x29, d29);
  __ Fcvtmu(x30, d30);
  END();

  RUN();

  ASSERT_EQUAL_64(1, x0);
  ASSERT_EQUAL_64(1, x1);
  ASSERT_EQUAL_64(1, x2);
  ASSERT_EQUAL_64(0, x3);
  ASSERT_EQUAL_64(0xffffffff, x4);
  ASSERT_EQUAL_64(0, x5);
  ASSERT_EQUAL_64(0x7fffff80, x6);
  ASSERT_EQUAL_64(0, x7);
  ASSERT_EQUAL_64(1, x8);
  ASSERT_EQUAL_64(1, x9);
  ASSERT_EQUAL_64(1, x10);
  ASSERT_EQUAL_64(0, x11);
  ASSERT_EQUAL_64(0xffffffff, x12);
  ASSERT_EQUAL_64(0, x13);
  ASSERT_EQUAL_64(0x7ffffffe, x14);
  ASSERT_EQUAL_64(1, x17);
  ASSERT_EQUAL_64(1, x18);
  ASSERT_EQUAL_64(0, x19);
  ASSERT_EQUAL_64(0xffffffffffffffff, x20);
  ASSERT_EQUAL_64(0, x21);
  ASSERT_EQUAL_64(0x7fffff8000000000, x22);
  ASSERT_EQUAL_64(0, x23);
  ASSERT_EQUAL_64(1, x24);
  ASSERT_EQUAL_64(1, x25);
  ASSERT_EQUAL_64(0, x26);
  ASSERT_EQUAL_64(0xffffffffffffffff, x27);
  ASSERT_EQUAL_64(0, x28);
  ASSERT_EQUAL_64(0x7ffffffffffffc00, x29);
  ASSERT_EQUAL_64(0, x30);

  TEARDOWN();
}


TEST(fcvtns) {
  SETUP();

  START();
  __ Fmov(s0, 1.0);
  __ Fmov(s1, 1.1);
  __ Fmov(s2, 1.5);
  __ Fmov(s3, -1.5);
  __ Fmov(s4, kFP32PositiveInfinity);
  __ Fmov(s5, kFP32NegativeInfinity);
  __ Fmov(s6, 0x7fffff80);  // Largest float < INT32_MAX.
  __ Fneg(s7, s6);          // Smallest float > INT32_MIN.
  __ Fmov(d8, 1.0);
  __ Fmov(d9, 1.1);
  __ Fmov(d10, 1.5);
  __ Fmov(d11, -1.5);
  __ Fmov(d12, kFP64PositiveInfinity);
  __ Fmov(d13, kFP64NegativeInfinity);
  __ Fmov(d14, kWMaxInt - 1);
  __ Fmov(d15, kWMinInt + 1);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, -1.5);
  __ Fmov(s20, kFP32PositiveInfinity);
  __ Fmov(s21, kFP32NegativeInfinity);
  __ Fmov(s22, 0x7fffff8000000000);  // Largest float < INT64_MAX.
  __ Fneg(s23, s22);                 // Smallest float > INT64_MIN.
  __ Fmov(d24, 1.1);
  __ Fmov(d25, 1.5);
  __ Fmov(d26, -1.5);
  __ Fmov(d27, kFP64PositiveInfinity);
  __ Fmov(d28, kFP64NegativeInfinity);
  __ Fmov(d29, 0x7ffffffffffffc00);  // Largest double < INT64_MAX.
  __ Fneg(d30, d29);                 // Smallest double > INT64_MIN.
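  // FCVTNS rounds to nearest with ties to even, so 1.5 converts to 2 and
  // -1.5 converts to -2 below.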

  __ Fcvtns(w0, s0);
  __ Fcvtns(w1, s1);
  __ Fcvtns(w2, s2);
  __ Fcvtns(w3, s3);
  __ Fcvtns(w4, s4);
  __ Fcvtns(w5, s5);
  __ Fcvtns(w6, s6);
  __ Fcvtns(w7, s7);
  __ Fcvtns(w8, d8);
  __ Fcvtns(w9, d9);
  __ Fcvtns(w10, d10);
  __ Fcvtns(w11, d11);
  __ Fcvtns(w12, d12);
  __ Fcvtns(w13, d13);
  __ Fcvtns(w14, d14);
  __ Fcvtns(w15, d15);
  __ Fcvtns(x17, s17);
  __ Fcvtns(x18, s18);
  __ Fcvtns(x19, s19);
  __ Fcvtns(x20, s20);
  __ Fcvtns(x21, s21);
  __ Fcvtns(x22, s22);
  __ Fcvtns(x23, s23);
  __ Fcvtns(x24, d24);
  __ Fcvtns(x25, d25);
  __ Fcvtns(x26, d26);
  __ Fcvtns(x27, d27);
  __ Fcvtns(x28, d28);
  __ Fcvtns(x29, d29);
  __ Fcvtns(x30, d30);
  END();

  RUN();

  ASSERT_EQUAL_64(1, x0);
  ASSERT_EQUAL_64(1, x1);
  ASSERT_EQUAL_64(2, x2);
  ASSERT_EQUAL_64(0xfffffffe, x3);
  ASSERT_EQUAL_64(0x7fffffff, x4);
  ASSERT_EQUAL_64(0x80000000, x5);
  ASSERT_EQUAL_64(0x7fffff80, x6);
  ASSERT_EQUAL_64(0x80000080, x7);
  ASSERT_EQUAL_64(1, x8);
  ASSERT_EQUAL_64(1, x9);
  ASSERT_EQUAL_64(2, x10);
  ASSERT_EQUAL_64(0xfffffffe, x11);
  ASSERT_EQUAL_64(0x7fffffff, x12);
  ASSERT_EQUAL_64(0x80000000, x13);
  ASSERT_EQUAL_64(0x7ffffffe, x14);
  ASSERT_EQUAL_64(0x80000001, x15);
  ASSERT_EQUAL_64(1, x17);
  ASSERT_EQUAL_64(2, x18);
  ASSERT_EQUAL_64(0xfffffffffffffffe, x19);
  ASSERT_EQUAL_64(0x7fffffffffffffff, x20);
  ASSERT_EQUAL_64(0x8000000000000000, x21);
  ASSERT_EQUAL_64(0x7fffff8000000000, x22);
  ASSERT_EQUAL_64(0x8000008000000000, x23);
  ASSERT_EQUAL_64(1, x24);
  ASSERT_EQUAL_64(2, x25);
  ASSERT_EQUAL_64(0xfffffffffffffffe, x26);
  ASSERT_EQUAL_64(0x7fffffffffffffff, x27);
  ASSERT_EQUAL_64(0x8000000000000000, x28);
  ASSERT_EQUAL_64(0x7ffffffffffffc00, x29);
  ASSERT_EQUAL_64(0x8000000000000400, x30);

  TEARDOWN();
}


TEST(fcvtnu) {
  SETUP();

  START();
  __ Fmov(s0, 1.0);
  __ Fmov(s1, 1.1);
  __ Fmov(s2, 1.5);
  __ Fmov(s3, -1.5);
  __ Fmov(s4, kFP32PositiveInfinity);
  __ Fmov(s5, kFP32NegativeInfinity);
  __ Fmov(s6, 0xffffff00);  // Largest float < UINT32_MAX.
  __ Fmov(d8, 1.0);
  __ Fmov(d9, 1.1);
  __ Fmov(d10, 1.5);
  __ Fmov(d11, -1.5);
  __ Fmov(d12, kFP64PositiveInfinity);
  __ Fmov(d13, kFP64NegativeInfinity);
  __ Fmov(d14, 0xfffffffe);
  __ Fmov(s16, 1.0);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, -1.5);
  __ Fmov(s20, kFP32PositiveInfinity);
  __ Fmov(s21, kFP32NegativeInfinity);
  __ Fmov(s22, 0xffffff0000000000);  // Largest float < UINT64_MAX.
  __ Fmov(d24, 1.1);
  __ Fmov(d25, 1.5);
  __ Fmov(d26, -1.5);
  __ Fmov(d27, kFP64PositiveInfinity);
  __ Fmov(d28, kFP64NegativeInfinity);
  __ Fmov(d29, 0xfffffffffffff800);  // Largest double < UINT64_MAX.
  __ Fmov(s30, 0x100000000);

  __ Fcvtnu(w0, s0);
  __ Fcvtnu(w1, s1);
  __ Fcvtnu(w2, s2);
  __ Fcvtnu(w3, s3);
  __ Fcvtnu(w4, s4);
  __ Fcvtnu(w5, s5);
  __ Fcvtnu(w6, s6);
  __ Fcvtnu(w8, d8);
  __ Fcvtnu(w9, d9);
  __ Fcvtnu(w10, d10);
  __ Fcvtnu(w11, d11);
  __ Fcvtnu(w12, d12);
  __ Fcvtnu(w13, d13);
  __ Fcvtnu(w14, d14);
  __ Fcvtnu(w15, d15);
  __ Fcvtnu(x16, s16);
  __ Fcvtnu(x17, s17);
  __ Fcvtnu(x18, s18);
  __ Fcvtnu(x19, s19);
  __ Fcvtnu(x20, s20);
  __ Fcvtnu(x21, s21);
  __ Fcvtnu(x22, s22);
  __ Fcvtnu(x24, d24);
  __ Fcvtnu(x25, d25);
  __ Fcvtnu(x26, d26);
  __ Fcvtnu(x27, d27);
  __ Fcvtnu(x28, d28);
  __ Fcvtnu(x29, d29);
  __ Fcvtnu(w30, s30);
  END();

  RUN();

  ASSERT_EQUAL_64(1, x0);
  ASSERT_EQUAL_64(1, x1);
  ASSERT_EQUAL_64(2, x2);
  ASSERT_EQUAL_64(0, x3);
  ASSERT_EQUAL_64(0xffffffff, x4);
  ASSERT_EQUAL_64(0, x5);
  ASSERT_EQUAL_64(0xffffff00, x6);
  ASSERT_EQUAL_64(1, x8);
  ASSERT_EQUAL_64(1, x9);
  ASSERT_EQUAL_64(2, x10);
  ASSERT_EQUAL_64(0, x11);
  ASSERT_EQUAL_64(0xffffffff, x12);
  ASSERT_EQUAL_64(0, x13);
  ASSERT_EQUAL_64(0xfffffffe, x14);
  ASSERT_EQUAL_64(1, x16);
  ASSERT_EQUAL_64(1, x17);
  ASSERT_EQUAL_64(2, x18);
  ASSERT_EQUAL_64(0, x19);
  ASSERT_EQUAL_64(0xffffffffffffffff, x20);
  ASSERT_EQUAL_64(0, x21);
  ASSERT_EQUAL_64(0xffffff0000000000, x22);
  ASSERT_EQUAL_64(1, x24);
  ASSERT_EQUAL_64(2, x25);
  ASSERT_EQUAL_64(0, x26);
  ASSERT_EQUAL_64(0xffffffffffffffff, x27);
  ASSERT_EQUAL_64(0, x28);
  ASSERT_EQUAL_64(0xfffffffffffff800, x29);
  ASSERT_EQUAL_64(0xffffffff, x30);

  TEARDOWN();
}


TEST(fcvtzs) {
  SETUP();

  START();
  __ Fmov(s0, 1.0);
  __ Fmov(s1, 1.1);
  __ Fmov(s2, 1.5);
  __ Fmov(s3, -1.5);
  __ Fmov(s4, kFP32PositiveInfinity);
  __ Fmov(s5, kFP32NegativeInfinity);
  __ Fmov(s6, 0x7fffff80);  // Largest float < INT32_MAX.
  __ Fneg(s7, s6);          // Smallest float > INT32_MIN.
  __ Fmov(d8, 1.0);
  __ Fmov(d9, 1.1);
  __ Fmov(d10, 1.5);
  __ Fmov(d11, -1.5);
  __ Fmov(d12, kFP64PositiveInfinity);
  __ Fmov(d13, kFP64NegativeInfinity);
  __ Fmov(d14, kWMaxInt - 1);
  __ Fmov(d15, kWMinInt + 1);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, -1.5);
  __ Fmov(s20, kFP32PositiveInfinity);
  __ Fmov(s21, kFP32NegativeInfinity);
  __ Fmov(s22, 0x7fffff8000000000);  // Largest float < INT64_MAX.
  __ Fneg(s23, s22);                 // Smallest float > INT64_MIN.
  __ Fmov(d24, 1.1);
  __ Fmov(d25, 1.5);
  __ Fmov(d26, -1.5);
  __ Fmov(d27, kFP64PositiveInfinity);
  __ Fmov(d28, kFP64NegativeInfinity);
  __ Fmov(d29, 0x7ffffffffffffc00);  // Largest double < INT64_MAX.
  __ Fneg(d30, d29);                 // Smallest double > INT64_MIN.
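  // FCVTZS rounds toward zero, so 1.1 and 1.5 both convert to 1, and -1.5
  // converts to -1 below.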

  __ Fcvtzs(w0, s0);
  __ Fcvtzs(w1, s1);
  __ Fcvtzs(w2, s2);
  __ Fcvtzs(w3, s3);
  __ Fcvtzs(w4, s4);
  __ Fcvtzs(w5, s5);
  __ Fcvtzs(w6, s6);
  __ Fcvtzs(w7, s7);
  __ Fcvtzs(w8, d8);
  __ Fcvtzs(w9, d9);
  __ Fcvtzs(w10, d10);
  __ Fcvtzs(w11, d11);
  __ Fcvtzs(w12, d12);
  __ Fcvtzs(w13, d13);
  __ Fcvtzs(w14, d14);
  __ Fcvtzs(w15, d15);
  __ Fcvtzs(x17, s17);
  __ Fcvtzs(x18, s18);
  __ Fcvtzs(x19, s19);
  __ Fcvtzs(x20, s20);
  __ Fcvtzs(x21, s21);
  __ Fcvtzs(x22, s22);
  __ Fcvtzs(x23, s23);
  __ Fcvtzs(x24, d24);
  __ Fcvtzs(x25, d25);
  __ Fcvtzs(x26, d26);
  __ Fcvtzs(x27, d27);
  __ Fcvtzs(x28, d28);
  __ Fcvtzs(x29, d29);
  __ Fcvtzs(x30, d30);
  END();

  RUN();

  ASSERT_EQUAL_64(1, x0);
  ASSERT_EQUAL_64(1, x1);
  ASSERT_EQUAL_64(1, x2);
  ASSERT_EQUAL_64(0xffffffff, x3);
  ASSERT_EQUAL_64(0x7fffffff, x4);
  ASSERT_EQUAL_64(0x80000000, x5);
  ASSERT_EQUAL_64(0x7fffff80, x6);
  ASSERT_EQUAL_64(0x80000080, x7);
  ASSERT_EQUAL_64(1, x8);
  ASSERT_EQUAL_64(1, x9);
  ASSERT_EQUAL_64(1, x10);
  ASSERT_EQUAL_64(0xffffffff, x11);
  ASSERT_EQUAL_64(0x7fffffff, x12);
  ASSERT_EQUAL_64(0x80000000, x13);
  ASSERT_EQUAL_64(0x7ffffffe, x14);
  ASSERT_EQUAL_64(0x80000001, x15);
  ASSERT_EQUAL_64(1, x17);
  ASSERT_EQUAL_64(1, x18);
  ASSERT_EQUAL_64(0xffffffffffffffff, x19);
  ASSERT_EQUAL_64(0x7fffffffffffffff, x20);
  ASSERT_EQUAL_64(0x8000000000000000, x21);
  ASSERT_EQUAL_64(0x7fffff8000000000, x22);
  ASSERT_EQUAL_64(0x8000008000000000, x23);
  ASSERT_EQUAL_64(1, x24);
  ASSERT_EQUAL_64(1, x25);
  ASSERT_EQUAL_64(0xffffffffffffffff, x26);
  ASSERT_EQUAL_64(0x7fffffffffffffff, x27);
  ASSERT_EQUAL_64(0x8000000000000000, x28);
  ASSERT_EQUAL_64(0x7ffffffffffffc00, x29);
  ASSERT_EQUAL_64(0x8000000000000400, x30);

  TEARDOWN();
}

TEST(fcvtzu) {
  SETUP();

  START();
  __ Fmov(s0, 1.0);
  __ Fmov(s1, 1.1);
  __ Fmov(s2, 1.5);
  __ Fmov(s3, -1.5);
  __ Fmov(s4, kFP32PositiveInfinity);
  __ Fmov(s5, kFP32NegativeInfinity);
  __ Fmov(s6, 0x7fffff80);  // Largest float < INT32_MAX.
  __ Fneg(s7, s6);          // Smallest float > INT32_MIN.
  __ Fmov(d8, 1.0);
  __ Fmov(d9, 1.1);
  __ Fmov(d10, 1.5);
  __ Fmov(d11, -1.5);
  __ Fmov(d12, kFP64PositiveInfinity);
  __ Fmov(d13, kFP64NegativeInfinity);
  __ Fmov(d14, kWMaxInt - 1);
  __ Fmov(d15, kWMinInt + 1);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, -1.5);
  __ Fmov(s20, kFP32PositiveInfinity);
  __ Fmov(s21, kFP32NegativeInfinity);
  __ Fmov(s22, 0x7fffff8000000000);  // Largest float < INT64_MAX.
  __ Fneg(s23, s22);                 // Smallest float > INT64_MIN.
  __ Fmov(d24, 1.1);
  __ Fmov(d25, 1.5);
  __ Fmov(d26, -1.5);
  __ Fmov(d27, kFP64PositiveInfinity);
  __ Fmov(d28, kFP64NegativeInfinity);
  __ Fmov(d29, 0x7ffffffffffffc00);  // Largest double < INT64_MAX.
  __ Fneg(d30, d29);                 // Smallest double > INT64_MIN.
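  // FCVTZU rounds toward zero and converts to unsigned, so -1.5 and the
  // negative infinities all clamp to 0 below.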

  __ Fcvtzu(w0, s0);
  __ Fcvtzu(w1, s1);
  __ Fcvtzu(w2, s2);
  __ Fcvtzu(w3, s3);
  __ Fcvtzu(w4, s4);
  __ Fcvtzu(w5, s5);
  __ Fcvtzu(w6, s6);
  __ Fcvtzu(w7, s7);
  __ Fcvtzu(w8, d8);
  __ Fcvtzu(w9, d9);
  __ Fcvtzu(w10, d10);
  __ Fcvtzu(w11, d11);
  __ Fcvtzu(w12, d12);
  __ Fcvtzu(w13, d13);
  __ Fcvtzu(w14, d14);
  __ Fcvtzu(x17, s17);
  __ Fcvtzu(x18, s18);
  __ Fcvtzu(x19, s19);
  __ Fcvtzu(x20, s20);
  __ Fcvtzu(x21, s21);
  __ Fcvtzu(x22, s22);
  __ Fcvtzu(x23, s23);
  __ Fcvtzu(x24, d24);
  __ Fcvtzu(x25, d25);
  __ Fcvtzu(x26, d26);
  __ Fcvtzu(x27, d27);
  __ Fcvtzu(x28, d28);
  __ Fcvtzu(x29, d29);
  __ Fcvtzu(x30, d30);
  END();

  RUN();

  ASSERT_EQUAL_64(1, x0);
  ASSERT_EQUAL_64(1, x1);
  ASSERT_EQUAL_64(1, x2);
  ASSERT_EQUAL_64(0, x3);
  ASSERT_EQUAL_64(0xffffffff, x4);
  ASSERT_EQUAL_64(0, x5);
  ASSERT_EQUAL_64(0x7fffff80, x6);
  ASSERT_EQUAL_64(0, x7);
  ASSERT_EQUAL_64(1, x8);
  ASSERT_EQUAL_64(1, x9);
  ASSERT_EQUAL_64(1, x10);
  ASSERT_EQUAL_64(0, x11);
  ASSERT_EQUAL_64(0xffffffff, x12);
  ASSERT_EQUAL_64(0, x13);
  ASSERT_EQUAL_64(0x7ffffffe, x14);
  ASSERT_EQUAL_64(1, x17);
  ASSERT_EQUAL_64(1, x18);
  ASSERT_EQUAL_64(0, x19);
  ASSERT_EQUAL_64(0xffffffffffffffff, x20);
  ASSERT_EQUAL_64(0, x21);
  ASSERT_EQUAL_64(0x7fffff8000000000, x22);
  ASSERT_EQUAL_64(0, x23);
  ASSERT_EQUAL_64(1, x24);
  ASSERT_EQUAL_64(1, x25);
  ASSERT_EQUAL_64(0, x26);
  ASSERT_EQUAL_64(0xffffffffffffffff, x27);
  ASSERT_EQUAL_64(0, x28);
  ASSERT_EQUAL_64(0x7ffffffffffffc00, x29);
  ASSERT_EQUAL_64(0, x30);

  TEARDOWN();
}


TEST(neon_fcvtl) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x000080007efffeff, 0x3100b1007c00fc00);
  __ Movi(v1.V2D(), 0x03ff83ff00038003, 0x000180017c01fc01);
  __ Movi(v2.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
  __ Movi(v3.V2D(), 0x0000000080000000, 0x7f8fffffff8fffff);
  __ Movi(v4.V2D(), 0x7fcfffffffcfffff, 0x0000000180000001);
  __ Fcvtl(v16.V4S(), v0.V4H());
  __ Fcvtl2(v17.V4S(), v0.V8H());
  __ Fcvtl(v18.V4S(), v1.V4H());
  __ Fcvtl2(v19.V4S(), v1.V8H());

  __ Fcvtl(v20.V2D(), v2.V2S());
  __ Fcvtl2(v21.V2D(), v2.V4S());
  __ Fcvtl(v22.V2D(), v3.V2S());
  __ Fcvtl2(v23.V2D(), v3.V4S());
  __ Fcvtl(v24.V2D(), v4.V2S());
  __ Fcvtl2(v25.V2D(), v4.V4S());

  END();

  RUN();
  ASSERT_EQUAL_128(0x3e200000be200000, 0x7f800000ff800000, q16);
  ASSERT_EQUAL_128(0x0000000080000000, 0x7fdfe000ffdfe000, q17);
  ASSERT_EQUAL_128(0x33800000b3800000, 0x7fc02000ffc02000, q18);
  ASSERT_EQUAL_128(0x387fc000b87fc000, 0x34400000b4400000, q19);
  ASSERT_EQUAL_128(0x7ff0000000000000, 0xfff0000000000000, q20);
  ASSERT_EQUAL_128(0x3fc4000000000000, 0xbfc4000000000000, q21);
  ASSERT_EQUAL_128(0x7ff9ffffe0000000, 0xfff9ffffe0000000, q22);
  ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000000000, q23);
  ASSERT_EQUAL_128(0x36a0000000000000, 0xb6a0000000000000, q24);
  ASSERT_EQUAL_128(0x7ff9ffffe0000000, 0xfff9ffffe0000000, q25);
  TEARDOWN();
}


TEST(neon_fcvtn) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
  __ Movi(v1.V2D(), 0x0000000080000000, 0x7f8fffffff8fffff);
  __ Movi(v2.V2D(), 0x7fcfffffffcfffff, 0x0000000180000001);
  __ Movi(v3.V2D(), 0x3fc4000000000000, 0xbfc4000000000000);
  __ Movi(v4.V2D(), 0x7ff0000000000000, 0xfff0000000000000);
  __ Movi(v5.V2D(), 0x0000000000000000, 0x8000000000000000);
  __ Movi(v6.V2D(), 0x7ff0ffffffffffff, 0xfff0ffffffffffff);
  __ Movi(v7.V2D(), 0x7ff8ffffffffffff, 0xfff8ffffffffffff);
  __ Movi(v8.V2D(), 0x0000000000000001, 0x8000000000000001);

  __ Fcvtn(v16.V4H(), v0.V4S());
  __ Fcvtn2(v16.V8H(), v1.V4S());
  __ Fcvtn(v17.V4H(), v2.V4S());
  __ Fcvtn(v18.V2S(), v3.V2D());
  __ Fcvtn2(v18.V4S(), v4.V2D());
  __ Fcvtn(v19.V2S(), v5.V2D());
  __ Fcvtn2(v19.V4S(), v6.V2D());
  __ Fcvtn(v20.V2S(), v7.V2D());
  __ Fcvtn2(v20.V4S(), v8.V2D());
  END();

  RUN();
  ASSERT_EQUAL_128(0x000080007e7ffe7f, 0x3100b1007c00fc00, q16);
  ASSERT_EQUAL_64(0x7e7ffe7f00008000, d17);
  ASSERT_EQUAL_128(0x7f800000ff800000, 0x3e200000be200000, q18);
  ASSERT_EQUAL_128(0x7fc7ffffffc7ffff, 0x0000000080000000, q19);
  ASSERT_EQUAL_128(0x0000000080000000, 0x7fc7ffffffc7ffff, q20);
  TEARDOWN();
}


TEST(neon_fcvtxn) {
  SETUP();

  START();
  __ Movi(v0.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
  __ Movi(v1.V2D(), 0x0000000080000000, 0x7f8fffffff8fffff);
  __ Movi(v2.V2D(), 0x7fcfffffffcfffff, 0x0000000180000001);
  __ Movi(v3.V2D(), 0x3fc4000000000000, 0xbfc4000000000000);
  __ Movi(v4.V2D(), 0x7ff0000000000000, 0xfff0000000000000);
  __ Movi(v5.V2D(), 0x0000000000000000, 0x8000000000000000);
  __ Movi(v6.V2D(), 0x7ff0ffffffffffff, 0xfff0ffffffffffff);
  __ Movi(v7.V2D(), 0x7ff8ffffffffffff, 0xfff8ffffffffffff);
  __ Movi(v8.V2D(), 0x0000000000000001, 0x8000000000000001);
  __ Movi(v9.V2D(), 0x41ed000000000000, 0x41efffffffefffff);
  __ Fcvtxn(v16.V2S(), v0.V2D());
  __ Fcvtxn2(v16.V4S(), v1.V2D());
  __ Fcvtxn(v17.V2S(), v2.V2D());
  __ Fcvtxn2(v17.V4S(), v3.V2D());
  __ Fcvtxn(v18.V2S(), v4.V2D());
  __ Fcvtxn2(v18.V4S(), v5.V2D());
  __ Fcvtxn(v19.V2S(), v6.V2D());
  __ Fcvtxn2(v19.V4S(), v7.V2D());
  __ Fcvtxn(v20.V2S(), v8.V2D());
  __ Fcvtxn2(v20.V4S(), v9.V2D());
  __ Fcvtxn(s21, d0);
  END();

  RUN();
  ASSERT_EQUAL_128(0x000000017f7fffff, 0x310000057f7fffff, q16);
  ASSERT_EQUAL_128(0x3e200000be200000, 0x7f7fffff00000001, q17);
  ASSERT_EQUAL_128(0x0000000080000000, 0x7f800000ff800000, q18);
  ASSERT_EQUAL_128(0x7fc7ffffffc7ffff, 0x7fc7ffffffc7ffff, q19);
  ASSERT_EQUAL_128(0x4f6800004f7fffff, 0x0000000180000001, q20);
  ASSERT_EQUAL_128(0, 0x7f7fffff, q21);
  TEARDOWN();
}


// Test that scvtf and ucvtf can convert the 64-bit input into the expected
// value. All possible values of 'fbits' are tested. The expected value is
// modified accordingly in each case.
//
// The expected value is specified as the bit encoding of the expected double
// produced by scvtf (expected_scvtf_bits) as well as ucvtf
// (expected_ucvtf_bits).
//
// Where the input value is representable by int32_t or uint32_t, conversions
// from W registers will also be tested.
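// For example (a worked case, not an extra test): in = 0x10 with fbits = 4
// should produce 16.0 / 2^4 = 1.0 from both scvtf and ucvtf; the checks below
// divide the expected base value by 2^fbits in the same way.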
static void TestUScvtfHelper(uint64_t in,
                             uint64_t expected_scvtf_bits,
                             uint64_t expected_ucvtf_bits) {
  uint64_t u64 = in;
  uint32_t u32 = u64 & 0xffffffff;
  int64_t s64 = static_cast<int64_t>(in);
  int32_t s32 = s64 & 0x7fffffff;

  bool cvtf_s32 = (s64 == s32);
  bool cvtf_u32 = (u64 == u32);

  double results_scvtf_x[65];
  double results_ucvtf_x[65];
  double results_scvtf_w[33];
  double results_ucvtf_w[33];

  SETUP();
  START();

  __ Mov(x0, reinterpret_cast<uintptr_t>(results_scvtf_x));
  __ Mov(x1, reinterpret_cast<uintptr_t>(results_ucvtf_x));
  __ Mov(x2, reinterpret_cast<uintptr_t>(results_scvtf_w));
  __ Mov(x3, reinterpret_cast<uintptr_t>(results_ucvtf_w));

  __ Mov(x10, s64);

  // Corrupt the top word, in case it is accidentally used during W-register
  // conversions.
  __ Mov(x11, 0x5555555555555555);
  __ Bfi(x11, x10, 0, kWRegSize);

  // Test integer conversions.
  __ Scvtf(d0, x10);
  __ Ucvtf(d1, x10);
  __ Scvtf(d2, w11);
  __ Ucvtf(d3, w11);
  __ Str(d0, MemOperand(x0));
  __ Str(d1, MemOperand(x1));
  __ Str(d2, MemOperand(x2));
  __ Str(d3, MemOperand(x3));

  // Test all possible values of fbits.
  for (int fbits = 1; fbits <= 32; fbits++) {
    __ Scvtf(d0, x10, fbits);
    __ Ucvtf(d1, x10, fbits);
    __ Scvtf(d2, w11, fbits);
    __ Ucvtf(d3, w11, fbits);
    __ Str(d0, MemOperand(x0, fbits * kDRegSizeInBytes));
    __ Str(d1, MemOperand(x1, fbits * kDRegSizeInBytes));
    __ Str(d2, MemOperand(x2, fbits * kDRegSizeInBytes));
    __ Str(d3, MemOperand(x3, fbits * kDRegSizeInBytes));
  }

  // Conversions from W registers can only handle fbits values <= 32, so just
  // test conversions from X registers for 32 < fbits <= 64.
  for (int fbits = 33; fbits <= 64; fbits++) {
    __ Scvtf(d0, x10, fbits);
    __ Ucvtf(d1, x10, fbits);
    __ Str(d0, MemOperand(x0, fbits * kDRegSizeInBytes));
    __ Str(d1, MemOperand(x1, fbits * kDRegSizeInBytes));
  }

  END();
  RUN();

  // Check the results.
  double expected_scvtf_base = RawbitsToDouble(expected_scvtf_bits);
  double expected_ucvtf_base = RawbitsToDouble(expected_ucvtf_bits);

  for (int fbits = 0; fbits <= 32; fbits++) {
    double expected_scvtf = expected_scvtf_base / std::pow(2, fbits);
    double expected_ucvtf = expected_ucvtf_base / std::pow(2, fbits);
    ASSERT_EQUAL_FP64(expected_scvtf, results_scvtf_x[fbits]);
    ASSERT_EQUAL_FP64(expected_ucvtf, results_ucvtf_x[fbits]);
    if (cvtf_s32) ASSERT_EQUAL_FP64(expected_scvtf, results_scvtf_w[fbits]);
    if (cvtf_u32) ASSERT_EQUAL_FP64(expected_ucvtf, results_ucvtf_w[fbits]);
  }
  for (int fbits = 33; fbits <= 64; fbits++) {
    double expected_scvtf = expected_scvtf_base / std::pow(2, fbits);
    double expected_ucvtf = expected_ucvtf_base / std::pow(2, fbits);
    ASSERT_EQUAL_FP64(expected_scvtf, results_scvtf_x[fbits]);
    ASSERT_EQUAL_FP64(expected_ucvtf, results_ucvtf_x[fbits]);
  }

  TEARDOWN();
}


TEST(scvtf_ucvtf_double) {
  // Simple conversions of positive numbers which require no rounding; the
  // results should not depend on the rounding mode, and ucvtf and scvtf
  // should produce the same result.
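  // As a worked example of the encodings below: 0x0000000040000000 is 2^30,
  // and 2^30 as a double has biased exponent 1023 + 30 = 1053 (0x41d) with a
  // zero fraction, giving the bit pattern 0x41d0000000000000.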
  TestUScvtfHelper(0x0000000000000000, 0x0000000000000000, 0x0000000000000000);
  TestUScvtfHelper(0x0000000000000001, 0x3ff0000000000000, 0x3ff0000000000000);
  TestUScvtfHelper(0x0000000040000000, 0x41d0000000000000, 0x41d0000000000000);
  TestUScvtfHelper(0x0000000100000000, 0x41f0000000000000, 0x41f0000000000000);
  TestUScvtfHelper(0x4000000000000000, 0x43d0000000000000, 0x43d0000000000000);
  // Test mantissa extremities.
  TestUScvtfHelper(0x4000000000000400, 0x43d0000000000001, 0x43d0000000000001);
  // The largest int32_t that fits in a double.
  TestUScvtfHelper(0x000000007fffffff, 0x41dfffffffc00000, 0x41dfffffffc00000);
  // Values that would be negative if treated as an int32_t.
  TestUScvtfHelper(0x00000000ffffffff, 0x41efffffffe00000, 0x41efffffffe00000);
  TestUScvtfHelper(0x0000000080000000, 0x41e0000000000000, 0x41e0000000000000);
  TestUScvtfHelper(0x0000000080000001, 0x41e0000000200000, 0x41e0000000200000);
  // The largest int64_t that fits in a double.
  TestUScvtfHelper(0x7ffffffffffffc00, 0x43dfffffffffffff, 0x43dfffffffffffff);
  // Check for bit pattern reproduction.
  TestUScvtfHelper(0x0123456789abcde0, 0x43723456789abcde, 0x43723456789abcde);
  TestUScvtfHelper(0x0000000012345678, 0x41b2345678000000, 0x41b2345678000000);

  // Simple conversions of negative int64_t values. These require no rounding,
  // and the results should not depend on the rounding mode.
  TestUScvtfHelper(0xffffffffc0000000, 0xc1d0000000000000, 0x43effffffff80000);
  TestUScvtfHelper(0xffffffff00000000, 0xc1f0000000000000, 0x43efffffffe00000);
  TestUScvtfHelper(0xc000000000000000, 0xc3d0000000000000, 0x43e8000000000000);

  // Conversions which require rounding.
  TestUScvtfHelper(0x1000000000000000, 0x43b0000000000000, 0x43b0000000000000);
  TestUScvtfHelper(0x1000000000000001, 0x43b0000000000000, 0x43b0000000000000);
  TestUScvtfHelper(0x1000000000000080, 0x43b0000000000000, 0x43b0000000000000);
  TestUScvtfHelper(0x1000000000000081, 0x43b0000000000001, 0x43b0000000000001);
  TestUScvtfHelper(0x1000000000000100, 0x43b0000000000001, 0x43b0000000000001);
  TestUScvtfHelper(0x1000000000000101, 0x43b0000000000001, 0x43b0000000000001);
  TestUScvtfHelper(0x1000000000000180, 0x43b0000000000002, 0x43b0000000000002);
  TestUScvtfHelper(0x1000000000000181, 0x43b0000000000002, 0x43b0000000000002);
  TestUScvtfHelper(0x1000000000000200, 0x43b0000000000002, 0x43b0000000000002);
  TestUScvtfHelper(0x1000000000000201, 0x43b0000000000002, 0x43b0000000000002);
  TestUScvtfHelper(0x1000000000000280, 0x43b0000000000002, 0x43b0000000000002);
  TestUScvtfHelper(0x1000000000000281, 0x43b0000000000003, 0x43b0000000000003);
  TestUScvtfHelper(0x1000000000000300, 0x43b0000000000003, 0x43b0000000000003);
  // Check rounding of negative int64_t values (and large uint64_t values).
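  // For these inputs the top bit is set: interpreted as int64_t,
  // 0x8000000000000000 is -2^63 (scvtf gives 0xc3e0000000000000), while as
  // uint64_t it is +2^63 (ucvtf gives 0x43e0000000000000), so the two
  // conversions diverge from here on.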
  TestUScvtfHelper(0x8000000000000000, 0xc3e0000000000000, 0x43e0000000000000);
  TestUScvtfHelper(0x8000000000000001, 0xc3e0000000000000, 0x43e0000000000000);
  TestUScvtfHelper(0x8000000000000200, 0xc3e0000000000000, 0x43e0000000000000);
  TestUScvtfHelper(0x8000000000000201, 0xc3dfffffffffffff, 0x43e0000000000000);
  TestUScvtfHelper(0x8000000000000400, 0xc3dfffffffffffff, 0x43e0000000000000);
  TestUScvtfHelper(0x8000000000000401, 0xc3dfffffffffffff, 0x43e0000000000001);
  TestUScvtfHelper(0x8000000000000600, 0xc3dffffffffffffe, 0x43e0000000000001);
  TestUScvtfHelper(0x8000000000000601, 0xc3dffffffffffffe, 0x43e0000000000001);
  TestUScvtfHelper(0x8000000000000800, 0xc3dffffffffffffe, 0x43e0000000000001);
  TestUScvtfHelper(0x8000000000000801, 0xc3dffffffffffffe, 0x43e0000000000001);
  TestUScvtfHelper(0x8000000000000a00, 0xc3dffffffffffffe, 0x43e0000000000001);
  TestUScvtfHelper(0x8000000000000a01, 0xc3dffffffffffffd, 0x43e0000000000001);
  TestUScvtfHelper(0x8000000000000c00, 0xc3dffffffffffffd, 0x43e0000000000002);
  // Round up to produce a result that's too big for the input to represent.
  TestUScvtfHelper(0x7ffffffffffffe00, 0x43e0000000000000, 0x43e0000000000000);
  TestUScvtfHelper(0x7fffffffffffffff, 0x43e0000000000000, 0x43e0000000000000);
  TestUScvtfHelper(0xfffffffffffffc00, 0xc090000000000000, 0x43f0000000000000);
  TestUScvtfHelper(0xffffffffffffffff, 0xbff0000000000000, 0x43f0000000000000);
}


// The same as TestUScvtfHelper, but convert to floats.
static void TestUScvtf32Helper(uint64_t in,
                               uint32_t expected_scvtf_bits,
                               uint32_t expected_ucvtf_bits) {
  uint64_t u64 = in;
  uint32_t u32 = u64 & 0xffffffff;
  int64_t s64 = static_cast<int64_t>(in);
  int32_t s32 = s64 & 0x7fffffff;

  bool cvtf_s32 = (s64 == s32);
  bool cvtf_u32 = (u64 == u32);

  float results_scvtf_x[65];
  float results_ucvtf_x[65];
  float results_scvtf_w[33];
  float results_ucvtf_w[33];

  SETUP();
  START();

  __ Mov(x0, reinterpret_cast<uintptr_t>(results_scvtf_x));
  __ Mov(x1, reinterpret_cast<uintptr_t>(results_ucvtf_x));
  __ Mov(x2, reinterpret_cast<uintptr_t>(results_scvtf_w));
  __ Mov(x3, reinterpret_cast<uintptr_t>(results_ucvtf_w));

  __ Mov(x10, s64);

  // Corrupt the top word, in case it is accidentally used during W-register
  // conversions.
  __ Mov(x11, 0x5555555555555555);
  __ Bfi(x11, x10, 0, kWRegSize);

  // Test integer conversions.
  __ Scvtf(s0, x10);
  __ Ucvtf(s1, x10);
  __ Scvtf(s2, w11);
  __ Ucvtf(s3, w11);
  __ Str(s0, MemOperand(x0));
  __ Str(s1, MemOperand(x1));
  __ Str(s2, MemOperand(x2));
  __ Str(s3, MemOperand(x3));

  // Test all possible values of fbits.
  for (int fbits = 1; fbits <= 32; fbits++) {
    __ Scvtf(s0, x10, fbits);
    __ Ucvtf(s1, x10, fbits);
    __ Scvtf(s2, w11, fbits);
    __ Ucvtf(s3, w11, fbits);
    __ Str(s0, MemOperand(x0, fbits * kSRegSizeInBytes));
    __ Str(s1, MemOperand(x1, fbits * kSRegSizeInBytes));
    __ Str(s2, MemOperand(x2, fbits * kSRegSizeInBytes));
    __ Str(s3, MemOperand(x3, fbits * kSRegSizeInBytes));
  }

  // Conversions from W registers can only handle fbits values <= 32, so just
  // test conversions from X registers for 32 < fbits <= 64.
13158   for (int fbits = 33; fbits <= 64; fbits++) {
13159     __ Scvtf(s0, x10, fbits);
13160     __ Ucvtf(s1, x10, fbits);
13161     __ Str(s0, MemOperand(x0, fbits * kSRegSizeInBytes));
13162     __ Str(s1, MemOperand(x1, fbits * kSRegSizeInBytes));
13163   }
13164
13165   END();
13166   RUN();
13167
13168   // Check the results.
13169   float expected_scvtf_base = RawbitsToFloat(expected_scvtf_bits);
13170   float expected_ucvtf_base = RawbitsToFloat(expected_ucvtf_bits);
13171
13172   for (int fbits = 0; fbits <= 32; fbits++) {
13173     float expected_scvtf = expected_scvtf_base / std::pow(2.0f, fbits);
13174     float expected_ucvtf = expected_ucvtf_base / std::pow(2.0f, fbits);
13175     ASSERT_EQUAL_FP32(expected_scvtf, results_scvtf_x[fbits]);
13176     ASSERT_EQUAL_FP32(expected_ucvtf, results_ucvtf_x[fbits]);
13177     if (cvtf_s32) ASSERT_EQUAL_FP32(expected_scvtf, results_scvtf_w[fbits]);
13178     if (cvtf_u32) ASSERT_EQUAL_FP32(expected_ucvtf, results_ucvtf_w[fbits]);
13179   }
13180   for (int fbits = 33; fbits <= 64; fbits++) {
13181     float expected_scvtf = expected_scvtf_base / std::pow(2.0f, fbits);
13182     float expected_ucvtf = expected_ucvtf_base / std::pow(2.0f, fbits);
13183     ASSERT_EQUAL_FP32(expected_scvtf, results_scvtf_x[fbits]);
13184     ASSERT_EQUAL_FP32(expected_ucvtf, results_ucvtf_x[fbits]);
13185   }
13186
13187   TEARDOWN();
13188 }
13189
13190
13191 TEST(scvtf_ucvtf_float) {
13192   // Simple conversions of positive numbers which require no rounding; the
13193   // results should not depend on the rounding mode, and ucvtf and scvtf should
13194   // produce the same result.
13195   TestUScvtf32Helper(0x0000000000000000, 0x00000000, 0x00000000);
13196   TestUScvtf32Helper(0x0000000000000001, 0x3f800000, 0x3f800000);
13197   TestUScvtf32Helper(0x0000000040000000, 0x4e800000, 0x4e800000);
13198   TestUScvtf32Helper(0x0000000100000000, 0x4f800000, 0x4f800000);
13199   TestUScvtf32Helper(0x4000000000000000, 0x5e800000, 0x5e800000);
13200   // Test mantissa extremities.
13201   TestUScvtf32Helper(0x0000000000800001, 0x4b000001, 0x4b000001);
13202   TestUScvtf32Helper(0x4000008000000000, 0x5e800001, 0x5e800001);
13203   // The largest int32_t that fits in a float.
13204   TestUScvtf32Helper(0x000000007fffff80, 0x4effffff, 0x4effffff);
13205   // Values that would be negative if treated as an int32_t.
13206   TestUScvtf32Helper(0x00000000ffffff00, 0x4f7fffff, 0x4f7fffff);
13207   TestUScvtf32Helper(0x0000000080000000, 0x4f000000, 0x4f000000);
13208   TestUScvtf32Helper(0x0000000080000100, 0x4f000001, 0x4f000001);
13209   // The largest int64_t that fits in a float.
13210   TestUScvtf32Helper(0x7fffff8000000000, 0x5effffff, 0x5effffff);
13211   // Check for bit pattern reproduction.
13212   TestUScvtf32Helper(0x0000000000876543, 0x4b076543, 0x4b076543);
13213
13214   // Simple conversions of negative int64_t values. These require no rounding,
13215   // and the results should not depend on the rounding mode.
13216   TestUScvtf32Helper(0xfffffc0000000000, 0xd4800000, 0x5f7ffffc);
13217   TestUScvtf32Helper(0xc000000000000000, 0xde800000, 0x5f400000);
13218
13219   // Conversions which require rounding.
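  // The expected values below encode round-to-nearest, ties-to-even (the
  // default rounding mode): at 2^47 the float ulp is 2^24, so an increment
  // of 0x800000 (2^23) is exactly halfway between representable values and
  // rounds to the even mantissa.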
13220 TestUScvtf32Helper(0x0000800000000000, 0x57000000, 0x57000000); 13221 TestUScvtf32Helper(0x0000800000000001, 0x57000000, 0x57000000); 13222 TestUScvtf32Helper(0x0000800000800000, 0x57000000, 0x57000000); 13223 TestUScvtf32Helper(0x0000800000800001, 0x57000001, 0x57000001); 13224 TestUScvtf32Helper(0x0000800001000000, 0x57000001, 0x57000001); 13225 TestUScvtf32Helper(0x0000800001000001, 0x57000001, 0x57000001); 13226 TestUScvtf32Helper(0x0000800001800000, 0x57000002, 0x57000002); 13227 TestUScvtf32Helper(0x0000800001800001, 0x57000002, 0x57000002); 13228 TestUScvtf32Helper(0x0000800002000000, 0x57000002, 0x57000002); 13229 TestUScvtf32Helper(0x0000800002000001, 0x57000002, 0x57000002); 13230 TestUScvtf32Helper(0x0000800002800000, 0x57000002, 0x57000002); 13231 TestUScvtf32Helper(0x0000800002800001, 0x57000003, 0x57000003); 13232 TestUScvtf32Helper(0x0000800003000000, 0x57000003, 0x57000003); 13233 // Check rounding of negative int64_t values (and large uint64_t values). 13234 TestUScvtf32Helper(0x8000000000000000, 0xdf000000, 0x5f000000); 13235 TestUScvtf32Helper(0x8000000000000001, 0xdf000000, 0x5f000000); 13236 TestUScvtf32Helper(0x8000004000000000, 0xdf000000, 0x5f000000); 13237 TestUScvtf32Helper(0x8000004000000001, 0xdeffffff, 0x5f000000); 13238 TestUScvtf32Helper(0x8000008000000000, 0xdeffffff, 0x5f000000); 13239 TestUScvtf32Helper(0x8000008000000001, 0xdeffffff, 0x5f000001); 13240 TestUScvtf32Helper(0x800000c000000000, 0xdefffffe, 0x5f000001); 13241 TestUScvtf32Helper(0x800000c000000001, 0xdefffffe, 0x5f000001); 13242 TestUScvtf32Helper(0x8000010000000000, 0xdefffffe, 0x5f000001); 13243 TestUScvtf32Helper(0x8000010000000001, 0xdefffffe, 0x5f000001); 13244 TestUScvtf32Helper(0x8000014000000000, 0xdefffffe, 0x5f000001); 13245 TestUScvtf32Helper(0x8000014000000001, 0xdefffffd, 0x5f000001); 13246 TestUScvtf32Helper(0x8000018000000000, 0xdefffffd, 0x5f000002); 13247 // Round up to produce a result that's too big for the input to represent. 13248 TestUScvtf32Helper(0x000000007fffffc0, 0x4f000000, 0x4f000000); 13249 TestUScvtf32Helper(0x000000007fffffff, 0x4f000000, 0x4f000000); 13250 TestUScvtf32Helper(0x00000000ffffff80, 0x4f800000, 0x4f800000); 13251 TestUScvtf32Helper(0x00000000ffffffff, 0x4f800000, 0x4f800000); 13252 TestUScvtf32Helper(0x7fffffc000000000, 0x5f000000, 0x5f000000); 13253 TestUScvtf32Helper(0x7fffffffffffffff, 0x5f000000, 0x5f000000); 13254 TestUScvtf32Helper(0xffffff8000000000, 0xd3000000, 0x5f800000); 13255 TestUScvtf32Helper(0xffffffffffffffff, 0xbf800000, 0x5f800000); 13256 } 13257 13258 13259 TEST(system_mrs) { 13260 SETUP(); 13261 13262 START(); 13263 __ Mov(w0, 0); 13264 __ Mov(w1, 1); 13265 __ Mov(w2, 0x80000000); 13266 13267 // Set the Z and C flags. 13268 __ Cmp(w0, w0); 13269 __ Mrs(x3, NZCV); 13270 13271 // Set the N flag. 13272 __ Cmp(w0, w1); 13273 __ Mrs(x4, NZCV); 13274 13275 // Set the Z, C and V flags. 13276 __ Adds(w0, w2, w2); 13277 __ Mrs(x5, NZCV); 13278 13279 // Read the default FPCR. 13280 __ Mrs(x6, FPCR); 13281 END(); 13282 13283 RUN(); 13284 13285 // NZCV 13286 ASSERT_EQUAL_32(ZCFlag, w3); 13287 ASSERT_EQUAL_32(NFlag, w4); 13288 ASSERT_EQUAL_32(ZCVFlag, w5); 13289 13290 // FPCR 13291 // The default FPCR on Linux-based platforms is 0. 
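// (FPCR == 0 selects round-to-nearest ties-to-even, disables flush-to-zero
// and the alternative half-precision format, and leaves all floating-point
// exception traps disabled.)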
13292 ASSERT_EQUAL_32(0, w6); 13293 13294 TEARDOWN(); 13295 } 13296 13297 13298 TEST(system_msr) { 13299 // All FPCR fields that must be implemented: AHP, DN, FZ, RMode 13300 const uint64_t fpcr_core = 0x07c00000; 13301 13302 // All FPCR fields (including fields which may be read-as-zero): 13303 // Stride, Len 13304 // IDE, IXE, UFE, OFE, DZE, IOE 13305 const uint64_t fpcr_all = fpcr_core | 0x00379f00; 13306 13307 SETUP(); 13308 13309 START(); 13310 __ Mov(w0, 0); 13311 __ Mov(w1, 0x7fffffff); 13312 13313 __ Mov(x7, 0); 13314 13315 __ Mov(x10, NVFlag); 13316 __ Cmp(w0, w0); // Set Z and C. 13317 __ Msr(NZCV, x10); // Set N and V. 13318 // The Msr should have overwritten every flag set by the Cmp. 13319 __ Cinc(x7, x7, mi); // N 13320 __ Cinc(x7, x7, ne); // !Z 13321 __ Cinc(x7, x7, lo); // !C 13322 __ Cinc(x7, x7, vs); // V 13323 13324 __ Mov(x10, ZCFlag); 13325 __ Cmn(w1, w1); // Set N and V. 13326 __ Msr(NZCV, x10); // Set Z and C. 13327 // The Msr should have overwritten every flag set by the Cmn. 13328 __ Cinc(x7, x7, pl); // !N 13329 __ Cinc(x7, x7, eq); // Z 13330 __ Cinc(x7, x7, hs); // C 13331 __ Cinc(x7, x7, vc); // !V 13332 13333 // All core FPCR fields must be writable. 13334 __ Mov(x8, fpcr_core); 13335 __ Msr(FPCR, x8); 13336 __ Mrs(x8, FPCR); 13337 13338 // All FPCR fields, including optional ones. This part of the test doesn't 13339 // achieve much other than ensuring that supported fields can be cleared by 13340 // the next test. 13341 __ Mov(x9, fpcr_all); 13342 __ Msr(FPCR, x9); 13343 __ Mrs(x9, FPCR); 13344 __ And(x9, x9, fpcr_core); 13345 13346 // The undefined bits must ignore writes. 13347 // It's conceivable that a future version of the architecture could use these 13348 // fields (making this test fail), but in the meantime this is a useful test 13349 // for the simulator. 13350 __ Mov(x10, ~fpcr_all); 13351 __ Msr(FPCR, x10); 13352 __ Mrs(x10, FPCR); 13353 13354 END(); 13355 13356 RUN(); 13357 13358 // We should have incremented x7 (from 0) exactly 8 times. 13359 ASSERT_EQUAL_64(8, x7); 13360 13361 ASSERT_EQUAL_64(fpcr_core, x8); 13362 ASSERT_EQUAL_64(fpcr_core, x9); 13363 ASSERT_EQUAL_64(0, x10); 13364 13365 TEARDOWN(); 13366 } 13367 13368 13369 TEST(system_nop) { 13370 SETUP(); 13371 RegisterDump before; 13372 13373 START(); 13374 before.Dump(&masm); 13375 __ Nop(); 13376 END(); 13377 13378 RUN(); 13379 13380 ASSERT_EQUAL_REGISTERS(before); 13381 ASSERT_EQUAL_NZCV(before.flags_nzcv()); 13382 13383 TEARDOWN(); 13384 } 13385 13386 13387 TEST(zero_dest) { 13388 SETUP(); 13389 RegisterDump before; 13390 13391 START(); 13392 // Preserve the stack pointer, in case we clobber it. 13393 __ Mov(x30, sp); 13394 // Initialize the other registers used in this test. 13395 uint64_t literal_base = 0x0100001000100101; 13396 __ Mov(x0, 0); 13397 __ Mov(x1, literal_base); 13398 for (unsigned i = 2; i < x30.GetCode(); i++) { 13399 __ Add(Register::GetXRegFromCode(i), Register::GetXRegFromCode(i - 1), x1); 13400 } 13401 before.Dump(&masm); 13402 13403 // All of these instructions should be NOPs in these forms, but have 13404 // alternate forms which can write into the stack pointer. 
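  // For example, the shifted-register form of add treats register 31 as xzr,
  // so add(xzr, x0, x1) discards its result, while the immediate and
  // extended-register encodings treat register 31 as sp and really would
  // update the stack pointer.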
13405 { 13406 ExactAssemblyScope scope(&masm, 3 * 7 * kInstructionSize); 13407 __ add(xzr, x0, x1); 13408 __ add(xzr, x1, xzr); 13409 __ add(xzr, xzr, x1); 13410 13411 __ and_(xzr, x0, x2); 13412 __ and_(xzr, x2, xzr); 13413 __ and_(xzr, xzr, x2); 13414 13415 __ bic(xzr, x0, x3); 13416 __ bic(xzr, x3, xzr); 13417 __ bic(xzr, xzr, x3); 13418 13419 __ eon(xzr, x0, x4); 13420 __ eon(xzr, x4, xzr); 13421 __ eon(xzr, xzr, x4); 13422 13423 __ eor(xzr, x0, x5); 13424 __ eor(xzr, x5, xzr); 13425 __ eor(xzr, xzr, x5); 13426 13427 __ orr(xzr, x0, x6); 13428 __ orr(xzr, x6, xzr); 13429 __ orr(xzr, xzr, x6); 13430 13431 __ sub(xzr, x0, x7); 13432 __ sub(xzr, x7, xzr); 13433 __ sub(xzr, xzr, x7); 13434 } 13435 13436 // Swap the saved stack pointer with the real one. If sp was written 13437 // during the test, it will show up in x30. This is done because the test 13438 // framework assumes that sp will be valid at the end of the test. 13439 __ Mov(x29, x30); 13440 __ Mov(x30, sp); 13441 __ Mov(sp, x29); 13442 // We used x29 as a scratch register, so reset it to make sure it doesn't 13443 // trigger a test failure. 13444 __ Add(x29, x28, x1); 13445 END(); 13446 13447 RUN(); 13448 13449 ASSERT_EQUAL_REGISTERS(before); 13450 ASSERT_EQUAL_NZCV(before.flags_nzcv()); 13451 13452 TEARDOWN(); 13453 } 13454 13455 13456 TEST(zero_dest_setflags) { 13457 SETUP(); 13458 RegisterDump before; 13459 13460 START(); 13461 // Preserve the stack pointer, in case we clobber it. 13462 __ Mov(x30, sp); 13463 // Initialize the other registers used in this test. 13464 uint64_t literal_base = 0x0100001000100101; 13465 __ Mov(x0, 0); 13466 __ Mov(x1, literal_base); 13467 for (int i = 2; i < 30; i++) { 13468 __ Add(Register::GetXRegFromCode(i), Register::GetXRegFromCode(i - 1), x1); 13469 } 13470 before.Dump(&masm); 13471 13472 // All of these instructions should only write to the flags in these forms, 13473 // but have alternate forms which can write into the stack pointer. 13474 { 13475 ExactAssemblyScope scope(&masm, 6 * kInstructionSize); 13476 __ adds(xzr, x0, Operand(x1, UXTX)); 13477 __ adds(xzr, x1, Operand(xzr, UXTX)); 13478 __ adds(xzr, x1, 1234); 13479 __ adds(xzr, x0, x1); 13480 __ adds(xzr, x1, xzr); 13481 __ adds(xzr, xzr, x1); 13482 } 13483 13484 { 13485 ExactAssemblyScope scope(&masm, 5 * kInstructionSize); 13486 __ ands(xzr, x2, ~0xf); 13487 __ ands(xzr, xzr, ~0xf); 13488 __ ands(xzr, x0, x2); 13489 __ ands(xzr, x2, xzr); 13490 __ ands(xzr, xzr, x2); 13491 } 13492 13493 { 13494 ExactAssemblyScope scope(&masm, 5 * kInstructionSize); 13495 __ bics(xzr, x3, ~0xf); 13496 __ bics(xzr, xzr, ~0xf); 13497 __ bics(xzr, x0, x3); 13498 __ bics(xzr, x3, xzr); 13499 __ bics(xzr, xzr, x3); 13500 } 13501 13502 { 13503 ExactAssemblyScope scope(&masm, 6 * kInstructionSize); 13504 __ subs(xzr, x0, Operand(x3, UXTX)); 13505 __ subs(xzr, x3, Operand(xzr, UXTX)); 13506 __ subs(xzr, x3, 1234); 13507 __ subs(xzr, x0, x3); 13508 __ subs(xzr, x3, xzr); 13509 __ subs(xzr, xzr, x3); 13510 } 13511 13512 // Swap the saved stack pointer with the real one. If sp was written 13513 // during the test, it will show up in x30. This is done because the test 13514 // framework assumes that sp will be valid at the end of the test. 13515 __ Mov(x29, x30); 13516 __ Mov(x30, sp); 13517 __ Mov(sp, x29); 13518 // We used x29 as a scratch register, so reset it to make sure it doesn't 13519 // trigger a test failure. 
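  // (Each register was initialized to its predecessor plus x1, so x28 + x1
  // recreates the x29 value recorded in the dump.)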
13520 __ Add(x29, x28, x1); 13521 END(); 13522 13523 RUN(); 13524 13525 ASSERT_EQUAL_REGISTERS(before); 13526 13527 TEARDOWN(); 13528 } 13529 13530 13531 TEST(stack_pointer_override) { 13532 // This test generates some stack maintenance code, but the test only checks 13533 // the reported state. 13534 SETUP(); 13535 START(); 13536 13537 // The default stack pointer in VIXL is sp. 13538 VIXL_CHECK(sp.Is(__ StackPointer())); 13539 __ SetStackPointer(x0); 13540 VIXL_CHECK(x0.Is(__ StackPointer())); 13541 __ SetStackPointer(x28); 13542 VIXL_CHECK(x28.Is(__ StackPointer())); 13543 __ SetStackPointer(sp); 13544 VIXL_CHECK(sp.Is(__ StackPointer())); 13545 13546 END(); 13547 RUN(); 13548 TEARDOWN(); 13549 } 13550 13551 13552 TEST(peek_poke_simple) { 13553 SETUP(); 13554 START(); 13555 13556 static const RegList x0_to_x3 = 13557 x0.GetBit() | x1.GetBit() | x2.GetBit() | x3.GetBit(); 13558 static const RegList x10_to_x13 = 13559 x10.GetBit() | x11.GetBit() | x12.GetBit() | x13.GetBit(); 13560 13561 // The literal base is chosen to have two useful properties: 13562 // * When multiplied by small values (such as a register index), this value 13563 // is clearly readable in the result. 13564 // * The value is not formed from repeating fixed-size smaller values, so it 13565 // can be used to detect endianness-related errors. 13566 uint64_t literal_base = 0x0100001000100101; 13567 13568 // Initialize the registers. 13569 __ Mov(x0, literal_base); 13570 __ Add(x1, x0, x0); 13571 __ Add(x2, x1, x0); 13572 __ Add(x3, x2, x0); 13573 13574 __ Claim(32); 13575 13576 // Simple exchange. 13577 // After this test: 13578 // x0-x3 should be unchanged. 13579 // w10-w13 should contain the lower words of x0-x3. 13580 __ Poke(x0, 0); 13581 __ Poke(x1, 8); 13582 __ Poke(x2, 16); 13583 __ Poke(x3, 24); 13584 Clobber(&masm, x0_to_x3); 13585 __ Peek(x0, 0); 13586 __ Peek(x1, 8); 13587 __ Peek(x2, 16); 13588 __ Peek(x3, 24); 13589 13590 __ Poke(w0, 0); 13591 __ Poke(w1, 4); 13592 __ Poke(w2, 8); 13593 __ Poke(w3, 12); 13594 Clobber(&masm, x10_to_x13); 13595 __ Peek(w10, 0); 13596 __ Peek(w11, 4); 13597 __ Peek(w12, 8); 13598 __ Peek(w13, 12); 13599 13600 __ Drop(32); 13601 13602 END(); 13603 RUN(); 13604 13605 ASSERT_EQUAL_64(literal_base * 1, x0); 13606 ASSERT_EQUAL_64(literal_base * 2, x1); 13607 ASSERT_EQUAL_64(literal_base * 3, x2); 13608 ASSERT_EQUAL_64(literal_base * 4, x3); 13609 13610 ASSERT_EQUAL_64((literal_base * 1) & 0xffffffff, x10); 13611 ASSERT_EQUAL_64((literal_base * 2) & 0xffffffff, x11); 13612 ASSERT_EQUAL_64((literal_base * 3) & 0xffffffff, x12); 13613 ASSERT_EQUAL_64((literal_base * 4) & 0xffffffff, x13); 13614 13615 TEARDOWN(); 13616 } 13617 13618 13619 TEST(peek_poke_unaligned) { 13620 SETUP(); 13621 START(); 13622 13623 // The literal base is chosen to have two useful properties: 13624 // * When multiplied by small values (such as a register index), this value 13625 // is clearly readable in the result. 13626 // * The value is not formed from repeating fixed-size smaller values, so it 13627 // can be used to detect endianness-related errors. 13628 uint64_t literal_base = 0x0100001000100101; 13629 13630 // Initialize the registers. 13631 __ Mov(x0, literal_base); 13632 __ Add(x1, x0, x0); 13633 __ Add(x2, x1, x0); 13634 __ Add(x3, x2, x0); 13635 __ Add(x4, x3, x0); 13636 __ Add(x5, x4, x0); 13637 __ Add(x6, x5, x0); 13638 13639 __ Claim(32); 13640 13641 // Unaligned exchanges. 13642 // After this test: 13643 // x0-x6 should be unchanged. 13644 // w10-w12 should contain the lower words of x0-x2. 
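  // The offsets 1-7 used below ensure that every access is misaligned, so
  // this checks that Peek and Poke cope with byte-granular stack offsets.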
13645 __ Poke(x0, 1); 13646 Clobber(&masm, x0.GetBit()); 13647 __ Peek(x0, 1); 13648 __ Poke(x1, 2); 13649 Clobber(&masm, x1.GetBit()); 13650 __ Peek(x1, 2); 13651 __ Poke(x2, 3); 13652 Clobber(&masm, x2.GetBit()); 13653 __ Peek(x2, 3); 13654 __ Poke(x3, 4); 13655 Clobber(&masm, x3.GetBit()); 13656 __ Peek(x3, 4); 13657 __ Poke(x4, 5); 13658 Clobber(&masm, x4.GetBit()); 13659 __ Peek(x4, 5); 13660 __ Poke(x5, 6); 13661 Clobber(&masm, x5.GetBit()); 13662 __ Peek(x5, 6); 13663 __ Poke(x6, 7); 13664 Clobber(&masm, x6.GetBit()); 13665 __ Peek(x6, 7); 13666 13667 __ Poke(w0, 1); 13668 Clobber(&masm, w10.GetBit()); 13669 __ Peek(w10, 1); 13670 __ Poke(w1, 2); 13671 Clobber(&masm, w11.GetBit()); 13672 __ Peek(w11, 2); 13673 __ Poke(w2, 3); 13674 Clobber(&masm, w12.GetBit()); 13675 __ Peek(w12, 3); 13676 13677 __ Drop(32); 13678 13679 END(); 13680 RUN(); 13681 13682 ASSERT_EQUAL_64(literal_base * 1, x0); 13683 ASSERT_EQUAL_64(literal_base * 2, x1); 13684 ASSERT_EQUAL_64(literal_base * 3, x2); 13685 ASSERT_EQUAL_64(literal_base * 4, x3); 13686 ASSERT_EQUAL_64(literal_base * 5, x4); 13687 ASSERT_EQUAL_64(literal_base * 6, x5); 13688 ASSERT_EQUAL_64(literal_base * 7, x6); 13689 13690 ASSERT_EQUAL_64((literal_base * 1) & 0xffffffff, x10); 13691 ASSERT_EQUAL_64((literal_base * 2) & 0xffffffff, x11); 13692 ASSERT_EQUAL_64((literal_base * 3) & 0xffffffff, x12); 13693 13694 TEARDOWN(); 13695 } 13696 13697 13698 TEST(peek_poke_endianness) { 13699 SETUP(); 13700 START(); 13701 13702 // The literal base is chosen to have two useful properties: 13703 // * When multiplied by small values (such as a register index), this value 13704 // is clearly readable in the result. 13705 // * The value is not formed from repeating fixed-size smaller values, so it 13706 // can be used to detect endianness-related errors. 13707 uint64_t literal_base = 0x0100001000100101; 13708 13709 // Initialize the registers. 13710 __ Mov(x0, literal_base); 13711 __ Add(x1, x0, x0); 13712 13713 __ Claim(32); 13714 13715 // Endianness tests. 13716 // After this section: 13717 // x4 should match x0[31:0]:x0[63:32] 13718 // w5 should match w1[15:0]:w1[31:16] 13719 __ Poke(x0, 0); 13720 __ Poke(x0, 8); 13721 __ Peek(x4, 4); 13722 13723 __ Poke(w1, 0); 13724 __ Poke(w1, 4); 13725 __ Peek(w5, 2); 13726 13727 __ Drop(32); 13728 13729 END(); 13730 RUN(); 13731 13732 uint64_t x0_expected = literal_base * 1; 13733 uint64_t x1_expected = literal_base * 2; 13734 uint64_t x4_expected = (x0_expected << 32) | (x0_expected >> 32); 13735 uint64_t x5_expected = 13736 ((x1_expected << 16) & 0xffff0000) | ((x1_expected >> 16) & 0x0000ffff); 13737 13738 ASSERT_EQUAL_64(x0_expected, x0); 13739 ASSERT_EQUAL_64(x1_expected, x1); 13740 ASSERT_EQUAL_64(x4_expected, x4); 13741 ASSERT_EQUAL_64(x5_expected, x5); 13742 13743 TEARDOWN(); 13744 } 13745 13746 13747 TEST(peek_poke_mixed) { 13748 SETUP(); 13749 START(); 13750 13751 // Acquire all temps from the MacroAssembler. They are used arbitrarily below. 13752 UseScratchRegisterScope temps(&masm); 13753 temps.ExcludeAll(); 13754 13755 // The literal base is chosen to have two useful properties: 13756 // * When multiplied by small values (such as a register index), this value 13757 // is clearly readable in the result. 13758 // * The value is not formed from repeating fixed-size smaller values, so it 13759 // can be used to detect endianness-related errors. 13760 uint64_t literal_base = 0x0100001000100101; 13761 13762 // Initialize the registers. 
13763 __ Mov(x0, literal_base); 13764 __ Add(x1, x0, x0); 13765 __ Add(x2, x1, x0); 13766 __ Add(x3, x2, x0); 13767 13768 __ Claim(32); 13769 13770 // Mix with other stack operations. 13771 // After this section: 13772 // x0-x3 should be unchanged. 13773 // x6 should match x1[31:0]:x0[63:32] 13774 // w7 should match x1[15:0]:x0[63:48] 13775 __ Poke(x1, 8); 13776 __ Poke(x0, 0); 13777 { 13778 VIXL_ASSERT(__ StackPointer().Is(sp)); 13779 __ Mov(x4, __ StackPointer()); 13780 __ SetStackPointer(x4); 13781 13782 __ Poke(wzr, 0); // Clobber the space we're about to drop. 13783 __ Drop(4); 13784 __ Peek(x6, 0); 13785 __ Claim(8); 13786 __ Peek(w7, 10); 13787 __ Poke(x3, 28); 13788 __ Poke(xzr, 0); // Clobber the space we're about to drop. 13789 __ Drop(8); 13790 __ Poke(x2, 12); 13791 __ Push(w0); 13792 13793 __ Mov(sp, __ StackPointer()); 13794 __ SetStackPointer(sp); 13795 } 13796 13797 __ Pop(x0, x1, x2, x3); 13798 13799 END(); 13800 RUN(); 13801 13802 uint64_t x0_expected = literal_base * 1; 13803 uint64_t x1_expected = literal_base * 2; 13804 uint64_t x2_expected = literal_base * 3; 13805 uint64_t x3_expected = literal_base * 4; 13806 uint64_t x6_expected = (x1_expected << 32) | (x0_expected >> 32); 13807 uint64_t x7_expected = 13808 ((x1_expected << 16) & 0xffff0000) | ((x0_expected >> 48) & 0x0000ffff); 13809 13810 ASSERT_EQUAL_64(x0_expected, x0); 13811 ASSERT_EQUAL_64(x1_expected, x1); 13812 ASSERT_EQUAL_64(x2_expected, x2); 13813 ASSERT_EQUAL_64(x3_expected, x3); 13814 ASSERT_EQUAL_64(x6_expected, x6); 13815 ASSERT_EQUAL_64(x7_expected, x7); 13816 13817 TEARDOWN(); 13818 } 13819 13820 13821 TEST(peek_poke_reglist) { 13822 SETUP(); 13823 START(); 13824 13825 // Acquire all temps from the MacroAssembler. They are used arbitrarily below. 13826 UseScratchRegisterScope temps(&masm); 13827 temps.ExcludeAll(); 13828 13829 // The literal base is chosen to have two useful properties: 13830 // * When multiplied by small values (such as a register index), this value 13831 // is clearly readable in the result. 13832 // * The value is not formed from repeating fixed-size smaller values, so it 13833 // can be used to detect endianness-related errors. 13834 uint64_t base = 0x0100001000100101; 13835 13836 // Initialize the registers. 13837 __ Mov(x1, base); 13838 __ Add(x2, x1, x1); 13839 __ Add(x3, x2, x1); 13840 __ Add(x4, x3, x1); 13841 13842 CPURegList list_1(x1, x2, x3, x4); 13843 CPURegList list_2(x11, x12, x13, x14); 13844 int list_1_size = list_1.GetTotalSizeInBytes(); 13845 13846 __ Claim(2 * list_1_size); 13847 13848 __ PokeCPURegList(list_1, 0); 13849 __ PokeXRegList(list_1.GetList(), list_1_size); 13850 __ PeekCPURegList(list_2, 2 * kXRegSizeInBytes); 13851 __ PeekXRegList(x15.GetBit(), kWRegSizeInBytes); 13852 __ PeekWRegList(w16.GetBit() | w17.GetBit(), 3 * kXRegSizeInBytes); 13853 13854 __ Drop(2 * list_1_size); 13855 13856 13857 uint64_t base_d = 0x1010010001000010; 13858 13859 // Initialize the registers. 
13860 __ Mov(x1, base_d); 13861 __ Add(x2, x1, x1); 13862 __ Add(x3, x2, x1); 13863 __ Add(x4, x3, x1); 13864 __ Fmov(d1, x1); 13865 __ Fmov(d2, x2); 13866 __ Fmov(d3, x3); 13867 __ Fmov(d4, x4); 13868 13869 CPURegList list_d_1(d1, d2, d3, d4); 13870 CPURegList list_d_2(d11, d12, d13, d14); 13871 int list_d_1_size = list_d_1.GetTotalSizeInBytes(); 13872 13873 __ Claim(2 * list_d_1_size); 13874 13875 __ PokeCPURegList(list_d_1, 0); 13876 __ PokeDRegList(list_d_1.GetList(), list_d_1_size); 13877 __ PeekCPURegList(list_d_2, 2 * kDRegSizeInBytes); 13878 __ PeekDRegList(d15.GetBit(), kSRegSizeInBytes); 13879 __ PeekSRegList(s16.GetBit() | s17.GetBit(), 3 * kDRegSizeInBytes); 13880 13881 __ Drop(2 * list_d_1_size); 13882 13883 13884 END(); 13885 RUN(); 13886 13887 ASSERT_EQUAL_64(3 * base, x11); 13888 ASSERT_EQUAL_64(4 * base, x12); 13889 ASSERT_EQUAL_64(1 * base, x13); 13890 ASSERT_EQUAL_64(2 * base, x14); 13891 ASSERT_EQUAL_64(((1 * base) >> kWRegSize) | ((2 * base) << kWRegSize), x15); 13892 ASSERT_EQUAL_64(2 * base, x14); 13893 ASSERT_EQUAL_32((4 * base) & kWRegMask, w16); 13894 ASSERT_EQUAL_32((4 * base) >> kWRegSize, w17); 13895 13896 ASSERT_EQUAL_FP64(RawbitsToDouble(3 * base_d), d11); 13897 ASSERT_EQUAL_FP64(RawbitsToDouble(4 * base_d), d12); 13898 ASSERT_EQUAL_FP64(RawbitsToDouble(1 * base_d), d13); 13899 ASSERT_EQUAL_FP64(RawbitsToDouble(2 * base_d), d14); 13900 ASSERT_EQUAL_FP64(RawbitsToDouble((base_d >> kSRegSize) | 13901 ((2 * base_d) << kSRegSize)), 13902 d15); 13903 ASSERT_EQUAL_FP64(RawbitsToDouble(2 * base_d), d14); 13904 ASSERT_EQUAL_FP32(RawbitsToFloat((4 * base_d) & kSRegMask), s16); 13905 ASSERT_EQUAL_FP32(RawbitsToFloat((4 * base_d) >> kSRegSize), s17); 13906 13907 TEARDOWN(); 13908 } 13909 13910 13911 TEST(load_store_reglist) { 13912 SETUP(); 13913 START(); 13914 13915 // The literal base is chosen to have two useful properties: 13916 // * When multiplied by small values (such as a register index), this value 13917 // is clearly readable in the result. 13918 // * The value is not formed from repeating fixed-size smaller values, so it 13919 // can be used to detect endianness-related errors. 13920 uint64_t high_base = UINT32_C(0x01000010); 13921 uint64_t low_base = UINT32_C(0x00100101); 13922 uint64_t base = (high_base << 32) | low_base; 13923 uint64_t array[21]; 13924 memset(array, 0, sizeof(array)); 13925 13926 // Initialize the registers. 13927 __ Mov(x1, base); 13928 __ Add(x2, x1, x1); 13929 __ Add(x3, x2, x1); 13930 __ Add(x4, x3, x1); 13931 __ Fmov(d1, x1); 13932 __ Fmov(d2, x2); 13933 __ Fmov(d3, x3); 13934 __ Fmov(d4, x4); 13935 __ Fmov(d5, x1); 13936 __ Fmov(d6, x2); 13937 __ Fmov(d7, x3); 13938 __ Fmov(d8, x4); 13939 13940 Register reg_base = x20; 13941 Register reg_index = x21; 13942 int size_stored = 0; 13943 13944 __ Mov(reg_base, reinterpret_cast<uintptr_t>(&array)); 13945 13946 // Test aligned accesses. 
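  // Each of the following lists transfers four registers, and every
  // store/load pair is issued twice: once with an immediate offset and once
  // with a register offset (reg_index).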
13947 CPURegList list_src(w1, w2, w3, w4); 13948 CPURegList list_dst(w11, w12, w13, w14); 13949 CPURegList list_fp_src_1(d1, d2, d3, d4); 13950 CPURegList list_fp_dst_1(d11, d12, d13, d14); 13951 13952 __ StoreCPURegList(list_src, MemOperand(reg_base, 0 * sizeof(uint64_t))); 13953 __ LoadCPURegList(list_dst, MemOperand(reg_base, 0 * sizeof(uint64_t))); 13954 size_stored += 4 * kWRegSizeInBytes; 13955 13956 __ Mov(reg_index, size_stored); 13957 __ StoreCPURegList(list_src, MemOperand(reg_base, reg_index)); 13958 __ LoadCPURegList(list_dst, MemOperand(reg_base, reg_index)); 13959 size_stored += 4 * kWRegSizeInBytes; 13960 13961 __ StoreCPURegList(list_fp_src_1, MemOperand(reg_base, size_stored)); 13962 __ LoadCPURegList(list_fp_dst_1, MemOperand(reg_base, size_stored)); 13963 size_stored += 4 * kDRegSizeInBytes; 13964 13965 __ Mov(reg_index, size_stored); 13966 __ StoreCPURegList(list_fp_src_1, MemOperand(reg_base, reg_index)); 13967 __ LoadCPURegList(list_fp_dst_1, MemOperand(reg_base, reg_index)); 13968 size_stored += 4 * kDRegSizeInBytes; 13969 13970 // Test unaligned accesses. 13971 CPURegList list_fp_src_2(d5, d6, d7, d8); 13972 CPURegList list_fp_dst_2(d15, d16, d17, d18); 13973 13974 __ Str(wzr, MemOperand(reg_base, size_stored)); 13975 size_stored += 1 * kWRegSizeInBytes; 13976 __ StoreCPURegList(list_fp_src_2, MemOperand(reg_base, size_stored)); 13977 __ LoadCPURegList(list_fp_dst_2, MemOperand(reg_base, size_stored)); 13978 size_stored += 4 * kDRegSizeInBytes; 13979 13980 __ Mov(reg_index, size_stored); 13981 __ StoreCPURegList(list_fp_src_2, MemOperand(reg_base, reg_index)); 13982 __ LoadCPURegList(list_fp_dst_2, MemOperand(reg_base, reg_index)); 13983 13984 END(); 13985 RUN(); 13986 13987 VIXL_CHECK(array[0] == (1 * low_base) + (2 * low_base << kWRegSize)); 13988 VIXL_CHECK(array[1] == (3 * low_base) + (4 * low_base << kWRegSize)); 13989 VIXL_CHECK(array[2] == (1 * low_base) + (2 * low_base << kWRegSize)); 13990 VIXL_CHECK(array[3] == (3 * low_base) + (4 * low_base << kWRegSize)); 13991 VIXL_CHECK(array[4] == 1 * base); 13992 VIXL_CHECK(array[5] == 2 * base); 13993 VIXL_CHECK(array[6] == 3 * base); 13994 VIXL_CHECK(array[7] == 4 * base); 13995 VIXL_CHECK(array[8] == 1 * base); 13996 VIXL_CHECK(array[9] == 2 * base); 13997 VIXL_CHECK(array[10] == 3 * base); 13998 VIXL_CHECK(array[11] == 4 * base); 13999 VIXL_CHECK(array[12] == ((1 * low_base) << kSRegSize)); 14000 VIXL_CHECK(array[13] == (((2 * low_base) << kSRegSize) | (1 * high_base))); 14001 VIXL_CHECK(array[14] == (((3 * low_base) << kSRegSize) | (2 * high_base))); 14002 VIXL_CHECK(array[15] == (((4 * low_base) << kSRegSize) | (3 * high_base))); 14003 VIXL_CHECK(array[16] == (((1 * low_base) << kSRegSize) | (4 * high_base))); 14004 VIXL_CHECK(array[17] == (((2 * low_base) << kSRegSize) | (1 * high_base))); 14005 VIXL_CHECK(array[18] == (((3 * low_base) << kSRegSize) | (2 * high_base))); 14006 VIXL_CHECK(array[19] == (((4 * low_base) << kSRegSize) | (3 * high_base))); 14007 VIXL_CHECK(array[20] == (4 * high_base)); 14008 14009 ASSERT_EQUAL_64(1 * low_base, x11); 14010 ASSERT_EQUAL_64(2 * low_base, x12); 14011 ASSERT_EQUAL_64(3 * low_base, x13); 14012 ASSERT_EQUAL_64(4 * low_base, x14); 14013 ASSERT_EQUAL_FP64(RawbitsToDouble(1 * base), d11); 14014 ASSERT_EQUAL_FP64(RawbitsToDouble(2 * base), d12); 14015 ASSERT_EQUAL_FP64(RawbitsToDouble(3 * base), d13); 14016 ASSERT_EQUAL_FP64(RawbitsToDouble(4 * base), d14); 14017 ASSERT_EQUAL_FP64(RawbitsToDouble(1 * base), d15); 14018 ASSERT_EQUAL_FP64(RawbitsToDouble(2 * base), d16); 14019 
ASSERT_EQUAL_FP64(RawbitsToDouble(3 * base), d17); 14020 ASSERT_EQUAL_FP64(RawbitsToDouble(4 * base), d18); 14021 14022 TEARDOWN(); 14023 } 14024 14025 14026 // This enum is used only as an argument to the push-pop test helpers. 14027 enum PushPopMethod { 14028 // Push or Pop using the Push and Pop methods, with blocks of up to four 14029 // registers. (Smaller blocks will be used if necessary.) 14030 PushPopByFour, 14031 14032 // Use Push<Size>RegList and Pop<Size>RegList to transfer the registers. 14033 PushPopRegList 14034 }; 14035 14036 14037 // For the PushPop* tests, use the maximum number of registers that the test 14038 // supports (where a reg_count argument would otherwise be provided). 14039 static int const kPushPopUseMaxRegCount = -1; 14040 14041 // Test a simple push-pop pattern: 14042 // * Claim <claim> bytes to set the stack alignment. 14043 // * Push <reg_count> registers with size <reg_size>. 14044 // * Clobber the register contents. 14045 // * Pop <reg_count> registers to restore the original contents. 14046 // * Drop <claim> bytes to restore the original stack pointer. 14047 // 14048 // Different push and pop methods can be specified independently to test for 14049 // proper word-endian behaviour. 14050 static void PushPopSimpleHelper(int reg_count, 14051 int claim, 14052 int reg_size, 14053 PushPopMethod push_method, 14054 PushPopMethod pop_method) { 14055 SETUP(); 14056 14057 START(); 14058 14059 // Arbitrarily pick a register to use as a stack pointer. 14060 const Register& stack_pointer = x20; 14061 const RegList allowed = ~stack_pointer.GetBit(); 14062 if (reg_count == kPushPopUseMaxRegCount) { 14063 reg_count = CountSetBits(allowed, kNumberOfRegisters); 14064 } 14065 // Work out which registers to use, based on reg_size. 14066 Register r[kNumberOfRegisters]; 14067 Register x[kNumberOfRegisters]; 14068 RegList list = 14069 PopulateRegisterArray(NULL, x, r, reg_size, reg_count, allowed); 14070 14071 // Acquire all temps from the MacroAssembler. They are used arbitrarily below. 14072 UseScratchRegisterScope temps(&masm); 14073 temps.ExcludeAll(); 14074 14075 // The literal base is chosen to have two useful properties: 14076 // * When multiplied by small values (such as a register index), this value 14077 // is clearly readable in the result. 14078 // * The value is not formed from repeating fixed-size smaller values, so it 14079 // can be used to detect endianness-related errors. 14080 uint64_t literal_base = 0x0100001000100101; 14081 14082 { 14083 VIXL_ASSERT(__ StackPointer().Is(sp)); 14084 __ Mov(stack_pointer, __ StackPointer()); 14085 __ SetStackPointer(stack_pointer); 14086 14087 int i; 14088 14089 // Initialize the registers. 14090 for (i = 0; i < reg_count; i++) { 14091 // Always write into the X register, to ensure that the upper word is 14092 // properly ignored by Push when testing W registers. 14093 __ Mov(x[i], literal_base * i); 14094 } 14095 14096 // Claim memory first, as requested. 14097 __ Claim(claim); 14098 14099 switch (push_method) { 14100 case PushPopByFour: 14101 // Push high-numbered registers first (to the highest addresses). 14102 for (i = reg_count; i >= 4; i -= 4) { 14103 __ Push(r[i - 1], r[i - 2], r[i - 3], r[i - 4]); 14104 } 14105 // Finish off the leftovers. 
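        // (The loop above transfers groups of four, so at most three
        // registers can remain at this point.)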
14106 switch (i) { 14107 case 3: 14108 __ Push(r[2], r[1], r[0]); 14109 break; 14110 case 2: 14111 __ Push(r[1], r[0]); 14112 break; 14113 case 1: 14114 __ Push(r[0]); 14115 break; 14116 default: 14117 VIXL_ASSERT(i == 0); 14118 break; 14119 } 14120 break; 14121 case PushPopRegList: 14122 __ PushSizeRegList(list, reg_size); 14123 break; 14124 } 14125 14126 // Clobber all the registers, to ensure that they get repopulated by Pop. 14127 Clobber(&masm, list); 14128 14129 switch (pop_method) { 14130 case PushPopByFour: 14131 // Pop low-numbered registers first (from the lowest addresses). 14132 for (i = 0; i <= (reg_count - 4); i += 4) { 14133 __ Pop(r[i], r[i + 1], r[i + 2], r[i + 3]); 14134 } 14135 // Finish off the leftovers. 14136 switch (reg_count - i) { 14137 case 3: 14138 __ Pop(r[i], r[i + 1], r[i + 2]); 14139 break; 14140 case 2: 14141 __ Pop(r[i], r[i + 1]); 14142 break; 14143 case 1: 14144 __ Pop(r[i]); 14145 break; 14146 default: 14147 VIXL_ASSERT(i == reg_count); 14148 break; 14149 } 14150 break; 14151 case PushPopRegList: 14152 __ PopSizeRegList(list, reg_size); 14153 break; 14154 } 14155 14156 // Drop memory to restore stack_pointer. 14157 __ Drop(claim); 14158 14159 __ Mov(sp, __ StackPointer()); 14160 __ SetStackPointer(sp); 14161 } 14162 14163 END(); 14164 14165 RUN(); 14166 14167 // Check that the register contents were preserved. 14168 // Always use ASSERT_EQUAL_64, even when testing W registers, so we can test 14169 // that the upper word was properly cleared by Pop. 14170 literal_base &= (0xffffffffffffffff >> (64 - reg_size)); 14171 for (int i = 0; i < reg_count; i++) { 14172 if (x[i].Is(xzr)) { 14173 ASSERT_EQUAL_64(0, x[i]); 14174 } else { 14175 ASSERT_EQUAL_64(literal_base * i, x[i]); 14176 } 14177 } 14178 14179 TEARDOWN(); 14180 } 14181 14182 14183 TEST(push_pop_xreg_simple_32) { 14184 for (int claim = 0; claim <= 8; claim++) { 14185 for (int count = 0; count <= 8; count++) { 14186 PushPopSimpleHelper(count, 14187 claim, 14188 kWRegSize, 14189 PushPopByFour, 14190 PushPopByFour); 14191 PushPopSimpleHelper(count, 14192 claim, 14193 kWRegSize, 14194 PushPopByFour, 14195 PushPopRegList); 14196 PushPopSimpleHelper(count, 14197 claim, 14198 kWRegSize, 14199 PushPopRegList, 14200 PushPopByFour); 14201 PushPopSimpleHelper(count, 14202 claim, 14203 kWRegSize, 14204 PushPopRegList, 14205 PushPopRegList); 14206 } 14207 // Test with the maximum number of registers. 
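    // (kPushPopUseMaxRegCount makes the helper use every register that
    // `allowed` permits, i.e. everything except the designated stack
    // pointer.)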
14208 PushPopSimpleHelper(kPushPopUseMaxRegCount, 14209 claim, 14210 kWRegSize, 14211 PushPopByFour, 14212 PushPopByFour); 14213 PushPopSimpleHelper(kPushPopUseMaxRegCount, 14214 claim, 14215 kWRegSize, 14216 PushPopByFour, 14217 PushPopRegList); 14218 PushPopSimpleHelper(kPushPopUseMaxRegCount, 14219 claim, 14220 kWRegSize, 14221 PushPopRegList, 14222 PushPopByFour); 14223 PushPopSimpleHelper(kPushPopUseMaxRegCount, 14224 claim, 14225 kWRegSize, 14226 PushPopRegList, 14227 PushPopRegList); 14228 } 14229 } 14230 14231 14232 TEST(push_pop_xreg_simple_64) { 14233 for (int claim = 0; claim <= 8; claim++) { 14234 for (int count = 0; count <= 8; count++) { 14235 PushPopSimpleHelper(count, 14236 claim, 14237 kXRegSize, 14238 PushPopByFour, 14239 PushPopByFour); 14240 PushPopSimpleHelper(count, 14241 claim, 14242 kXRegSize, 14243 PushPopByFour, 14244 PushPopRegList); 14245 PushPopSimpleHelper(count, 14246 claim, 14247 kXRegSize, 14248 PushPopRegList, 14249 PushPopByFour); 14250 PushPopSimpleHelper(count, 14251 claim, 14252 kXRegSize, 14253 PushPopRegList, 14254 PushPopRegList); 14255 } 14256 // Test with the maximum number of registers. 14257 PushPopSimpleHelper(kPushPopUseMaxRegCount, 14258 claim, 14259 kXRegSize, 14260 PushPopByFour, 14261 PushPopByFour); 14262 PushPopSimpleHelper(kPushPopUseMaxRegCount, 14263 claim, 14264 kXRegSize, 14265 PushPopByFour, 14266 PushPopRegList); 14267 PushPopSimpleHelper(kPushPopUseMaxRegCount, 14268 claim, 14269 kXRegSize, 14270 PushPopRegList, 14271 PushPopByFour); 14272 PushPopSimpleHelper(kPushPopUseMaxRegCount, 14273 claim, 14274 kXRegSize, 14275 PushPopRegList, 14276 PushPopRegList); 14277 } 14278 } 14279 14280 // For the PushPopFP* tests, use the maximum number of registers that the test 14281 // supports (where a reg_count argument would otherwise be provided). 14282 static int const kPushPopFPUseMaxRegCount = -1; 14283 14284 // Test a simple push-pop pattern: 14285 // * Claim <claim> bytes to set the stack alignment. 14286 // * Push <reg_count> FP registers with size <reg_size>. 14287 // * Clobber the register contents. 14288 // * Pop <reg_count> FP registers to restore the original contents. 14289 // * Drop <claim> bytes to restore the original stack pointer. 14290 // 14291 // Different push and pop methods can be specified independently to test for 14292 // proper word-endian behaviour. 14293 static void PushPopFPSimpleHelper(int reg_count, 14294 int claim, 14295 int reg_size, 14296 PushPopMethod push_method, 14297 PushPopMethod pop_method) { 14298 SETUP(); 14299 14300 START(); 14301 14302 // We can use any floating-point register. None of them are reserved for 14303 // debug code, for example. 14304 static RegList const allowed = ~0; 14305 if (reg_count == kPushPopFPUseMaxRegCount) { 14306 reg_count = CountSetBits(allowed, kNumberOfFPRegisters); 14307 } 14308 // Work out which registers to use, based on reg_size. 14309 FPRegister v[kNumberOfRegisters]; 14310 FPRegister d[kNumberOfRegisters]; 14311 RegList list = 14312 PopulateFPRegisterArray(NULL, d, v, reg_size, reg_count, allowed); 14313 14314 // Arbitrarily pick a register to use as a stack pointer. 14315 const Register& stack_pointer = x10; 14316 14317 // Acquire all temps from the MacroAssembler. They are used arbitrarily below. 
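  // Excluding them ensures that the MacroAssembler cannot silently allocate
  // one of the registers under test as a temporary.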
14318   UseScratchRegisterScope temps(&masm);
14319   temps.ExcludeAll();
14320
14321   // The literal base is chosen to have three useful properties:
14322   //  * When multiplied (using an integer) by small values (such as a register
14323   //    index), this value is clearly readable in the result.
14324   //  * The value is not formed from repeating fixed-size smaller values, so it
14325   //    can be used to detect endianness-related errors.
14326   //  * It is never a floating-point NaN, and will therefore always compare
14327   //    equal to itself.
14328   uint64_t literal_base = 0x0100001000100101;
14329
14330   {
14331     VIXL_ASSERT(__ StackPointer().Is(sp));
14332     __ Mov(stack_pointer, __ StackPointer());
14333     __ SetStackPointer(stack_pointer);
14334
14335     int i;
14336
14337     // Initialize the registers, using X registers to load the literal.
14338     __ Mov(x0, 0);
14339     __ Mov(x1, literal_base);
14340     for (i = 0; i < reg_count; i++) {
14341       // Always write into the D register, to ensure that the upper word is
14342       // properly ignored by Push when testing S registers.
14343       __ Fmov(d[i], x0);
14344       // Calculate the next literal.
14345       __ Add(x0, x0, x1);
14346     }
14347
14348     // Claim memory first, as requested.
14349     __ Claim(claim);
14350
14351     switch (push_method) {
14352       case PushPopByFour:
14353         // Push high-numbered registers first (to the highest addresses).
14354         for (i = reg_count; i >= 4; i -= 4) {
14355           __ Push(v[i - 1], v[i - 2], v[i - 3], v[i - 4]);
14356         }
14357         // Finish off the leftovers.
14358         switch (i) {
14359           case 3:
14360             __ Push(v[2], v[1], v[0]);
14361             break;
14362           case 2:
14363             __ Push(v[1], v[0]);
14364             break;
14365           case 1:
14366             __ Push(v[0]);
14367             break;
14368           default:
14369             VIXL_ASSERT(i == 0);
14370             break;
14371         }
14372         break;
14373       case PushPopRegList:
14374         __ PushSizeRegList(list, reg_size, CPURegister::kVRegister);
14375         break;
14376     }
14377
14378     // Clobber all the registers, to ensure that they get repopulated by Pop.
14379     ClobberFP(&masm, list);
14380
14381     switch (pop_method) {
14382       case PushPopByFour:
14383         // Pop low-numbered registers first (from the lowest addresses).
14384         for (i = 0; i <= (reg_count - 4); i += 4) {
14385           __ Pop(v[i], v[i + 1], v[i + 2], v[i + 3]);
14386         }
14387         // Finish off the leftovers.
14388         switch (reg_count - i) {
14389           case 3:
14390             __ Pop(v[i], v[i + 1], v[i + 2]);
14391             break;
14392           case 2:
14393             __ Pop(v[i], v[i + 1]);
14394             break;
14395           case 1:
14396             __ Pop(v[i]);
14397             break;
14398           default:
14399             VIXL_ASSERT(i == reg_count);
14400             break;
14401         }
14402         break;
14403       case PushPopRegList:
14404         __ PopSizeRegList(list, reg_size, CPURegister::kVRegister);
14405         break;
14406     }
14407
14408     // Drop memory to restore the stack pointer.
14409     __ Drop(claim);
14410
14411     __ Mov(sp, __ StackPointer());
14412     __ SetStackPointer(sp);
14413   }
14414
14415   END();
14416
14417   RUN();
14418
14419   // Check that the register contents were preserved.
14420   // Always use ASSERT_EQUAL_FP64, even when testing S registers, so we can
14421   // test that the upper word was properly cleared by Pop.
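  // The mask below truncates literal_base to reg_size bits, matching the
  // truncation that the S-sized pushes performed on the stored values.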
14422 literal_base &= (0xffffffffffffffff >> (64 - reg_size)); 14423 for (int i = 0; i < reg_count; i++) { 14424 uint64_t literal = literal_base * i; 14425 double expected; 14426 memcpy(&expected, &literal, sizeof(expected)); 14427 ASSERT_EQUAL_FP64(expected, d[i]); 14428 } 14429 14430 TEARDOWN(); 14431 } 14432 14433 14434 TEST(push_pop_fp_xreg_simple_32) { 14435 for (int claim = 0; claim <= 8; claim++) { 14436 for (int count = 0; count <= 8; count++) { 14437 PushPopFPSimpleHelper(count, 14438 claim, 14439 kSRegSize, 14440 PushPopByFour, 14441 PushPopByFour); 14442 PushPopFPSimpleHelper(count, 14443 claim, 14444 kSRegSize, 14445 PushPopByFour, 14446 PushPopRegList); 14447 PushPopFPSimpleHelper(count, 14448 claim, 14449 kSRegSize, 14450 PushPopRegList, 14451 PushPopByFour); 14452 PushPopFPSimpleHelper(count, 14453 claim, 14454 kSRegSize, 14455 PushPopRegList, 14456 PushPopRegList); 14457 } 14458 // Test with the maximum number of registers. 14459 PushPopFPSimpleHelper(kPushPopFPUseMaxRegCount, 14460 claim, 14461 kSRegSize, 14462 PushPopByFour, 14463 PushPopByFour); 14464 PushPopFPSimpleHelper(kPushPopFPUseMaxRegCount, 14465 claim, 14466 kSRegSize, 14467 PushPopByFour, 14468 PushPopRegList); 14469 PushPopFPSimpleHelper(kPushPopFPUseMaxRegCount, 14470 claim, 14471 kSRegSize, 14472 PushPopRegList, 14473 PushPopByFour); 14474 PushPopFPSimpleHelper(kPushPopFPUseMaxRegCount, 14475 claim, 14476 kSRegSize, 14477 PushPopRegList, 14478 PushPopRegList); 14479 } 14480 } 14481 14482 14483 TEST(push_pop_fp_xreg_simple_64) { 14484 for (int claim = 0; claim <= 8; claim++) { 14485 for (int count = 0; count <= 8; count++) { 14486 PushPopFPSimpleHelper(count, 14487 claim, 14488 kDRegSize, 14489 PushPopByFour, 14490 PushPopByFour); 14491 PushPopFPSimpleHelper(count, 14492 claim, 14493 kDRegSize, 14494 PushPopByFour, 14495 PushPopRegList); 14496 PushPopFPSimpleHelper(count, 14497 claim, 14498 kDRegSize, 14499 PushPopRegList, 14500 PushPopByFour); 14501 PushPopFPSimpleHelper(count, 14502 claim, 14503 kDRegSize, 14504 PushPopRegList, 14505 PushPopRegList); 14506 } 14507 // Test with the maximum number of registers. 14508 PushPopFPSimpleHelper(kPushPopFPUseMaxRegCount, 14509 claim, 14510 kDRegSize, 14511 PushPopByFour, 14512 PushPopByFour); 14513 PushPopFPSimpleHelper(kPushPopFPUseMaxRegCount, 14514 claim, 14515 kDRegSize, 14516 PushPopByFour, 14517 PushPopRegList); 14518 PushPopFPSimpleHelper(kPushPopFPUseMaxRegCount, 14519 claim, 14520 kDRegSize, 14521 PushPopRegList, 14522 PushPopByFour); 14523 PushPopFPSimpleHelper(kPushPopFPUseMaxRegCount, 14524 claim, 14525 kDRegSize, 14526 PushPopRegList, 14527 PushPopRegList); 14528 } 14529 } 14530 14531 14532 // Push and pop data using an overlapping combination of Push/Pop and 14533 // RegList-based methods. 14534 static void PushPopMixedMethodsHelper(int claim, int reg_size) { 14535 SETUP(); 14536 14537 // Arbitrarily pick a register to use as a stack pointer. 14538 const Register& stack_pointer = x5; 14539 const RegList allowed = ~stack_pointer.GetBit(); 14540 // Work out which registers to use, based on reg_size. 14541 Register r[10]; 14542 Register x[10]; 14543 PopulateRegisterArray(NULL, x, r, reg_size, 10, allowed); 14544 14545 // Calculate some handy register lists. 
14546 RegList r0_to_r3 = 0; 14547 for (int i = 0; i <= 3; i++) { 14548 r0_to_r3 |= x[i].GetBit(); 14549 } 14550 RegList r4_to_r5 = 0; 14551 for (int i = 4; i <= 5; i++) { 14552 r4_to_r5 |= x[i].GetBit(); 14553 } 14554 RegList r6_to_r9 = 0; 14555 for (int i = 6; i <= 9; i++) { 14556 r6_to_r9 |= x[i].GetBit(); 14557 } 14558 14559 // Acquire all temps from the MacroAssembler. They are used arbitrarily below. 14560 UseScratchRegisterScope temps(&masm); 14561 temps.ExcludeAll(); 14562 14563 // The literal base is chosen to have two useful properties: 14564 // * When multiplied by small values (such as a register index), this value 14565 // is clearly readable in the result. 14566 // * The value is not formed from repeating fixed-size smaller values, so it 14567 // can be used to detect endianness-related errors. 14568 uint64_t literal_base = 0x0100001000100101; 14569 14570 START(); 14571 { 14572 VIXL_ASSERT(__ StackPointer().Is(sp)); 14573 __ Mov(stack_pointer, __ StackPointer()); 14574 __ SetStackPointer(stack_pointer); 14575 14576 // Claim memory first, as requested. 14577 __ Claim(claim); 14578 14579 __ Mov(x[3], literal_base * 3); 14580 __ Mov(x[2], literal_base * 2); 14581 __ Mov(x[1], literal_base * 1); 14582 __ Mov(x[0], literal_base * 0); 14583 14584 __ PushSizeRegList(r0_to_r3, reg_size); 14585 __ Push(r[3], r[2]); 14586 14587 Clobber(&masm, r0_to_r3); 14588 __ PopSizeRegList(r0_to_r3, reg_size); 14589 14590 __ Push(r[2], r[1], r[3], r[0]); 14591 14592 Clobber(&masm, r4_to_r5); 14593 __ Pop(r[4], r[5]); 14594 Clobber(&masm, r6_to_r9); 14595 __ Pop(r[6], r[7], r[8], r[9]); 14596 14597 // Drop memory to restore stack_pointer. 14598 __ Drop(claim); 14599 14600 __ Mov(sp, __ StackPointer()); 14601 __ SetStackPointer(sp); 14602 } 14603 14604 END(); 14605 14606 RUN(); 14607 14608 // Always use ASSERT_EQUAL_64, even when testing W registers, so we can test 14609 // that the upper word was properly cleared by Pop. 14610 literal_base &= (0xffffffffffffffff >> (64 - reg_size)); 14611 14612 ASSERT_EQUAL_64(literal_base * 3, x[9]); 14613 ASSERT_EQUAL_64(literal_base * 2, x[8]); 14614 ASSERT_EQUAL_64(literal_base * 0, x[7]); 14615 ASSERT_EQUAL_64(literal_base * 3, x[6]); 14616 ASSERT_EQUAL_64(literal_base * 1, x[5]); 14617 ASSERT_EQUAL_64(literal_base * 2, x[4]); 14618 14619 TEARDOWN(); 14620 } 14621 14622 14623 TEST(push_pop_xreg_mixed_methods_64) { 14624 for (int claim = 0; claim <= 8; claim++) { 14625 PushPopMixedMethodsHelper(claim, kXRegSize); 14626 } 14627 } 14628 14629 14630 TEST(push_pop_xreg_mixed_methods_32) { 14631 for (int claim = 0; claim <= 8; claim++) { 14632 PushPopMixedMethodsHelper(claim, kWRegSize); 14633 } 14634 } 14635 14636 14637 // Push and pop data using overlapping X- and W-sized quantities. 14638 static void PushPopWXOverlapHelper(int reg_count, int claim) { 14639 SETUP(); 14640 14641 // Arbitrarily pick a register to use as a stack pointer. 14642 const Register& stack_pointer = x10; 14643 const RegList allowed = ~stack_pointer.GetBit(); 14644 if (reg_count == kPushPopUseMaxRegCount) { 14645 reg_count = CountSetBits(allowed, kNumberOfRegisters); 14646 } 14647 // Work out which registers to use, based on reg_size. 14648 Register w[kNumberOfRegisters]; 14649 Register x[kNumberOfRegisters]; 14650 RegList list = PopulateRegisterArray(w, x, NULL, 0, reg_count, allowed); 14651 14652 // The number of W-sized slots we expect to pop. When we pop, we alternate 14653 // between W and X registers, so we need reg_count*1.5 W-sized slots. 
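  // (Integer division rounds down: five registers, for example, pop as
  // w, x, w, x, w and so occupy 1 + 2 + 1 + 2 + 1 = 7 W-sized slots,
  // which is 5 + 5 / 2.)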
14654 int const requested_w_slots = reg_count + reg_count / 2; 14655 14656 // Track what _should_ be on the stack, using W-sized slots. 14657 static int const kMaxWSlots = kNumberOfRegisters + kNumberOfRegisters / 2; 14658 uint32_t stack[kMaxWSlots]; 14659 for (int i = 0; i < kMaxWSlots; i++) { 14660 stack[i] = 0xdeadbeef; 14661 } 14662 14663 // Acquire all temps from the MacroAssembler. They are used arbitrarily below. 14664 UseScratchRegisterScope temps(&masm); 14665 temps.ExcludeAll(); 14666 14667 // The literal base is chosen to have two useful properties: 14668 // * When multiplied by small values (such as a register index), this value 14669 // is clearly readable in the result. 14670 // * The value is not formed from repeating fixed-size smaller values, so it 14671 // can be used to detect endianness-related errors. 14672 static uint64_t const literal_base = 0x0100001000100101; 14673 static uint64_t const literal_base_hi = literal_base >> 32; 14674 static uint64_t const literal_base_lo = literal_base & 0xffffffff; 14675 static uint64_t const literal_base_w = literal_base & 0xffffffff; 14676 14677 START(); 14678 { 14679 VIXL_ASSERT(__ StackPointer().Is(sp)); 14680 __ Mov(stack_pointer, __ StackPointer()); 14681 __ SetStackPointer(stack_pointer); 14682 14683 // Initialize the registers. 14684 for (int i = 0; i < reg_count; i++) { 14685 // Always write into the X register, to ensure that the upper word is 14686 // properly ignored by Push when testing W registers. 14687 __ Mov(x[i], literal_base * i); 14688 } 14689 14690 // Claim memory first, as requested. 14691 __ Claim(claim); 14692 14693 // The push-pop pattern is as follows: 14694 // Push: Pop: 14695 // x[0](hi) -> w[0] 14696 // x[0](lo) -> x[1](hi) 14697 // w[1] -> x[1](lo) 14698 // w[1] -> w[2] 14699 // x[2](hi) -> x[2](hi) 14700 // x[2](lo) -> x[2](lo) 14701 // x[2](hi) -> w[3] 14702 // x[2](lo) -> x[4](hi) 14703 // x[2](hi) -> x[4](lo) 14704 // x[2](lo) -> w[5] 14705 // w[3] -> x[5](hi) 14706 // w[3] -> x[6](lo) 14707 // w[3] -> w[7] 14708 // w[3] -> x[8](hi) 14709 // x[4](hi) -> x[8](lo) 14710 // x[4](lo) -> w[9] 14711 // ... pattern continues ... 14712 // 14713 // That is, registers are pushed starting with the lower numbers, 14714 // alternating between x and w registers, and pushing i%4+1 copies of each, 14715 // where i is the register number. 14716 // Registers are popped starting with the higher numbers one-by-one, 14717 // alternating between x and w registers, but only popping one at a time. 14718 // 14719 // This pattern provides a wide variety of alignment effects and overlaps. 14720 14721 // ---- Push ---- 14722 14723 int active_w_slots = 0; 14724 for (int i = 0; active_w_slots < requested_w_slots; i++) { 14725 VIXL_ASSERT(i < reg_count); 14726 // In order to test various arguments to PushMultipleTimes, and to try to 14727 // exercise different alignment and overlap effects, we push each 14728 // register a different number of times. 14729 int times = i % 4 + 1; 14730 if (i & 1) { 14731 // Push odd-numbered registers as W registers. 14732 __ PushMultipleTimes(times, w[i]); 14733 // Fill in the expected stack slots. 14734 for (int j = 0; j < times; j++) { 14735 if (w[i].Is(wzr)) { 14736 // The zero register always writes zeroes. 14737 stack[active_w_slots++] = 0; 14738 } else { 14739 stack[active_w_slots++] = literal_base_w * i; 14740 } 14741 } 14742 } else { 14743 // Push even-numbered registers as X registers. 14744 __ PushMultipleTimes(times, x[i]); 14745 // Fill in the expected stack slots. 
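        // (Each X-sized push occupies two W-sized slots; the model records
        // the high word first, then the low word.)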
14746 for (int j = 0; j < times; j++) { 14747 if (x[i].Is(xzr)) { 14748 // The zero register always writes zeroes. 14749 stack[active_w_slots++] = 0; 14750 stack[active_w_slots++] = 0; 14751 } else { 14752 stack[active_w_slots++] = literal_base_hi * i; 14753 stack[active_w_slots++] = literal_base_lo * i; 14754 } 14755 } 14756 } 14757 } 14758 // Because we were pushing several registers at a time, we probably pushed 14759 // more than we needed to. 14760 if (active_w_slots > requested_w_slots) { 14761 __ Drop((active_w_slots - requested_w_slots) * kWRegSizeInBytes); 14762 // Bump the number of active W-sized slots back to where it should be, 14763 // and fill the empty space with a dummy value. 14764 do { 14765 stack[active_w_slots--] = 0xdeadbeef; 14766 } while (active_w_slots > requested_w_slots); 14767 } 14768 14769 // ---- Pop ---- 14770 14771 Clobber(&masm, list); 14772 14773 // If popping an even number of registers, the first one will be X-sized. 14774 // Otherwise, the first one will be W-sized. 14775 bool next_is_64 = !(reg_count & 1); 14776 for (int i = reg_count - 1; i >= 0; i--) { 14777 if (next_is_64) { 14778 __ Pop(x[i]); 14779 active_w_slots -= 2; 14780 } else { 14781 __ Pop(w[i]); 14782 active_w_slots -= 1; 14783 } 14784 next_is_64 = !next_is_64; 14785 } 14786 VIXL_ASSERT(active_w_slots == 0); 14787 14788 // Drop memory to restore stack_pointer. 14789 __ Drop(claim); 14790 14791 __ Mov(sp, __ StackPointer()); 14792 __ SetStackPointer(sp); 14793 } 14794 14795 END(); 14796 14797 RUN(); 14798 14799 int slot = 0; 14800 for (int i = 0; i < reg_count; i++) { 14801 // Even-numbered registers were written as W registers. 14802 // Odd-numbered registers were written as X registers. 14803 bool expect_64 = (i & 1); 14804 uint64_t expected; 14805 14806 if (expect_64) { 14807 uint64_t hi = stack[slot++]; 14808 uint64_t lo = stack[slot++]; 14809 expected = (hi << 32) | lo; 14810 } else { 14811 expected = stack[slot++]; 14812 } 14813 14814 // Always use ASSERT_EQUAL_64, even when testing W registers, so we can 14815 // test that the upper word was properly cleared by Pop. 14816 if (x[i].Is(xzr)) { 14817 ASSERT_EQUAL_64(0, x[i]); 14818 } else { 14819 ASSERT_EQUAL_64(expected, x[i]); 14820 } 14821 } 14822 VIXL_ASSERT(slot == requested_w_slots); 14823 14824 TEARDOWN(); 14825 } 14826 14827 14828 TEST(push_pop_xreg_wx_overlap) { 14829 for (int claim = 0; claim <= 8; claim++) { 14830 for (int count = 1; count <= 8; count++) { 14831 PushPopWXOverlapHelper(count, claim); 14832 } 14833 // Test with the maximum number of registers. 14834 PushPopWXOverlapHelper(kPushPopUseMaxRegCount, claim); 14835 } 14836 } 14837 14838 14839 TEST(push_pop_sp) { 14840 SETUP(); 14841 14842 START(); 14843 14844 VIXL_ASSERT(sp.Is(__ StackPointer())); 14845 14846 // Acquire all temps from the MacroAssembler. They are used arbitrarily below. 
14847 UseScratchRegisterScope temps(&masm); 14848 temps.ExcludeAll(); 14849 14850 __ Mov(x3, 0x3333333333333333); 14851 __ Mov(x2, 0x2222222222222222); 14852 __ Mov(x1, 0x1111111111111111); 14853 __ Mov(x0, 0x0000000000000000); 14854 __ Claim(2 * kXRegSizeInBytes); 14855 __ PushXRegList(x0.GetBit() | x1.GetBit() | x2.GetBit() | x3.GetBit()); 14856 __ Push(x3, x2); 14857 __ PopXRegList(x0.GetBit() | x1.GetBit() | x2.GetBit() | x3.GetBit()); 14858 __ Push(x2, x1, x3, x0); 14859 __ Pop(x4, x5); 14860 __ Pop(x6, x7, x8, x9); 14861 14862 __ Claim(2 * kXRegSizeInBytes); 14863 __ PushWRegList(w0.GetBit() | w1.GetBit() | w2.GetBit() | w3.GetBit()); 14864 __ Push(w3, w1, w2, w0); 14865 __ PopWRegList(w10.GetBit() | w11.GetBit() | w12.GetBit() | w13.GetBit()); 14866 __ Pop(w14, w15, w16, w17); 14867 14868 __ Claim(2 * kXRegSizeInBytes); 14869 __ Push(w2, w2, w1, w1); 14870 __ Push(x3, x3); 14871 __ Pop(w18, w19, w20, w21); 14872 __ Pop(x22, x23); 14873 14874 __ Claim(2 * kXRegSizeInBytes); 14875 __ PushXRegList(x1.GetBit() | x22.GetBit()); 14876 __ PopXRegList(x24.GetBit() | x26.GetBit()); 14877 14878 __ Claim(2 * kXRegSizeInBytes); 14879 __ PushWRegList(w1.GetBit() | w2.GetBit() | w4.GetBit() | w22.GetBit()); 14880 __ PopWRegList(w25.GetBit() | w27.GetBit() | w28.GetBit() | w29.GetBit()); 14881 14882 __ Claim(2 * kXRegSizeInBytes); 14883 __ PushXRegList(0); 14884 __ PopXRegList(0); 14885 __ PushXRegList(0xffffffff); 14886 __ PopXRegList(0xffffffff); 14887 __ Drop(12 * kXRegSizeInBytes); 14888 END(); 14889 14890 RUN(); 14891 14892 ASSERT_EQUAL_64(0x1111111111111111, x3); 14893 ASSERT_EQUAL_64(0x0000000000000000, x2); 14894 ASSERT_EQUAL_64(0x3333333333333333, x1); 14895 ASSERT_EQUAL_64(0x2222222222222222, x0); 14896 ASSERT_EQUAL_64(0x3333333333333333, x9); 14897 ASSERT_EQUAL_64(0x2222222222222222, x8); 14898 ASSERT_EQUAL_64(0x0000000000000000, x7); 14899 ASSERT_EQUAL_64(0x3333333333333333, x6); 14900 ASSERT_EQUAL_64(0x1111111111111111, x5); 14901 ASSERT_EQUAL_64(0x2222222222222222, x4); 14902 14903 ASSERT_EQUAL_32(0x11111111U, w13); 14904 ASSERT_EQUAL_32(0x33333333U, w12); 14905 ASSERT_EQUAL_32(0x00000000U, w11); 14906 ASSERT_EQUAL_32(0x22222222U, w10); 14907 ASSERT_EQUAL_32(0x11111111U, w17); 14908 ASSERT_EQUAL_32(0x00000000U, w16); 14909 ASSERT_EQUAL_32(0x33333333U, w15); 14910 ASSERT_EQUAL_32(0x22222222U, w14); 14911 14912 ASSERT_EQUAL_32(0x11111111U, w18); 14913 ASSERT_EQUAL_32(0x11111111U, w19); 14914 ASSERT_EQUAL_32(0x11111111U, w20); 14915 ASSERT_EQUAL_32(0x11111111U, w21); 14916 ASSERT_EQUAL_64(0x3333333333333333, x22); 14917 ASSERT_EQUAL_64(0x0000000000000000, x23); 14918 14919 ASSERT_EQUAL_64(0x3333333333333333, x24); 14920 ASSERT_EQUAL_64(0x3333333333333333, x26); 14921 14922 ASSERT_EQUAL_32(0x33333333U, w25); 14923 ASSERT_EQUAL_32(0x00000000U, w27); 14924 ASSERT_EQUAL_32(0x22222222U, w28); 14925 ASSERT_EQUAL_32(0x33333333U, w29); 14926 TEARDOWN(); 14927 } 14928 14929 14930 TEST(printf) { 14931 SETUP(); 14932 START(); 14933 14934 char const* test_plain_string = "Printf with no arguments.\n"; 14935 char const* test_substring = "'This is a substring.'"; 14936 RegisterDump before; 14937 14938 // Initialize x29 to the value of the stack pointer. We will use x29 as a 14939 // temporary stack pointer later, and initializing it in this way allows the 14940 // RegisterDump check to pass. 14941 __ Mov(x29, __ StackPointer()); 14942 14943 // Test simple integer arguments. 14944 __ Mov(x0, 1234); 14945 __ Mov(x1, 0x1234); 14946 14947 // Test simple floating-point arguments. 
14948 __ Fmov(d0, 1.234); 14949 14950 // Test pointer (string) arguments. 14951 __ Mov(x2, reinterpret_cast<uintptr_t>(test_substring)); 14952 14953 // Test the maximum number of arguments, and sign extension. 14954 __ Mov(w3, 0xffffffff); 14955 __ Mov(w4, 0xffffffff); 14956 __ Mov(x5, 0xffffffffffffffff); 14957 __ Mov(x6, 0xffffffffffffffff); 14958 __ Fmov(s1, 1.234); 14959 __ Fmov(s2, 2.345); 14960 __ Fmov(d3, 3.456); 14961 __ Fmov(d4, 4.567); 14962 14963 // Test printing callee-saved registers. 14964 __ Mov(x28, 0x123456789abcdef); 14965 __ Fmov(d10, 42.0); 14966 14967 // Test with three arguments. 14968 __ Mov(x10, 3); 14969 __ Mov(x11, 40); 14970 __ Mov(x12, 500); 14971 14972 // A single character. 14973 __ Mov(w13, 'x'); 14974 14975 // Check that we don't clobber any registers. 14976 before.Dump(&masm); 14977 14978 __ Printf(test_plain_string); // NOLINT(runtime/printf) 14979 __ Printf("x0: %" PRId64 ", x1: 0x%08" PRIx64 "\n", x0, x1); 14980 __ Printf("w5: %" PRId32 ", x5: %" PRId64 "\n", w5, x5); 14981 __ Printf("d0: %f\n", d0); 14982 __ Printf("Test %%s: %s\n", x2); 14983 __ Printf("w3(uint32): %" PRIu32 "\nw4(int32): %" PRId32 14984 "\n" 14985 "x5(uint64): %" PRIu64 "\nx6(int64): %" PRId64 "\n", 14986 w3, 14987 w4, 14988 x5, 14989 x6); 14990 __ Printf("%%f: %f\n%%g: %g\n%%e: %e\n%%E: %E\n", s1, s2, d3, d4); 14991 __ Printf("0x%" PRIx32 ", 0x%" PRIx64 "\n", w28, x28); 14992 __ Printf("%g\n", d10); 14993 __ Printf("%%%%%s%%%c%%\n", x2, w13); 14994 14995 // Print the stack pointer (sp). 14996 __ Printf("StackPointer(sp): 0x%016" PRIx64 ", 0x%08" PRIx32 "\n", 14997 __ StackPointer(), 14998 __ StackPointer().W()); 14999 15000 // Test with a different stack pointer. 15001 const Register old_stack_pointer = __ StackPointer(); 15002 __ Mov(x29, old_stack_pointer); 15003 __ SetStackPointer(x29); 15004 // Print the stack pointer (not sp). 15005 __ Printf("StackPointer(not sp): 0x%016" PRIx64 ", 0x%08" PRIx32 "\n", 15006 __ StackPointer(), 15007 __ StackPointer().W()); 15008 __ Mov(old_stack_pointer, __ StackPointer()); 15009 __ SetStackPointer(old_stack_pointer); 15010 15011 // Test with three arguments. 15012 __ Printf("3=%u, 4=%u, 5=%u\n", x10, x11, x12); 15013 15014 // Mixed argument types. 15015 __ Printf("w3: %" PRIu32 ", s1: %f, x5: %" PRIu64 ", d3: %f\n", 15016 w3, 15017 s1, 15018 x5, 15019 d3); 15020 __ Printf("s1: %f, d3: %f, w3: %" PRId32 ", x5: %" PRId64 "\n", 15021 s1, 15022 d3, 15023 w3, 15024 x5); 15025 15026 END(); 15027 RUN(); 15028 15029 // We cannot easily test the output of the Printf sequences, and because 15030 // Printf preserves all registers by default, we can't look at the number of 15031 // bytes that were printed. However, the printf_no_preserve test should check 15032 // that, and here we just test that we didn't clobber any registers. 15033 ASSERT_EQUAL_REGISTERS(before); 15034 15035 TEARDOWN(); 15036 } 15037 15038 15039 TEST(printf_no_preserve) { 15040 SETUP(); 15041 START(); 15042 15043 char const* test_plain_string = "Printf with no arguments.\n"; 15044 char const* test_substring = "'This is a substring.'"; 15045 15046 __ PrintfNoPreserve(test_plain_string); 15047 __ Mov(x19, x0); 15048 15049 // Test simple integer arguments. 15050 __ Mov(x0, 1234); 15051 __ Mov(x1, 0x1234); 15052 __ PrintfNoPreserve("x0: %" PRId64 ", x1: 0x%08" PRIx64 "\n", x0, x1); 15053 __ Mov(x20, x0); 15054 15055 // Test simple floating-point arguments. 
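// (Unlike Printf, PrintfNoPreserve leaves the usual printf() return value,
// the number of characters written, in x0. That is why every call in this
// test is followed by a Mov that saves x0 into a callee-saved register
// before the next call overwrites it. For example, with a hypothetical
// three-character string:
//
//   __ PrintfNoPreserve("42\n");
//   __ Mov(x19, x0);             // x19 == strlen("42\n") == 3.)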
15056 __ Fmov(d0, 1.234); 15057 __ PrintfNoPreserve("d0: %f\n", d0); 15058 __ Mov(x21, x0); 15059 15060 // Test pointer (string) arguments. 15061 __ Mov(x2, reinterpret_cast<uintptr_t>(test_substring)); 15062 __ PrintfNoPreserve("Test %%s: %s\n", x2); 15063 __ Mov(x22, x0); 15064 15065 // Test the maximum number of arguments, and sign extension. 15066 __ Mov(w3, 0xffffffff); 15067 __ Mov(w4, 0xffffffff); 15068 __ Mov(x5, 0xffffffffffffffff); 15069 __ Mov(x6, 0xffffffffffffffff); 15070 __ PrintfNoPreserve("w3(uint32): %" PRIu32 "\nw4(int32): %" PRId32 15071 "\n" 15072 "x5(uint64): %" PRIu64 "\nx6(int64): %" PRId64 "\n", 15073 w3, 15074 w4, 15075 x5, 15076 x6); 15077 __ Mov(x23, x0); 15078 15079 __ Fmov(s1, 1.234); 15080 __ Fmov(s2, 2.345); 15081 __ Fmov(d3, 3.456); 15082 __ Fmov(d4, 4.567); 15083 __ PrintfNoPreserve("%%f: %f\n%%g: %g\n%%e: %e\n%%E: %E\n", s1, s2, d3, d4); 15084 __ Mov(x24, x0); 15085 15086 // Test printing callee-saved registers. 15087 __ Mov(x28, 0x123456789abcdef); 15088 __ PrintfNoPreserve("0x%" PRIx32 ", 0x%" PRIx64 "\n", w28, x28); 15089 __ Mov(x25, x0); 15090 15091 __ Fmov(d10, 42.0); 15092 __ PrintfNoPreserve("%g\n", d10); 15093 __ Mov(x26, x0); 15094 15095 // Test with a different stack pointer. 15096 const Register old_stack_pointer = __ StackPointer(); 15097 __ Mov(x29, old_stack_pointer); 15098 __ SetStackPointer(x29); 15099 // Print the stack pointer (not sp). 15100 __ PrintfNoPreserve("StackPointer(not sp): 0x%016" PRIx64 ", 0x%08" PRIx32 15101 "\n", 15102 __ StackPointer(), 15103 __ StackPointer().W()); 15104 __ Mov(x27, x0); 15105 __ Mov(old_stack_pointer, __ StackPointer()); 15106 __ SetStackPointer(old_stack_pointer); 15107 15108 // Test with three arguments. 15109 __ Mov(x3, 3); 15110 __ Mov(x4, 40); 15111 __ Mov(x5, 500); 15112 __ PrintfNoPreserve("3=%u, 4=%u, 5=%u\n", x3, x4, x5); 15113 __ Mov(x28, x0); 15114 15115 // Mixed argument types. 15116 __ Mov(w3, 0xffffffff); 15117 __ Fmov(s1, 1.234); 15118 __ Mov(x5, 0xffffffffffffffff); 15119 __ Fmov(d3, 3.456); 15120 __ PrintfNoPreserve("w3: %" PRIu32 ", s1: %f, x5: %" PRIu64 ", d3: %f\n", 15121 w3, 15122 s1, 15123 x5, 15124 d3); 15125 __ Mov(x29, x0); 15126 15127 END(); 15128 RUN(); 15129 15130 // We cannot easily test the exact output of the Printf sequences, but we can 15131 // use the return code to check that the string length was correct. 15132 15133 // Printf with no arguments. 15134 ASSERT_EQUAL_64(strlen(test_plain_string), x19); 15135 // x0: 1234, x1: 0x00001234 15136 ASSERT_EQUAL_64(25, x20); 15137 // d0: 1.234000 15138 ASSERT_EQUAL_64(13, x21); 15139 // Test %s: 'This is a substring.' 15140 ASSERT_EQUAL_64(32, x22); 15141 // w3(uint32): 4294967295 15142 // w4(int32): -1 15143 // x5(uint64): 18446744073709551615 15144 // x6(int64): -1 15145 ASSERT_EQUAL_64(23 + 14 + 33 + 14, x23); 15146 // %f: 1.234000 15147 // %g: 2.345 15148 // %e: 3.456000e+00 15149 // %E: 4.567000E+00 15150 ASSERT_EQUAL_64(13 + 10 + 17 + 17, x24); 15151 // 0x89abcdef, 0x123456789abcdef 15152 ASSERT_EQUAL_64(30, x25); 15153 // 42 15154 ASSERT_EQUAL_64(3, x26); 15155 // StackPointer(not sp): 0x00007fb037ae2370, 0x37ae2370 15156 // Note: This is an example value, but the field width is fixed here so the 15157 // string length is still predictable. 
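// The expected length can be derived from the format string alone, because
// both pointer fields have a fixed width:
//   "StackPointer(not sp): "  ->  22 characters
//   "0x%016" PRIx64           ->  18 characters ("0x" + 16 hex digits)
//   ", "                      ->   2 characters
//   "0x%08" PRIx32            ->  10 characters ("0x" + 8 hex digits)
//   "\n"                      ->   1 character
//                                 53 characters in total.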
15158   ASSERT_EQUAL_64(53, x27);
15159   // 3=3, 4=40, 5=500
15160   ASSERT_EQUAL_64(17, x28);
15161   // w3: 4294967295, s1: 1.234000, x5: 18446744073709551615, d3: 3.456000
15162   ASSERT_EQUAL_64(69, x29);
15163
15164   TEARDOWN();
15165 }
15166
15167
15168 #ifndef VIXL_INCLUDE_SIMULATOR_AARCH64
15169 TEST(trace) {
15170   // The Trace helper should not generate any code unless the simulator (or
15171   // debugger) is being used.
15172   SETUP();
15173   START();
15174
15175   Label start;
15176   __ Bind(&start);
15177   __ Trace(LOG_ALL, TRACE_ENABLE);
15178   __ Trace(LOG_ALL, TRACE_DISABLE);
15179   VIXL_CHECK(__ GetSizeOfCodeGeneratedSince(&start) == 0);
15180
15181   END();
15182   TEARDOWN();
15183 }
15184 #endif
15185
15186
15187 #ifndef VIXL_INCLUDE_SIMULATOR_AARCH64
15188 TEST(log) {
15189   // The Log helper should not generate any code unless the simulator (or
15190   // debugger) is being used.
15191   SETUP();
15192   START();
15193
15194   Label start;
15195   __ Bind(&start);
15196   __ Log(LOG_ALL);
15197   VIXL_CHECK(__ GetSizeOfCodeGeneratedSince(&start) == 0);
15198
15199   END();
15200   TEARDOWN();
15201 }
15202 #endif
15203
15204
15205 TEST(blr_lr) {
15206   // A simple test to check that the simulator correctly handles "blr lr".
15207   SETUP();
15208
15209   START();
15210   Label target;
15211   Label end;
15212
15213   __ Mov(x0, 0x0);
15214   __ Adr(lr, &target);
15215
15216   __ Blr(lr);
15217   __ Mov(x0, 0xdeadbeef);
15218   __ B(&end);
15219
15220   __ Bind(&target);
15221   __ Mov(x0, 0xc001c0de);
15222
15223   __ Bind(&end);
15224   END();
15225
15226   RUN();
15227
15228   ASSERT_EQUAL_64(0xc001c0de, x0);
15229
15230   TEARDOWN();
15231 }
15232
15233
15234 TEST(barriers) {
15235   // Generate all supported barriers. This is just a smoke test.
15236   SETUP();
15237
15238   START();
15239
15240   // DMB
15241   __ Dmb(FullSystem, BarrierAll);
15242   __ Dmb(FullSystem, BarrierReads);
15243   __ Dmb(FullSystem, BarrierWrites);
15244   __ Dmb(FullSystem, BarrierOther);
15245
15246   __ Dmb(InnerShareable, BarrierAll);
15247   __ Dmb(InnerShareable, BarrierReads);
15248   __ Dmb(InnerShareable, BarrierWrites);
15249   __ Dmb(InnerShareable, BarrierOther);
15250
15251   __ Dmb(NonShareable, BarrierAll);
15252   __ Dmb(NonShareable, BarrierReads);
15253   __ Dmb(NonShareable, BarrierWrites);
15254   __ Dmb(NonShareable, BarrierOther);
15255
15256   __ Dmb(OuterShareable, BarrierAll);
15257   __ Dmb(OuterShareable, BarrierReads);
15258   __ Dmb(OuterShareable, BarrierWrites);
15259   __ Dmb(OuterShareable, BarrierOther);
15260
15261   // DSB
15262   __ Dsb(FullSystem, BarrierAll);
15263   __ Dsb(FullSystem, BarrierReads);
15264   __ Dsb(FullSystem, BarrierWrites);
15265   __ Dsb(FullSystem, BarrierOther);
15266
15267   __ Dsb(InnerShareable, BarrierAll);
15268   __ Dsb(InnerShareable, BarrierReads);
15269   __ Dsb(InnerShareable, BarrierWrites);
15270   __ Dsb(InnerShareable, BarrierOther);
15271
15272   __ Dsb(NonShareable, BarrierAll);
15273   __ Dsb(NonShareable, BarrierReads);
15274   __ Dsb(NonShareable, BarrierWrites);
15275   __ Dsb(NonShareable, BarrierOther);
15276
15277   __ Dsb(OuterShareable, BarrierAll);
15278   __ Dsb(OuterShareable, BarrierReads);
15279   __ Dsb(OuterShareable, BarrierWrites);
15280   __ Dsb(OuterShareable, BarrierOther);
15281
15282   // ISB
15283   __ Isb();
15284
15285   END();
15286
15287   RUN();
15288
15289   TEARDOWN();
15290 }
15291
15292
15293 TEST(process_nan_double) {
15294   // Make sure that NaN propagation works correctly.
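// A reminder of the encodings used below: a double with all eleven exponent
// bits set and a non-zero fraction is a NaN, and the top fraction bit
// (bit 51) selects quiet (1) versus signalling (0):
//
//   0x7ff5555511111111   // Exponent 0x7ff, bit 51 clear -> signalling NaN.
//   0x7ffaaaaa11111111   // Exponent 0x7ff, bit 51 set   -> quiet NaN.
//
// Quietening a signalling NaN just sets bit 51, which is why sn_proc below
// is 0x7ffd555511111111 (sn with bit 51 set).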
15295 double sn = RawbitsToDouble(0x7ff5555511111111); 15296 double qn = RawbitsToDouble(0x7ffaaaaa11111111); 15297 VIXL_ASSERT(IsSignallingNaN(sn)); 15298 VIXL_ASSERT(IsQuietNaN(qn)); 15299 15300 // The input NaNs after passing through ProcessNaN. 15301 double sn_proc = RawbitsToDouble(0x7ffd555511111111); 15302 double qn_proc = qn; 15303 VIXL_ASSERT(IsQuietNaN(sn_proc)); 15304 VIXL_ASSERT(IsQuietNaN(qn_proc)); 15305 15306 SETUP(); 15307 START(); 15308 15309 // Execute a number of instructions which all use ProcessNaN, and check that 15310 // they all handle the NaN correctly. 15311 __ Fmov(d0, sn); 15312 __ Fmov(d10, qn); 15313 15314 // Operations that always propagate NaNs unchanged, even signalling NaNs. 15315 // - Signalling NaN 15316 __ Fmov(d1, d0); 15317 __ Fabs(d2, d0); 15318 __ Fneg(d3, d0); 15319 // - Quiet NaN 15320 __ Fmov(d11, d10); 15321 __ Fabs(d12, d10); 15322 __ Fneg(d13, d10); 15323 15324 // Operations that use ProcessNaN. 15325 // - Signalling NaN 15326 __ Fsqrt(d4, d0); 15327 __ Frinta(d5, d0); 15328 __ Frintn(d6, d0); 15329 __ Frintz(d7, d0); 15330 // - Quiet NaN 15331 __ Fsqrt(d14, d10); 15332 __ Frinta(d15, d10); 15333 __ Frintn(d16, d10); 15334 __ Frintz(d17, d10); 15335 15336 // The behaviour of fcvt is checked in TEST(fcvt_sd). 15337 15338 END(); 15339 RUN(); 15340 15341 uint64_t qn_raw = DoubleToRawbits(qn); 15342 uint64_t sn_raw = DoubleToRawbits(sn); 15343 15344 // - Signalling NaN 15345 ASSERT_EQUAL_FP64(sn, d1); 15346 ASSERT_EQUAL_FP64(RawbitsToDouble(sn_raw & ~kDSignMask), d2); 15347 ASSERT_EQUAL_FP64(RawbitsToDouble(sn_raw ^ kDSignMask), d3); 15348 // - Quiet NaN 15349 ASSERT_EQUAL_FP64(qn, d11); 15350 ASSERT_EQUAL_FP64(RawbitsToDouble(qn_raw & ~kDSignMask), d12); 15351 ASSERT_EQUAL_FP64(RawbitsToDouble(qn_raw ^ kDSignMask), d13); 15352 15353 // - Signalling NaN 15354 ASSERT_EQUAL_FP64(sn_proc, d4); 15355 ASSERT_EQUAL_FP64(sn_proc, d5); 15356 ASSERT_EQUAL_FP64(sn_proc, d6); 15357 ASSERT_EQUAL_FP64(sn_proc, d7); 15358 // - Quiet NaN 15359 ASSERT_EQUAL_FP64(qn_proc, d14); 15360 ASSERT_EQUAL_FP64(qn_proc, d15); 15361 ASSERT_EQUAL_FP64(qn_proc, d16); 15362 ASSERT_EQUAL_FP64(qn_proc, d17); 15363 15364 TEARDOWN(); 15365 } 15366 15367 15368 TEST(process_nan_float) { 15369 // Make sure that NaN propagation works correctly. 15370 float sn = RawbitsToFloat(0x7f951111); 15371 float qn = RawbitsToFloat(0x7fea1111); 15372 VIXL_ASSERT(IsSignallingNaN(sn)); 15373 VIXL_ASSERT(IsQuietNaN(qn)); 15374 15375 // The input NaNs after passing through ProcessNaN. 15376 float sn_proc = RawbitsToFloat(0x7fd51111); 15377 float qn_proc = qn; 15378 VIXL_ASSERT(IsQuietNaN(sn_proc)); 15379 VIXL_ASSERT(IsQuietNaN(qn_proc)); 15380 15381 SETUP(); 15382 START(); 15383 15384 // Execute a number of instructions which all use ProcessNaN, and check that 15385 // they all handle the NaN correctly. 15386 __ Fmov(s0, sn); 15387 __ Fmov(s10, qn); 15388 15389 // Operations that always propagate NaNs unchanged, even signalling NaNs. 15390 // - Signalling NaN 15391 __ Fmov(s1, s0); 15392 __ Fabs(s2, s0); 15393 __ Fneg(s3, s0); 15394 // - Quiet NaN 15395 __ Fmov(s11, s10); 15396 __ Fabs(s12, s10); 15397 __ Fneg(s13, s10); 15398 15399 // Operations that use ProcessNaN. 15400 // - Signalling NaN 15401 __ Fsqrt(s4, s0); 15402 __ Frinta(s5, s0); 15403 __ Frintn(s6, s0); 15404 __ Frintz(s7, s0); 15405 // - Quiet NaN 15406 __ Fsqrt(s14, s10); 15407 __ Frinta(s15, s10); 15408 __ Frintn(s16, s10); 15409 __ Frintz(s17, s10); 15410 15411 // The behaviour of fcvt is checked in TEST(fcvt_sd). 
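// When checking the results after RUN(), note that kSSignMask is just the
// sign bit (bit 31), so for sn = 0x7f951111:
//
//   sn_raw & ~kSSignMask == 0x7f951111   // Sign already clear: unchanged.
//   sn_raw ^  kSSignMask == 0xff951111   // Sign flipped, still signalling.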
15412 15413 END(); 15414 RUN(); 15415 15416 uint32_t qn_raw = FloatToRawbits(qn); 15417 uint32_t sn_raw = FloatToRawbits(sn); 15418 15419 // - Signalling NaN 15420 ASSERT_EQUAL_FP32(sn, s1); 15421 ASSERT_EQUAL_FP32(RawbitsToFloat(sn_raw & ~kSSignMask), s2); 15422 ASSERT_EQUAL_FP32(RawbitsToFloat(sn_raw ^ kSSignMask), s3); 15423 // - Quiet NaN 15424 ASSERT_EQUAL_FP32(qn, s11); 15425 ASSERT_EQUAL_FP32(RawbitsToFloat(qn_raw & ~kSSignMask), s12); 15426 ASSERT_EQUAL_FP32(RawbitsToFloat(qn_raw ^ kSSignMask), s13); 15427 15428 // - Signalling NaN 15429 ASSERT_EQUAL_FP32(sn_proc, s4); 15430 ASSERT_EQUAL_FP32(sn_proc, s5); 15431 ASSERT_EQUAL_FP32(sn_proc, s6); 15432 ASSERT_EQUAL_FP32(sn_proc, s7); 15433 // - Quiet NaN 15434 ASSERT_EQUAL_FP32(qn_proc, s14); 15435 ASSERT_EQUAL_FP32(qn_proc, s15); 15436 ASSERT_EQUAL_FP32(qn_proc, s16); 15437 ASSERT_EQUAL_FP32(qn_proc, s17); 15438 15439 TEARDOWN(); 15440 } 15441 15442 15443 static void ProcessNaNsHelper(double n, double m, double expected) { 15444 VIXL_ASSERT(std::isnan(n) || std::isnan(m)); 15445 VIXL_ASSERT(std::isnan(expected)); 15446 15447 SETUP(); 15448 START(); 15449 15450 // Execute a number of instructions which all use ProcessNaNs, and check that 15451 // they all propagate NaNs correctly. 15452 __ Fmov(d0, n); 15453 __ Fmov(d1, m); 15454 15455 __ Fadd(d2, d0, d1); 15456 __ Fsub(d3, d0, d1); 15457 __ Fmul(d4, d0, d1); 15458 __ Fdiv(d5, d0, d1); 15459 __ Fmax(d6, d0, d1); 15460 __ Fmin(d7, d0, d1); 15461 15462 END(); 15463 RUN(); 15464 15465 ASSERT_EQUAL_FP64(expected, d2); 15466 ASSERT_EQUAL_FP64(expected, d3); 15467 ASSERT_EQUAL_FP64(expected, d4); 15468 ASSERT_EQUAL_FP64(expected, d5); 15469 ASSERT_EQUAL_FP64(expected, d6); 15470 ASSERT_EQUAL_FP64(expected, d7); 15471 15472 TEARDOWN(); 15473 } 15474 15475 15476 TEST(process_nans_double) { 15477 // Make sure that NaN propagation works correctly. 15478 double sn = RawbitsToDouble(0x7ff5555511111111); 15479 double sm = RawbitsToDouble(0x7ff5555522222222); 15480 double qn = RawbitsToDouble(0x7ffaaaaa11111111); 15481 double qm = RawbitsToDouble(0x7ffaaaaa22222222); 15482 VIXL_ASSERT(IsSignallingNaN(sn)); 15483 VIXL_ASSERT(IsSignallingNaN(sm)); 15484 VIXL_ASSERT(IsQuietNaN(qn)); 15485 VIXL_ASSERT(IsQuietNaN(qm)); 15486 15487 // The input NaNs after passing through ProcessNaN. 15488 double sn_proc = RawbitsToDouble(0x7ffd555511111111); 15489 double sm_proc = RawbitsToDouble(0x7ffd555522222222); 15490 double qn_proc = qn; 15491 double qm_proc = qm; 15492 VIXL_ASSERT(IsQuietNaN(sn_proc)); 15493 VIXL_ASSERT(IsQuietNaN(sm_proc)); 15494 VIXL_ASSERT(IsQuietNaN(qn_proc)); 15495 VIXL_ASSERT(IsQuietNaN(qm_proc)); 15496 15497 // Quiet NaNs are propagated. 15498 ProcessNaNsHelper(qn, 0, qn_proc); 15499 ProcessNaNsHelper(0, qm, qm_proc); 15500 ProcessNaNsHelper(qn, qm, qn_proc); 15501 15502 // Signalling NaNs are propagated, and made quiet. 15503 ProcessNaNsHelper(sn, 0, sn_proc); 15504 ProcessNaNsHelper(0, sm, sm_proc); 15505 ProcessNaNsHelper(sn, sm, sn_proc); 15506 15507 // Signalling NaNs take precedence over quiet NaNs. 15508 ProcessNaNsHelper(sn, qm, sn_proc); 15509 ProcessNaNsHelper(qn, sm, sm_proc); 15510 ProcessNaNsHelper(sn, sm, sn_proc); 15511 } 15512 15513 15514 static void ProcessNaNsHelper(float n, float m, float expected) { 15515 VIXL_ASSERT(std::isnan(n) || std::isnan(m)); 15516 VIXL_ASSERT(std::isnan(expected)); 15517 15518 SETUP(); 15519 START(); 15520 15521 // Execute a number of instructions which all use ProcessNaNs, and check that 15522 // they all propagate NaNs correctly. 
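// (The callers of this helper encode the precedence rule that ProcessNaNs
// applies when both operands are NaNs: a signalling NaN wins, quietened,
// even when it is the second operand; otherwise the first operand's quiet
// NaN is propagated. Two lines from the callers illustrate this:
//
//   ProcessNaNsHelper(sn, qm, sn_proc);   // Signalling beats quiet.
//   ProcessNaNsHelper(qn, sm, sm_proc);   // Even in the second operand.)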
15523 __ Fmov(s0, n); 15524 __ Fmov(s1, m); 15525 15526 __ Fadd(s2, s0, s1); 15527 __ Fsub(s3, s0, s1); 15528 __ Fmul(s4, s0, s1); 15529 __ Fdiv(s5, s0, s1); 15530 __ Fmax(s6, s0, s1); 15531 __ Fmin(s7, s0, s1); 15532 15533 END(); 15534 RUN(); 15535 15536 ASSERT_EQUAL_FP32(expected, s2); 15537 ASSERT_EQUAL_FP32(expected, s3); 15538 ASSERT_EQUAL_FP32(expected, s4); 15539 ASSERT_EQUAL_FP32(expected, s5); 15540 ASSERT_EQUAL_FP32(expected, s6); 15541 ASSERT_EQUAL_FP32(expected, s7); 15542 15543 TEARDOWN(); 15544 } 15545 15546 15547 TEST(process_nans_float) { 15548 // Make sure that NaN propagation works correctly. 15549 float sn = RawbitsToFloat(0x7f951111); 15550 float sm = RawbitsToFloat(0x7f952222); 15551 float qn = RawbitsToFloat(0x7fea1111); 15552 float qm = RawbitsToFloat(0x7fea2222); 15553 VIXL_ASSERT(IsSignallingNaN(sn)); 15554 VIXL_ASSERT(IsSignallingNaN(sm)); 15555 VIXL_ASSERT(IsQuietNaN(qn)); 15556 VIXL_ASSERT(IsQuietNaN(qm)); 15557 15558 // The input NaNs after passing through ProcessNaN. 15559 float sn_proc = RawbitsToFloat(0x7fd51111); 15560 float sm_proc = RawbitsToFloat(0x7fd52222); 15561 float qn_proc = qn; 15562 float qm_proc = qm; 15563 VIXL_ASSERT(IsQuietNaN(sn_proc)); 15564 VIXL_ASSERT(IsQuietNaN(sm_proc)); 15565 VIXL_ASSERT(IsQuietNaN(qn_proc)); 15566 VIXL_ASSERT(IsQuietNaN(qm_proc)); 15567 15568 // Quiet NaNs are propagated. 15569 ProcessNaNsHelper(qn, 0, qn_proc); 15570 ProcessNaNsHelper(0, qm, qm_proc); 15571 ProcessNaNsHelper(qn, qm, qn_proc); 15572 15573 // Signalling NaNs are propagated, and made quiet. 15574 ProcessNaNsHelper(sn, 0, sn_proc); 15575 ProcessNaNsHelper(0, sm, sm_proc); 15576 ProcessNaNsHelper(sn, sm, sn_proc); 15577 15578 // Signalling NaNs take precedence over quiet NaNs. 15579 ProcessNaNsHelper(sn, qm, sn_proc); 15580 ProcessNaNsHelper(qn, sm, sm_proc); 15581 ProcessNaNsHelper(sn, sm, sn_proc); 15582 } 15583 15584 15585 static void DefaultNaNHelper(float n, float m, float a) { 15586 VIXL_ASSERT(std::isnan(n) || std::isnan(m) || std::isnan(a)); 15587 15588 bool test_1op = std::isnan(n); 15589 bool test_2op = std::isnan(n) || std::isnan(m); 15590 15591 SETUP(); 15592 START(); 15593 15594 // Enable Default-NaN mode in the FPCR. 15595 __ Mrs(x0, FPCR); 15596 __ Orr(x1, x0, DN_mask); 15597 __ Msr(FPCR, x1); 15598 15599 // Execute a number of instructions which all use ProcessNaNs, and check that 15600 // they all produce the default NaN. 15601 __ Fmov(s0, n); 15602 __ Fmov(s1, m); 15603 __ Fmov(s2, a); 15604 15605 if (test_1op) { 15606 // Operations that always propagate NaNs unchanged, even signalling NaNs. 15607 __ Fmov(s10, s0); 15608 __ Fabs(s11, s0); 15609 __ Fneg(s12, s0); 15610 15611 // Operations that use ProcessNaN. 15612 __ Fsqrt(s13, s0); 15613 __ Frinta(s14, s0); 15614 __ Frintn(s15, s0); 15615 __ Frintz(s16, s0); 15616 15617 // Fcvt usually has special NaN handling, but it respects default-NaN mode. 15618 __ Fcvt(d17, s0); 15619 } 15620 15621 if (test_2op) { 15622 __ Fadd(s18, s0, s1); 15623 __ Fsub(s19, s0, s1); 15624 __ Fmul(s20, s0, s1); 15625 __ Fdiv(s21, s0, s1); 15626 __ Fmax(s22, s0, s1); 15627 __ Fmin(s23, s0, s1); 15628 } 15629 15630 __ Fmadd(s24, s0, s1, s2); 15631 __ Fmsub(s25, s0, s1, s2); 15632 __ Fnmadd(s26, s0, s1, s2); 15633 __ Fnmsub(s27, s0, s1, s2); 15634 15635 // Restore FPCR. 
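// (Restoring the value saved by the Mrs above matters because the FPCR is
// global floating-point state: on real hardware, leaving the DN bit set
// would change NaN handling for everything that runs after this test. The
// whole save/modify/restore shape is:
//
//   __ Mrs(x0, FPCR);           // Save the old FPCR,
//   __ Orr(x1, x0, DN_mask);    // set the Default-NaN bit,
//   __ Msr(FPCR, x1);           // ...run the FP code under DN...
//   __ Msr(FPCR, x0);           // and put the old value back.)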
15636 __ Msr(FPCR, x0); 15637 15638 END(); 15639 RUN(); 15640 15641 if (test_1op) { 15642 uint32_t n_raw = FloatToRawbits(n); 15643 ASSERT_EQUAL_FP32(n, s10); 15644 ASSERT_EQUAL_FP32(RawbitsToFloat(n_raw & ~kSSignMask), s11); 15645 ASSERT_EQUAL_FP32(RawbitsToFloat(n_raw ^ kSSignMask), s12); 15646 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s13); 15647 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s14); 15648 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s15); 15649 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s16); 15650 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d17); 15651 } 15652 15653 if (test_2op) { 15654 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s18); 15655 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s19); 15656 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s20); 15657 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s21); 15658 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s22); 15659 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s23); 15660 } 15661 15662 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s24); 15663 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s25); 15664 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s26); 15665 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s27); 15666 15667 TEARDOWN(); 15668 } 15669 15670 15671 TEST(default_nan_float) { 15672 float sn = RawbitsToFloat(0x7f951111); 15673 float sm = RawbitsToFloat(0x7f952222); 15674 float sa = RawbitsToFloat(0x7f95aaaa); 15675 float qn = RawbitsToFloat(0x7fea1111); 15676 float qm = RawbitsToFloat(0x7fea2222); 15677 float qa = RawbitsToFloat(0x7feaaaaa); 15678 VIXL_ASSERT(IsSignallingNaN(sn)); 15679 VIXL_ASSERT(IsSignallingNaN(sm)); 15680 VIXL_ASSERT(IsSignallingNaN(sa)); 15681 VIXL_ASSERT(IsQuietNaN(qn)); 15682 VIXL_ASSERT(IsQuietNaN(qm)); 15683 VIXL_ASSERT(IsQuietNaN(qa)); 15684 15685 // - Signalling NaNs 15686 DefaultNaNHelper(sn, 0.0f, 0.0f); 15687 DefaultNaNHelper(0.0f, sm, 0.0f); 15688 DefaultNaNHelper(0.0f, 0.0f, sa); 15689 DefaultNaNHelper(sn, sm, 0.0f); 15690 DefaultNaNHelper(0.0f, sm, sa); 15691 DefaultNaNHelper(sn, 0.0f, sa); 15692 DefaultNaNHelper(sn, sm, sa); 15693 // - Quiet NaNs 15694 DefaultNaNHelper(qn, 0.0f, 0.0f); 15695 DefaultNaNHelper(0.0f, qm, 0.0f); 15696 DefaultNaNHelper(0.0f, 0.0f, qa); 15697 DefaultNaNHelper(qn, qm, 0.0f); 15698 DefaultNaNHelper(0.0f, qm, qa); 15699 DefaultNaNHelper(qn, 0.0f, qa); 15700 DefaultNaNHelper(qn, qm, qa); 15701 // - Mixed NaNs 15702 DefaultNaNHelper(qn, sm, sa); 15703 DefaultNaNHelper(sn, qm, sa); 15704 DefaultNaNHelper(sn, sm, qa); 15705 DefaultNaNHelper(qn, qm, sa); 15706 DefaultNaNHelper(sn, qm, qa); 15707 DefaultNaNHelper(qn, sm, qa); 15708 DefaultNaNHelper(qn, qm, qa); 15709 } 15710 15711 15712 static void DefaultNaNHelper(double n, double m, double a) { 15713 VIXL_ASSERT(std::isnan(n) || std::isnan(m) || std::isnan(a)); 15714 15715 bool test_1op = std::isnan(n); 15716 bool test_2op = std::isnan(n) || std::isnan(m); 15717 15718 SETUP(); 15719 START(); 15720 15721 // Enable Default-NaN mode in the FPCR. 15722 __ Mrs(x0, FPCR); 15723 __ Orr(x1, x0, DN_mask); 15724 __ Msr(FPCR, x1); 15725 15726 // Execute a number of instructions which all use ProcessNaNs, and check that 15727 // they all produce the default NaN. 15728 __ Fmov(d0, n); 15729 __ Fmov(d1, m); 15730 __ Fmov(d2, a); 15731 15732 if (test_1op) { 15733 // Operations that always propagate NaNs unchanged, even signalling NaNs. 15734 __ Fmov(d10, d0); 15735 __ Fabs(d11, d0); 15736 __ Fneg(d12, d0); 15737 15738 // Operations that use ProcessNaN. 15739 __ Fsqrt(d13, d0); 15740 __ Frinta(d14, d0); 15741 __ Frintn(d15, d0); 15742 __ Frintz(d16, d0); 15743 15744 // Fcvt usually has special NaN handling, but it respects default-NaN mode. 
15745 __ Fcvt(s17, d0); 15746 } 15747 15748 if (test_2op) { 15749 __ Fadd(d18, d0, d1); 15750 __ Fsub(d19, d0, d1); 15751 __ Fmul(d20, d0, d1); 15752 __ Fdiv(d21, d0, d1); 15753 __ Fmax(d22, d0, d1); 15754 __ Fmin(d23, d0, d1); 15755 } 15756 15757 __ Fmadd(d24, d0, d1, d2); 15758 __ Fmsub(d25, d0, d1, d2); 15759 __ Fnmadd(d26, d0, d1, d2); 15760 __ Fnmsub(d27, d0, d1, d2); 15761 15762 // Restore FPCR. 15763 __ Msr(FPCR, x0); 15764 15765 END(); 15766 RUN(); 15767 15768 if (test_1op) { 15769 uint64_t n_raw = DoubleToRawbits(n); 15770 ASSERT_EQUAL_FP64(n, d10); 15771 ASSERT_EQUAL_FP64(RawbitsToDouble(n_raw & ~kDSignMask), d11); 15772 ASSERT_EQUAL_FP64(RawbitsToDouble(n_raw ^ kDSignMask), d12); 15773 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d13); 15774 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d14); 15775 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d15); 15776 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d16); 15777 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s17); 15778 } 15779 15780 if (test_2op) { 15781 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d18); 15782 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d19); 15783 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d20); 15784 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d21); 15785 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d22); 15786 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d23); 15787 } 15788 15789 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d24); 15790 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d25); 15791 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d26); 15792 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d27); 15793 15794 TEARDOWN(); 15795 } 15796 15797 15798 TEST(default_nan_double) { 15799 double sn = RawbitsToDouble(0x7ff5555511111111); 15800 double sm = RawbitsToDouble(0x7ff5555522222222); 15801 double sa = RawbitsToDouble(0x7ff55555aaaaaaaa); 15802 double qn = RawbitsToDouble(0x7ffaaaaa11111111); 15803 double qm = RawbitsToDouble(0x7ffaaaaa22222222); 15804 double qa = RawbitsToDouble(0x7ffaaaaaaaaaaaaa); 15805 VIXL_ASSERT(IsSignallingNaN(sn)); 15806 VIXL_ASSERT(IsSignallingNaN(sm)); 15807 VIXL_ASSERT(IsSignallingNaN(sa)); 15808 VIXL_ASSERT(IsQuietNaN(qn)); 15809 VIXL_ASSERT(IsQuietNaN(qm)); 15810 VIXL_ASSERT(IsQuietNaN(qa)); 15811 15812 // - Signalling NaNs 15813 DefaultNaNHelper(sn, 0.0, 0.0); 15814 DefaultNaNHelper(0.0, sm, 0.0); 15815 DefaultNaNHelper(0.0, 0.0, sa); 15816 DefaultNaNHelper(sn, sm, 0.0); 15817 DefaultNaNHelper(0.0, sm, sa); 15818 DefaultNaNHelper(sn, 0.0, sa); 15819 DefaultNaNHelper(sn, sm, sa); 15820 // - Quiet NaNs 15821 DefaultNaNHelper(qn, 0.0, 0.0); 15822 DefaultNaNHelper(0.0, qm, 0.0); 15823 DefaultNaNHelper(0.0, 0.0, qa); 15824 DefaultNaNHelper(qn, qm, 0.0); 15825 DefaultNaNHelper(0.0, qm, qa); 15826 DefaultNaNHelper(qn, 0.0, qa); 15827 DefaultNaNHelper(qn, qm, qa); 15828 // - Mixed NaNs 15829 DefaultNaNHelper(qn, sm, sa); 15830 DefaultNaNHelper(sn, qm, sa); 15831 DefaultNaNHelper(sn, sm, qa); 15832 DefaultNaNHelper(qn, qm, sa); 15833 DefaultNaNHelper(sn, qm, qa); 15834 DefaultNaNHelper(qn, sm, qa); 15835 DefaultNaNHelper(qn, qm, qa); 15836 } 15837 15838 15839 TEST(ldar_stlr) { 15840 // The middle value is read, modified, and written. The padding exists only to 15841 // check for over-write. 
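// Ldar and Stlr are the load-acquire and store-release forms of the plain
// loads and stores: a load-acquire cannot be reordered with later memory
// accesses, and a store-release cannot be reordered with earlier ones. For
// each access size the test uses the same read-modify-write shape, sketched
// here for the byte case:
//
//   __ Mov(x10, reinterpret_cast<uintptr_t>(&b[1]));
//   __ Ldarb(w0, MemOperand(x10));   // Acquire-load the byte,
//   __ Add(w0, w0, 1);               // increment it,
//   __ Stlrb(w0, MemOperand(x10));   // and release-store it back.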
15842 uint8_t b[] = {0, 0x12, 0}; 15843 uint16_t h[] = {0, 0x1234, 0}; 15844 uint32_t w[] = {0, 0x12345678, 0}; 15845 uint64_t x[] = {0, 0x123456789abcdef0, 0}; 15846 15847 SETUP(); 15848 START(); 15849 15850 __ Mov(x10, reinterpret_cast<uintptr_t>(&b[1])); 15851 __ Ldarb(w0, MemOperand(x10)); 15852 __ Add(w0, w0, 1); 15853 __ Stlrb(w0, MemOperand(x10)); 15854 15855 __ Mov(x10, reinterpret_cast<uintptr_t>(&h[1])); 15856 __ Ldarh(w0, MemOperand(x10)); 15857 __ Add(w0, w0, 1); 15858 __ Stlrh(w0, MemOperand(x10)); 15859 15860 __ Mov(x10, reinterpret_cast<uintptr_t>(&w[1])); 15861 __ Ldar(w0, MemOperand(x10)); 15862 __ Add(w0, w0, 1); 15863 __ Stlr(w0, MemOperand(x10)); 15864 15865 __ Mov(x10, reinterpret_cast<uintptr_t>(&x[1])); 15866 __ Ldar(x0, MemOperand(x10)); 15867 __ Add(x0, x0, 1); 15868 __ Stlr(x0, MemOperand(x10)); 15869 15870 END(); 15871 RUN(); 15872 15873 ASSERT_EQUAL_32(0x13, b[1]); 15874 ASSERT_EQUAL_32(0x1235, h[1]); 15875 ASSERT_EQUAL_32(0x12345679, w[1]); 15876 ASSERT_EQUAL_64(0x123456789abcdef1, x[1]); 15877 15878 // Check for over-write. 15879 ASSERT_EQUAL_32(0, b[0]); 15880 ASSERT_EQUAL_32(0, b[2]); 15881 ASSERT_EQUAL_32(0, h[0]); 15882 ASSERT_EQUAL_32(0, h[2]); 15883 ASSERT_EQUAL_32(0, w[0]); 15884 ASSERT_EQUAL_32(0, w[2]); 15885 ASSERT_EQUAL_64(0, x[0]); 15886 ASSERT_EQUAL_64(0, x[2]); 15887 15888 TEARDOWN(); 15889 } 15890 15891 15892 TEST(ldxr_stxr) { 15893 // The middle value is read, modified, and written. The padding exists only to 15894 // check for over-write. 15895 uint8_t b[] = {0, 0x12, 0}; 15896 uint16_t h[] = {0, 0x1234, 0}; 15897 uint32_t w[] = {0, 0x12345678, 0}; 15898 uint64_t x[] = {0, 0x123456789abcdef0, 0}; 15899 15900 // As above, but get suitably-aligned values for ldxp and stxp. 15901 uint32_t wp_data[] = {0, 0, 0, 0, 0}; 15902 uint32_t* wp = AlignUp(wp_data + 1, kWRegSizeInBytes * 2) - 1; 15903 wp[1] = 0x12345678; // wp[1] is 64-bit-aligned. 15904 wp[2] = 0x87654321; 15905 uint64_t xp_data[] = {0, 0, 0, 0, 0}; 15906 uint64_t* xp = AlignUp(xp_data + 1, kXRegSizeInBytes * 2) - 1; 15907 xp[1] = 0x123456789abcdef0; // xp[1] is 128-bit-aligned. 
15908 xp[2] = 0x0fedcba987654321; 15909 15910 SETUP(); 15911 START(); 15912 15913 __ Mov(x10, reinterpret_cast<uintptr_t>(&b[1])); 15914 Label try_b; 15915 __ Bind(&try_b); 15916 __ Ldxrb(w0, MemOperand(x10)); 15917 __ Add(w0, w0, 1); 15918 __ Stxrb(w5, w0, MemOperand(x10)); 15919 __ Cbnz(w5, &try_b); 15920 15921 __ Mov(x10, reinterpret_cast<uintptr_t>(&h[1])); 15922 Label try_h; 15923 __ Bind(&try_h); 15924 __ Ldxrh(w0, MemOperand(x10)); 15925 __ Add(w0, w0, 1); 15926 __ Stxrh(w5, w0, MemOperand(x10)); 15927 __ Cbnz(w5, &try_h); 15928 15929 __ Mov(x10, reinterpret_cast<uintptr_t>(&w[1])); 15930 Label try_w; 15931 __ Bind(&try_w); 15932 __ Ldxr(w0, MemOperand(x10)); 15933 __ Add(w0, w0, 1); 15934 __ Stxr(w5, w0, MemOperand(x10)); 15935 __ Cbnz(w5, &try_w); 15936 15937 __ Mov(x10, reinterpret_cast<uintptr_t>(&x[1])); 15938 Label try_x; 15939 __ Bind(&try_x); 15940 __ Ldxr(x0, MemOperand(x10)); 15941 __ Add(x0, x0, 1); 15942 __ Stxr(w5, x0, MemOperand(x10)); 15943 __ Cbnz(w5, &try_x); 15944 15945 __ Mov(x10, reinterpret_cast<uintptr_t>(&wp[1])); 15946 Label try_wp; 15947 __ Bind(&try_wp); 15948 __ Ldxp(w0, w1, MemOperand(x10)); 15949 __ Add(w0, w0, 1); 15950 __ Add(w1, w1, 1); 15951 __ Stxp(w5, w0, w1, MemOperand(x10)); 15952 __ Cbnz(w5, &try_wp); 15953 15954 __ Mov(x10, reinterpret_cast<uintptr_t>(&xp[1])); 15955 Label try_xp; 15956 __ Bind(&try_xp); 15957 __ Ldxp(x0, x1, MemOperand(x10)); 15958 __ Add(x0, x0, 1); 15959 __ Add(x1, x1, 1); 15960 __ Stxp(w5, x0, x1, MemOperand(x10)); 15961 __ Cbnz(w5, &try_xp); 15962 15963 END(); 15964 RUN(); 15965 15966 ASSERT_EQUAL_32(0x13, b[1]); 15967 ASSERT_EQUAL_32(0x1235, h[1]); 15968 ASSERT_EQUAL_32(0x12345679, w[1]); 15969 ASSERT_EQUAL_64(0x123456789abcdef1, x[1]); 15970 ASSERT_EQUAL_32(0x12345679, wp[1]); 15971 ASSERT_EQUAL_32(0x87654322, wp[2]); 15972 ASSERT_EQUAL_64(0x123456789abcdef1, xp[1]); 15973 ASSERT_EQUAL_64(0x0fedcba987654322, xp[2]); 15974 15975 // Check for over-write. 15976 ASSERT_EQUAL_32(0, b[0]); 15977 ASSERT_EQUAL_32(0, b[2]); 15978 ASSERT_EQUAL_32(0, h[0]); 15979 ASSERT_EQUAL_32(0, h[2]); 15980 ASSERT_EQUAL_32(0, w[0]); 15981 ASSERT_EQUAL_32(0, w[2]); 15982 ASSERT_EQUAL_64(0, x[0]); 15983 ASSERT_EQUAL_64(0, x[2]); 15984 ASSERT_EQUAL_32(0, wp[0]); 15985 ASSERT_EQUAL_32(0, wp[3]); 15986 ASSERT_EQUAL_64(0, xp[0]); 15987 ASSERT_EQUAL_64(0, xp[3]); 15988 15989 TEARDOWN(); 15990 } 15991 15992 15993 TEST(ldaxr_stlxr) { 15994 // The middle value is read, modified, and written. The padding exists only to 15995 // check for over-write. 15996 uint8_t b[] = {0, 0x12, 0}; 15997 uint16_t h[] = {0, 0x1234, 0}; 15998 uint32_t w[] = {0, 0x12345678, 0}; 15999 uint64_t x[] = {0, 0x123456789abcdef0, 0}; 16000 16001 // As above, but get suitably-aligned values for ldxp and stxp. 16002 uint32_t wp_data[] = {0, 0, 0, 0, 0}; 16003 uint32_t* wp = AlignUp(wp_data + 1, kWRegSizeInBytes * 2) - 1; 16004 wp[1] = 0x12345678; // wp[1] is 64-bit-aligned. 16005 wp[2] = 0x87654321; 16006 uint64_t xp_data[] = {0, 0, 0, 0, 0}; 16007 uint64_t* xp = AlignUp(xp_data + 1, kXRegSizeInBytes * 2) - 1; 16008 xp[1] = 0x123456789abcdef0; // xp[1] is 128-bit-aligned. 
16009 xp[2] = 0x0fedcba987654321; 16010 16011 SETUP(); 16012 START(); 16013 16014 __ Mov(x10, reinterpret_cast<uintptr_t>(&b[1])); 16015 Label try_b; 16016 __ Bind(&try_b); 16017 __ Ldaxrb(w0, MemOperand(x10)); 16018 __ Add(w0, w0, 1); 16019 __ Stlxrb(w5, w0, MemOperand(x10)); 16020 __ Cbnz(w5, &try_b); 16021 16022 __ Mov(x10, reinterpret_cast<uintptr_t>(&h[1])); 16023 Label try_h; 16024 __ Bind(&try_h); 16025 __ Ldaxrh(w0, MemOperand(x10)); 16026 __ Add(w0, w0, 1); 16027 __ Stlxrh(w5, w0, MemOperand(x10)); 16028 __ Cbnz(w5, &try_h); 16029 16030 __ Mov(x10, reinterpret_cast<uintptr_t>(&w[1])); 16031 Label try_w; 16032 __ Bind(&try_w); 16033 __ Ldaxr(w0, MemOperand(x10)); 16034 __ Add(w0, w0, 1); 16035 __ Stlxr(w5, w0, MemOperand(x10)); 16036 __ Cbnz(w5, &try_w); 16037 16038 __ Mov(x10, reinterpret_cast<uintptr_t>(&x[1])); 16039 Label try_x; 16040 __ Bind(&try_x); 16041 __ Ldaxr(x0, MemOperand(x10)); 16042 __ Add(x0, x0, 1); 16043 __ Stlxr(w5, x0, MemOperand(x10)); 16044 __ Cbnz(w5, &try_x); 16045 16046 __ Mov(x10, reinterpret_cast<uintptr_t>(&wp[1])); 16047 Label try_wp; 16048 __ Bind(&try_wp); 16049 __ Ldaxp(w0, w1, MemOperand(x10)); 16050 __ Add(w0, w0, 1); 16051 __ Add(w1, w1, 1); 16052 __ Stlxp(w5, w0, w1, MemOperand(x10)); 16053 __ Cbnz(w5, &try_wp); 16054 16055 __ Mov(x10, reinterpret_cast<uintptr_t>(&xp[1])); 16056 Label try_xp; 16057 __ Bind(&try_xp); 16058 __ Ldaxp(x0, x1, MemOperand(x10)); 16059 __ Add(x0, x0, 1); 16060 __ Add(x1, x1, 1); 16061 __ Stlxp(w5, x0, x1, MemOperand(x10)); 16062 __ Cbnz(w5, &try_xp); 16063 16064 END(); 16065 RUN(); 16066 16067 ASSERT_EQUAL_32(0x13, b[1]); 16068 ASSERT_EQUAL_32(0x1235, h[1]); 16069 ASSERT_EQUAL_32(0x12345679, w[1]); 16070 ASSERT_EQUAL_64(0x123456789abcdef1, x[1]); 16071 ASSERT_EQUAL_32(0x12345679, wp[1]); 16072 ASSERT_EQUAL_32(0x87654322, wp[2]); 16073 ASSERT_EQUAL_64(0x123456789abcdef1, xp[1]); 16074 ASSERT_EQUAL_64(0x0fedcba987654322, xp[2]); 16075 16076 // Check for over-write. 16077 ASSERT_EQUAL_32(0, b[0]); 16078 ASSERT_EQUAL_32(0, b[2]); 16079 ASSERT_EQUAL_32(0, h[0]); 16080 ASSERT_EQUAL_32(0, h[2]); 16081 ASSERT_EQUAL_32(0, w[0]); 16082 ASSERT_EQUAL_32(0, w[2]); 16083 ASSERT_EQUAL_64(0, x[0]); 16084 ASSERT_EQUAL_64(0, x[2]); 16085 ASSERT_EQUAL_32(0, wp[0]); 16086 ASSERT_EQUAL_32(0, wp[3]); 16087 ASSERT_EQUAL_64(0, xp[0]); 16088 ASSERT_EQUAL_64(0, xp[3]); 16089 16090 TEARDOWN(); 16091 } 16092 16093 16094 TEST(clrex) { 16095 // This data should never be written. 
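// (It never is written, because Clrex clears the local exclusive monitor: a
// store-exclusive that follows a Clrex must fail and write 1, the "failed"
// status, into its status register. Each of the twelve sequences below
// therefore follows the shape:
//
//   __ Ldxrb(w0, MemOperand(x10));      // Claim the exclusive monitor,
//   __ Clrex();                         // then deliberately drop it,
//   __ Stxrb(w5, w0, MemOperand(x10));  // so the store fails: w5 == 1.
//   __ Add(w6, w6, w5);                 // w6 accumulates the failures.)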
16096 uint64_t data[] = {0, 0, 0}; 16097 uint64_t* data_aligned = AlignUp(data, kXRegSizeInBytes * 2); 16098 16099 SETUP(); 16100 START(); 16101 16102 __ Mov(x10, reinterpret_cast<uintptr_t>(data_aligned)); 16103 __ Mov(w6, 0); 16104 16105 __ Ldxrb(w0, MemOperand(x10)); 16106 __ Clrex(); 16107 __ Add(w0, w0, 1); 16108 __ Stxrb(w5, w0, MemOperand(x10)); 16109 __ Add(w6, w6, w5); 16110 16111 __ Ldxrh(w0, MemOperand(x10)); 16112 __ Clrex(); 16113 __ Add(w0, w0, 1); 16114 __ Stxrh(w5, w0, MemOperand(x10)); 16115 __ Add(w6, w6, w5); 16116 16117 __ Ldxr(w0, MemOperand(x10)); 16118 __ Clrex(); 16119 __ Add(w0, w0, 1); 16120 __ Stxr(w5, w0, MemOperand(x10)); 16121 __ Add(w6, w6, w5); 16122 16123 __ Ldxr(x0, MemOperand(x10)); 16124 __ Clrex(); 16125 __ Add(x0, x0, 1); 16126 __ Stxr(w5, x0, MemOperand(x10)); 16127 __ Add(w6, w6, w5); 16128 16129 __ Ldxp(w0, w1, MemOperand(x10)); 16130 __ Clrex(); 16131 __ Add(w0, w0, 1); 16132 __ Add(w1, w1, 1); 16133 __ Stxp(w5, w0, w1, MemOperand(x10)); 16134 __ Add(w6, w6, w5); 16135 16136 __ Ldxp(x0, x1, MemOperand(x10)); 16137 __ Clrex(); 16138 __ Add(x0, x0, 1); 16139 __ Add(x1, x1, 1); 16140 __ Stxp(w5, x0, x1, MemOperand(x10)); 16141 __ Add(w6, w6, w5); 16142 16143 // Acquire-release variants. 16144 16145 __ Ldaxrb(w0, MemOperand(x10)); 16146 __ Clrex(); 16147 __ Add(w0, w0, 1); 16148 __ Stlxrb(w5, w0, MemOperand(x10)); 16149 __ Add(w6, w6, w5); 16150 16151 __ Ldaxrh(w0, MemOperand(x10)); 16152 __ Clrex(); 16153 __ Add(w0, w0, 1); 16154 __ Stlxrh(w5, w0, MemOperand(x10)); 16155 __ Add(w6, w6, w5); 16156 16157 __ Ldaxr(w0, MemOperand(x10)); 16158 __ Clrex(); 16159 __ Add(w0, w0, 1); 16160 __ Stlxr(w5, w0, MemOperand(x10)); 16161 __ Add(w6, w6, w5); 16162 16163 __ Ldaxr(x0, MemOperand(x10)); 16164 __ Clrex(); 16165 __ Add(x0, x0, 1); 16166 __ Stlxr(w5, x0, MemOperand(x10)); 16167 __ Add(w6, w6, w5); 16168 16169 __ Ldaxp(w0, w1, MemOperand(x10)); 16170 __ Clrex(); 16171 __ Add(w0, w0, 1); 16172 __ Add(w1, w1, 1); 16173 __ Stlxp(w5, w0, w1, MemOperand(x10)); 16174 __ Add(w6, w6, w5); 16175 16176 __ Ldaxp(x0, x1, MemOperand(x10)); 16177 __ Clrex(); 16178 __ Add(x0, x0, 1); 16179 __ Add(x1, x1, 1); 16180 __ Stlxp(w5, x0, x1, MemOperand(x10)); 16181 __ Add(w6, w6, w5); 16182 16183 END(); 16184 RUN(); 16185 16186 // None of the 12 store-exclusives should have succeeded. 16187 ASSERT_EQUAL_32(12, w6); 16188 16189 ASSERT_EQUAL_64(0, data[0]); 16190 ASSERT_EQUAL_64(0, data[1]); 16191 ASSERT_EQUAL_64(0, data[2]); 16192 16193 TEARDOWN(); 16194 } 16195 16196 16197 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64 16198 // Check that the simulator occasionally makes store-exclusive fail. 16199 TEST(ldxr_stxr_fail) { 16200 uint64_t data[] = {0, 0, 0}; 16201 uint64_t* data_aligned = AlignUp(data, kXRegSizeInBytes * 2); 16202 16203 // Impose a hard limit on the number of attempts, so the test cannot hang. 16204 static const uint64_t kWatchdog = 10000; 16205 Label done; 16206 16207 SETUP(); 16208 START(); 16209 16210 __ Mov(x10, reinterpret_cast<uintptr_t>(data_aligned)); 16211 __ Mov(x11, kWatchdog); 16212 16213 // This loop is the opposite of what we normally do with ldxr and stxr; we 16214 // keep trying until we fail (or the watchdog counter runs out). 16215 Label try_b; 16216 __ Bind(&try_b); 16217 __ Ldxrb(w0, MemOperand(x10)); 16218 __ Stxrb(w5, w0, MemOperand(x10)); 16219 // Check the watchdog counter. 16220 __ Sub(x11, x11, 1); 16221 __ Cbz(x11, &done); 16222 // Check the exclusive-store result. 
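// (Note the inversion relative to TEST(ldxr_stxr): there the loop retried
// while the store-exclusive kept failing; here it retries while the store
// keeps succeeding, and only a failure or the watchdog ends the loop:
//
//   __ Cbnz(w5, &try_b);   // Normal pattern: retry until a store succeeds.
//   __ Cbz(w5, &try_b);    // This test: retry until a store fails.)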
16223   __ Cbz(w5, &try_b);
16224
16225   Label try_h;
16226   __ Bind(&try_h);
16227   __ Ldxrh(w0, MemOperand(x10));
16228   __ Stxrh(w5, w0, MemOperand(x10));
16229   __ Sub(x11, x11, 1);
16230   __ Cbz(x11, &done);
16231   __ Cbz(w5, &try_h);
16232
16233   Label try_w;
16234   __ Bind(&try_w);
16235   __ Ldxr(w0, MemOperand(x10));
16236   __ Stxr(w5, w0, MemOperand(x10));
16237   __ Sub(x11, x11, 1);
16238   __ Cbz(x11, &done);
16239   __ Cbz(w5, &try_w);
16240
16241   Label try_x;
16242   __ Bind(&try_x);
16243   __ Ldxr(x0, MemOperand(x10));
16244   __ Stxr(w5, x0, MemOperand(x10));
16245   __ Sub(x11, x11, 1);
16246   __ Cbz(x11, &done);
16247   __ Cbz(w5, &try_x);
16248
16249   Label try_wp;
16250   __ Bind(&try_wp);
16251   __ Ldxp(w0, w1, MemOperand(x10));
16252   __ Stxp(w5, w0, w1, MemOperand(x10));
16253   __ Sub(x11, x11, 1);
16254   __ Cbz(x11, &done);
16255   __ Cbz(w5, &try_wp);
16256
16257   Label try_xp;
16258   __ Bind(&try_xp);
16259   __ Ldxp(x0, x1, MemOperand(x10));
16260   __ Stxp(w5, x0, x1, MemOperand(x10));
16261   __ Sub(x11, x11, 1);
16262   __ Cbz(x11, &done);
16263   __ Cbz(w5, &try_xp);
16264
16265   __ Bind(&done);
16266   // Trigger an error if x11 (watchdog) is zero.
16267   __ Cmp(x11, 0);
16268   __ Cset(x12, eq);
16269
16270   END();
16271   RUN();
16272
16273   // Check that the watchdog counter didn't run out.
16274   ASSERT_EQUAL_64(0, x12);
16275
16276   TEARDOWN();
16277 }
16278 #endif
16279
16280
16281 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
16282 // Check that the simulator occasionally makes store-exclusive fail.
16283 TEST(ldaxr_stlxr_fail) {
16284   uint64_t data[] = {0, 0, 0};
16285   uint64_t* data_aligned = AlignUp(data, kXRegSizeInBytes * 2);
16286
16287   // Impose a hard limit on the number of attempts, so the test cannot hang.
16288   static const uint64_t kWatchdog = 10000;
16289   Label done;
16290
16291   SETUP();
16292   START();
16293
16294   __ Mov(x10, reinterpret_cast<uintptr_t>(data_aligned));
16295   __ Mov(x11, kWatchdog);
16296
16297   // This loop is the opposite of what we normally do with ldaxr and stlxr;
16298   // we keep trying until we fail (or the watchdog counter runs out).
16299   Label try_b;
16300   __ Bind(&try_b);
16301   __ Ldaxrb(w0, MemOperand(x10));
16302   __ Stlxrb(w5, w0, MemOperand(x10));
16303   // Check the watchdog counter.
16304   __ Sub(x11, x11, 1);
16305   __ Cbz(x11, &done);
16306   // Check the exclusive-store result.
16307   __ Cbz(w5, &try_b);
16308
16309   Label try_h;
16310   __ Bind(&try_h);
16311   __ Ldaxrh(w0, MemOperand(x10));
16312   __ Stlxrh(w5, w0, MemOperand(x10));
16313   __ Sub(x11, x11, 1);
16314   __ Cbz(x11, &done);
16315   __ Cbz(w5, &try_h);
16316
16317   Label try_w;
16318   __ Bind(&try_w);
16319   __ Ldaxr(w0, MemOperand(x10));
16320   __ Stlxr(w5, w0, MemOperand(x10));
16321   __ Sub(x11, x11, 1);
16322   __ Cbz(x11, &done);
16323   __ Cbz(w5, &try_w);
16324
16325   Label try_x;
16326   __ Bind(&try_x);
16327   __ Ldaxr(x0, MemOperand(x10));
16328   __ Stlxr(w5, x0, MemOperand(x10));
16329   __ Sub(x11, x11, 1);
16330   __ Cbz(x11, &done);
16331   __ Cbz(w5, &try_x);
16332
16333   Label try_wp;
16334   __ Bind(&try_wp);
16335   __ Ldaxp(w0, w1, MemOperand(x10));
16336   __ Stlxp(w5, w0, w1, MemOperand(x10));
16337   __ Sub(x11, x11, 1);
16338   __ Cbz(x11, &done);
16339   __ Cbz(w5, &try_wp);
16340
16341   Label try_xp;
16342   __ Bind(&try_xp);
16343   __ Ldaxp(x0, x1, MemOperand(x10));
16344   __ Stlxp(w5, x0, x1, MemOperand(x10));
16345   __ Sub(x11, x11, 1);
16346   __ Cbz(x11, &done);
16347   __ Cbz(w5, &try_xp);
16348
16349   __ Bind(&done);
16350   // Trigger an error if x11 (watchdog) is zero.
16351 __ Cmp(x11, 0); 16352 __ Cset(x12, eq); 16353 16354 END(); 16355 RUN(); 16356 16357 // Check that the watchdog counter didn't run out. 16358 ASSERT_EQUAL_64(0, x12); 16359 16360 TEARDOWN(); 16361 } 16362 #endif 16363 16364 16365 TEST(load_store_tagged_immediate_offset) { 16366 uint64_t tags[] = {0x00, 0x1, 0x55, 0xff}; 16367 int tag_count = sizeof(tags) / sizeof(tags[0]); 16368 16369 const int kMaxDataLength = 160; 16370 16371 for (int i = 0; i < tag_count; i++) { 16372 unsigned char src[kMaxDataLength]; 16373 uint64_t src_raw = reinterpret_cast<uint64_t>(src); 16374 uint64_t src_tag = tags[i]; 16375 uint64_t src_tagged = CPU::SetPointerTag(src_raw, src_tag); 16376 16377 for (int k = 0; k < kMaxDataLength; k++) { 16378 src[k] = k + 1; 16379 } 16380 16381 for (int j = 0; j < tag_count; j++) { 16382 unsigned char dst[kMaxDataLength]; 16383 uint64_t dst_raw = reinterpret_cast<uint64_t>(dst); 16384 uint64_t dst_tag = tags[j]; 16385 uint64_t dst_tagged = CPU::SetPointerTag(dst_raw, dst_tag); 16386 16387 memset(dst, 0, kMaxDataLength); 16388 16389 SETUP(); 16390 START(); 16391 16392 __ Mov(x0, src_tagged); 16393 __ Mov(x1, dst_tagged); 16394 16395 int offset = 0; 16396 16397 // Scaled-immediate offsets. 16398 { 16399 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16400 __ ldp(q0, q1, MemOperand(x0, offset)); 16401 __ stp(q0, q1, MemOperand(x1, offset)); 16402 } 16403 offset += 2 * kQRegSizeInBytes; 16404 16405 { 16406 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16407 __ ldp(x2, x3, MemOperand(x0, offset)); 16408 __ stp(x2, x3, MemOperand(x1, offset)); 16409 } 16410 offset += 2 * kXRegSizeInBytes; 16411 16412 { 16413 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16414 __ ldpsw(x2, x3, MemOperand(x0, offset)); 16415 __ stp(w2, w3, MemOperand(x1, offset)); 16416 } 16417 offset += 2 * kWRegSizeInBytes; 16418 16419 { 16420 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16421 __ ldp(d0, d1, MemOperand(x0, offset)); 16422 __ stp(d0, d1, MemOperand(x1, offset)); 16423 } 16424 offset += 2 * kDRegSizeInBytes; 16425 16426 { 16427 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16428 __ ldp(w2, w3, MemOperand(x0, offset)); 16429 __ stp(w2, w3, MemOperand(x1, offset)); 16430 } 16431 offset += 2 * kWRegSizeInBytes; 16432 16433 { 16434 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16435 __ ldp(s0, s1, MemOperand(x0, offset)); 16436 __ stp(s0, s1, MemOperand(x1, offset)); 16437 } 16438 offset += 2 * kSRegSizeInBytes; 16439 16440 { 16441 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16442 __ ldr(x2, MemOperand(x0, offset), RequireScaledOffset); 16443 __ str(x2, MemOperand(x1, offset), RequireScaledOffset); 16444 } 16445 offset += kXRegSizeInBytes; 16446 16447 { 16448 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16449 __ ldr(d0, MemOperand(x0, offset), RequireScaledOffset); 16450 __ str(d0, MemOperand(x1, offset), RequireScaledOffset); 16451 } 16452 offset += kDRegSizeInBytes; 16453 16454 { 16455 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16456 __ ldr(w2, MemOperand(x0, offset), RequireScaledOffset); 16457 __ str(w2, MemOperand(x1, offset), RequireScaledOffset); 16458 } 16459 offset += kWRegSizeInBytes; 16460 16461 { 16462 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16463 __ ldr(s0, MemOperand(x0, offset), RequireScaledOffset); 16464 __ str(s0, MemOperand(x1, offset), RequireScaledOffset); 16465 } 16466 offset += kSRegSizeInBytes; 16467 16468 { 16469 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16470 __ 
ldrh(w2, MemOperand(x0, offset), RequireScaledOffset); 16471 __ strh(w2, MemOperand(x1, offset), RequireScaledOffset); 16472 } 16473 offset += 2; 16474 16475 { 16476 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16477 __ ldrsh(w2, MemOperand(x0, offset), RequireScaledOffset); 16478 __ strh(w2, MemOperand(x1, offset), RequireScaledOffset); 16479 } 16480 offset += 2; 16481 16482 { 16483 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16484 __ ldrb(w2, MemOperand(x0, offset), RequireScaledOffset); 16485 __ strb(w2, MemOperand(x1, offset), RequireScaledOffset); 16486 } 16487 offset += 1; 16488 16489 { 16490 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16491 __ ldrsb(w2, MemOperand(x0, offset), RequireScaledOffset); 16492 __ strb(w2, MemOperand(x1, offset), RequireScaledOffset); 16493 } 16494 offset += 1; 16495 16496 // Unscaled-immediate offsets. 16497 16498 { 16499 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16500 __ ldur(x2, MemOperand(x0, offset), RequireUnscaledOffset); 16501 __ stur(x2, MemOperand(x1, offset), RequireUnscaledOffset); 16502 } 16503 offset += kXRegSizeInBytes; 16504 16505 { 16506 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16507 __ ldur(d0, MemOperand(x0, offset), RequireUnscaledOffset); 16508 __ stur(d0, MemOperand(x1, offset), RequireUnscaledOffset); 16509 } 16510 offset += kDRegSizeInBytes; 16511 16512 { 16513 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16514 __ ldur(w2, MemOperand(x0, offset), RequireUnscaledOffset); 16515 __ stur(w2, MemOperand(x1, offset), RequireUnscaledOffset); 16516 } 16517 offset += kWRegSizeInBytes; 16518 16519 { 16520 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16521 __ ldur(s0, MemOperand(x0, offset), RequireUnscaledOffset); 16522 __ stur(s0, MemOperand(x1, offset), RequireUnscaledOffset); 16523 } 16524 offset += kSRegSizeInBytes; 16525 16526 { 16527 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16528 __ ldurh(w2, MemOperand(x0, offset), RequireUnscaledOffset); 16529 __ sturh(w2, MemOperand(x1, offset), RequireUnscaledOffset); 16530 } 16531 offset += 2; 16532 16533 { 16534 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16535 __ ldursh(w2, MemOperand(x0, offset), RequireUnscaledOffset); 16536 __ sturh(w2, MemOperand(x1, offset), RequireUnscaledOffset); 16537 } 16538 offset += 2; 16539 16540 { 16541 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16542 __ ldurb(w2, MemOperand(x0, offset), RequireUnscaledOffset); 16543 __ sturb(w2, MemOperand(x1, offset), RequireUnscaledOffset); 16544 } 16545 offset += 1; 16546 16547 { 16548 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16549 __ ldursb(w2, MemOperand(x0, offset), RequireUnscaledOffset); 16550 __ sturb(w2, MemOperand(x1, offset), RequireUnscaledOffset); 16551 } 16552 offset += 1; 16553 16554 // Extract the tag (so we can test that it was preserved correctly). 
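// An unsigned bitfield extract recovers just the tag from the pointer.
// In general, Ubfx(xd, xn, lsb, width) computes
//
//   xd = (xn >> lsb) & ((UINT64_C(1) << width) - 1);
//
// so with kAddressTagOffset and kAddressTagWidth it isolates the tag bits
// for the ASSERT_EQUAL_64 checks after RUN().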
16555 __ Ubfx(x0, x0, kAddressTagOffset, kAddressTagWidth); 16556 __ Ubfx(x1, x1, kAddressTagOffset, kAddressTagWidth); 16557 16558 VIXL_ASSERT(kMaxDataLength >= offset); 16559 16560 END(); 16561 RUN(); 16562 16563 ASSERT_EQUAL_64(src_tag, x0); 16564 ASSERT_EQUAL_64(dst_tag, x1); 16565 16566 for (int k = 0; k < offset; k++) { 16567 VIXL_CHECK(src[k] == dst[k]); 16568 } 16569 16570 TEARDOWN(); 16571 } 16572 } 16573 } 16574 16575 16576 TEST(load_store_tagged_immediate_preindex) { 16577 uint64_t tags[] = {0x00, 0x1, 0x55, 0xff}; 16578 int tag_count = sizeof(tags) / sizeof(tags[0]); 16579 16580 const int kMaxDataLength = 128; 16581 16582 for (int i = 0; i < tag_count; i++) { 16583 unsigned char src[kMaxDataLength]; 16584 uint64_t src_raw = reinterpret_cast<uint64_t>(src); 16585 uint64_t src_tag = tags[i]; 16586 uint64_t src_tagged = CPU::SetPointerTag(src_raw, src_tag); 16587 16588 for (int k = 0; k < kMaxDataLength; k++) { 16589 src[k] = k + 1; 16590 } 16591 16592 for (int j = 0; j < tag_count; j++) { 16593 unsigned char dst[kMaxDataLength]; 16594 uint64_t dst_raw = reinterpret_cast<uint64_t>(dst); 16595 uint64_t dst_tag = tags[j]; 16596 uint64_t dst_tagged = CPU::SetPointerTag(dst_raw, dst_tag); 16597 16598 for (int k = 0; k < kMaxDataLength; k++) { 16599 dst[k] = 0; 16600 } 16601 16602 SETUP(); 16603 START(); 16604 16605 // Each MemOperand must apply a pre-index equal to the size of the 16606 // previous access. 16607 16608 // Start with a non-zero preindex. 16609 int preindex = 62 * kXRegSizeInBytes; 16610 int data_length = 0; 16611 16612 __ Mov(x0, src_tagged - preindex); 16613 __ Mov(x1, dst_tagged - preindex); 16614 16615 { 16616 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16617 __ ldp(q0, q1, MemOperand(x0, preindex, PreIndex)); 16618 __ stp(q0, q1, MemOperand(x1, preindex, PreIndex)); 16619 } 16620 preindex = 2 * kQRegSizeInBytes; 16621 data_length = preindex; 16622 16623 { 16624 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16625 __ ldp(x2, x3, MemOperand(x0, preindex, PreIndex)); 16626 __ stp(x2, x3, MemOperand(x1, preindex, PreIndex)); 16627 } 16628 preindex = 2 * kXRegSizeInBytes; 16629 data_length += preindex; 16630 16631 { 16632 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16633 __ ldpsw(x2, x3, MemOperand(x0, preindex, PreIndex)); 16634 __ stp(w2, w3, MemOperand(x1, preindex, PreIndex)); 16635 } 16636 preindex = 2 * kWRegSizeInBytes; 16637 data_length += preindex; 16638 16639 { 16640 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16641 __ ldp(d0, d1, MemOperand(x0, preindex, PreIndex)); 16642 __ stp(d0, d1, MemOperand(x1, preindex, PreIndex)); 16643 } 16644 preindex = 2 * kDRegSizeInBytes; 16645 data_length += preindex; 16646 16647 { 16648 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16649 __ ldp(w2, w3, MemOperand(x0, preindex, PreIndex)); 16650 __ stp(w2, w3, MemOperand(x1, preindex, PreIndex)); 16651 } 16652 preindex = 2 * kWRegSizeInBytes; 16653 data_length += preindex; 16654 16655 { 16656 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16657 __ ldp(s0, s1, MemOperand(x0, preindex, PreIndex)); 16658 __ stp(s0, s1, MemOperand(x1, preindex, PreIndex)); 16659 } 16660 preindex = 2 * kSRegSizeInBytes; 16661 data_length += preindex; 16662 16663 { 16664 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16665 __ ldr(x2, MemOperand(x0, preindex, PreIndex)); 16666 __ str(x2, MemOperand(x1, preindex, PreIndex)); 16667 } 16668 preindex = kXRegSizeInBytes; 16669 data_length += preindex; 16670 16671 { 16672 ExactAssemblyScope 
scope(&masm, 2 * kInstructionSize); 16673 __ ldr(d0, MemOperand(x0, preindex, PreIndex)); 16674 __ str(d0, MemOperand(x1, preindex, PreIndex)); 16675 } 16676 preindex = kDRegSizeInBytes; 16677 data_length += preindex; 16678 16679 { 16680 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16681 __ ldr(w2, MemOperand(x0, preindex, PreIndex)); 16682 __ str(w2, MemOperand(x1, preindex, PreIndex)); 16683 } 16684 preindex = kWRegSizeInBytes; 16685 data_length += preindex; 16686 16687 { 16688 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16689 __ ldr(s0, MemOperand(x0, preindex, PreIndex)); 16690 __ str(s0, MemOperand(x1, preindex, PreIndex)); 16691 } 16692 preindex = kSRegSizeInBytes; 16693 data_length += preindex; 16694 16695 { 16696 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16697 __ ldrh(w2, MemOperand(x0, preindex, PreIndex)); 16698 __ strh(w2, MemOperand(x1, preindex, PreIndex)); 16699 } 16700 preindex = 2; 16701 data_length += preindex; 16702 16703 { 16704 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16705 __ ldrsh(w2, MemOperand(x0, preindex, PreIndex)); 16706 __ strh(w2, MemOperand(x1, preindex, PreIndex)); 16707 } 16708 preindex = 2; 16709 data_length += preindex; 16710 16711 { 16712 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16713 __ ldrb(w2, MemOperand(x0, preindex, PreIndex)); 16714 __ strb(w2, MemOperand(x1, preindex, PreIndex)); 16715 } 16716 preindex = 1; 16717 data_length += preindex; 16718 16719 { 16720 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16721 __ ldrsb(w2, MemOperand(x0, preindex, PreIndex)); 16722 __ strb(w2, MemOperand(x1, preindex, PreIndex)); 16723 } 16724 preindex = 1; 16725 data_length += preindex; 16726 16727 VIXL_ASSERT(kMaxDataLength >= data_length); 16728 16729 END(); 16730 RUN(); 16731 16732 // Check that the preindex was correctly applied in each operation, and 16733 // that the tag was preserved. 
16734 ASSERT_EQUAL_64(src_tagged + data_length - preindex, x0); 16735 ASSERT_EQUAL_64(dst_tagged + data_length - preindex, x1); 16736 16737 for (int k = 0; k < data_length; k++) { 16738 VIXL_CHECK(src[k] == dst[k]); 16739 } 16740 16741 TEARDOWN(); 16742 } 16743 } 16744 } 16745 16746 16747 TEST(load_store_tagged_immediate_postindex) { 16748 uint64_t tags[] = {0x00, 0x1, 0x55, 0xff}; 16749 int tag_count = sizeof(tags) / sizeof(tags[0]); 16750 16751 const int kMaxDataLength = 128; 16752 16753 for (int i = 0; i < tag_count; i++) { 16754 unsigned char src[kMaxDataLength]; 16755 uint64_t src_raw = reinterpret_cast<uint64_t>(src); 16756 uint64_t src_tag = tags[i]; 16757 uint64_t src_tagged = CPU::SetPointerTag(src_raw, src_tag); 16758 16759 for (int k = 0; k < kMaxDataLength; k++) { 16760 src[k] = k + 1; 16761 } 16762 16763 for (int j = 0; j < tag_count; j++) { 16764 unsigned char dst[kMaxDataLength]; 16765 uint64_t dst_raw = reinterpret_cast<uint64_t>(dst); 16766 uint64_t dst_tag = tags[j]; 16767 uint64_t dst_tagged = CPU::SetPointerTag(dst_raw, dst_tag); 16768 16769 for (int k = 0; k < kMaxDataLength; k++) { 16770 dst[k] = 0; 16771 } 16772 16773 SETUP(); 16774 START(); 16775 16776 int postindex = 2 * kXRegSizeInBytes; 16777 int data_length = 0; 16778 16779 __ Mov(x0, src_tagged); 16780 __ Mov(x1, dst_tagged); 16781 16782 { 16783 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16784 __ ldp(x2, x3, MemOperand(x0, postindex, PostIndex)); 16785 __ stp(x2, x3, MemOperand(x1, postindex, PostIndex)); 16786 } 16787 data_length = postindex; 16788 16789 postindex = 2 * kQRegSizeInBytes; 16790 { 16791 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16792 __ ldp(q0, q1, MemOperand(x0, postindex, PostIndex)); 16793 __ stp(q0, q1, MemOperand(x1, postindex, PostIndex)); 16794 } 16795 data_length += postindex; 16796 16797 postindex = 2 * kWRegSizeInBytes; 16798 { 16799 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16800 __ ldpsw(x2, x3, MemOperand(x0, postindex, PostIndex)); 16801 __ stp(w2, w3, MemOperand(x1, postindex, PostIndex)); 16802 } 16803 data_length += postindex; 16804 16805 postindex = 2 * kDRegSizeInBytes; 16806 { 16807 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16808 __ ldp(d0, d1, MemOperand(x0, postindex, PostIndex)); 16809 __ stp(d0, d1, MemOperand(x1, postindex, PostIndex)); 16810 } 16811 data_length += postindex; 16812 16813 postindex = 2 * kWRegSizeInBytes; 16814 { 16815 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16816 __ ldp(w2, w3, MemOperand(x0, postindex, PostIndex)); 16817 __ stp(w2, w3, MemOperand(x1, postindex, PostIndex)); 16818 } 16819 data_length += postindex; 16820 16821 postindex = 2 * kSRegSizeInBytes; 16822 { 16823 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16824 __ ldp(s0, s1, MemOperand(x0, postindex, PostIndex)); 16825 __ stp(s0, s1, MemOperand(x1, postindex, PostIndex)); 16826 } 16827 data_length += postindex; 16828 16829 postindex = kXRegSizeInBytes; 16830 { 16831 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16832 __ ldr(x2, MemOperand(x0, postindex, PostIndex)); 16833 __ str(x2, MemOperand(x1, postindex, PostIndex)); 16834 } 16835 data_length += postindex; 16836 16837 postindex = kDRegSizeInBytes; 16838 { 16839 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16840 __ ldr(d0, MemOperand(x0, postindex, PostIndex)); 16841 __ str(d0, MemOperand(x1, postindex, PostIndex)); 16842 } 16843 data_length += postindex; 16844 16845 postindex = kWRegSizeInBytes; 16846 { 16847 ExactAssemblyScope scope(&masm, 2 * 
kInstructionSize); 16848 __ ldr(w2, MemOperand(x0, postindex, PostIndex)); 16849 __ str(w2, MemOperand(x1, postindex, PostIndex)); 16850 } 16851 data_length += postindex; 16852 16853 postindex = kSRegSizeInBytes; 16854 { 16855 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16856 __ ldr(s0, MemOperand(x0, postindex, PostIndex)); 16857 __ str(s0, MemOperand(x1, postindex, PostIndex)); 16858 } 16859 data_length += postindex; 16860 16861 postindex = 2; 16862 { 16863 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16864 __ ldrh(w2, MemOperand(x0, postindex, PostIndex)); 16865 __ strh(w2, MemOperand(x1, postindex, PostIndex)); 16866 } 16867 data_length += postindex; 16868 16869 postindex = 2; 16870 { 16871 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16872 __ ldrsh(w2, MemOperand(x0, postindex, PostIndex)); 16873 __ strh(w2, MemOperand(x1, postindex, PostIndex)); 16874 } 16875 data_length += postindex; 16876 16877 postindex = 1; 16878 { 16879 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16880 __ ldrb(w2, MemOperand(x0, postindex, PostIndex)); 16881 __ strb(w2, MemOperand(x1, postindex, PostIndex)); 16882 } 16883 data_length += postindex; 16884 16885 postindex = 1; 16886 { 16887 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16888 __ ldrsb(w2, MemOperand(x0, postindex, PostIndex)); 16889 __ strb(w2, MemOperand(x1, postindex, PostIndex)); 16890 } 16891 data_length += postindex; 16892 16893 VIXL_ASSERT(kMaxDataLength >= data_length); 16894 16895 END(); 16896 RUN(); 16897 16898 // Check that the postindex was correctly applied in each operation, and 16899 // that the tag was preserved. 16900 ASSERT_EQUAL_64(src_tagged + data_length, x0); 16901 ASSERT_EQUAL_64(dst_tagged + data_length, x1); 16902 16903 for (int k = 0; k < data_length; k++) { 16904 VIXL_CHECK(src[k] == dst[k]); 16905 } 16906 16907 TEARDOWN(); 16908 } 16909 } 16910 } 16911 16912 16913 TEST(load_store_tagged_register_offset) { 16914 uint64_t tags[] = {0x00, 0x1, 0x55, 0xff}; 16915 int tag_count = sizeof(tags) / sizeof(tags[0]); 16916 16917 const int kMaxDataLength = 128; 16918 16919 for (int i = 0; i < tag_count; i++) { 16920 unsigned char src[kMaxDataLength]; 16921 uint64_t src_raw = reinterpret_cast<uint64_t>(src); 16922 uint64_t src_tag = tags[i]; 16923 uint64_t src_tagged = CPU::SetPointerTag(src_raw, src_tag); 16924 16925 for (int k = 0; k < kMaxDataLength; k++) { 16926 src[k] = k + 1; 16927 } 16928 16929 for (int j = 0; j < tag_count; j++) { 16930 unsigned char dst[kMaxDataLength]; 16931 uint64_t dst_raw = reinterpret_cast<uint64_t>(dst); 16932 uint64_t dst_tag = tags[j]; 16933 uint64_t dst_tagged = CPU::SetPointerTag(dst_raw, dst_tag); 16934 16935 // Also tag the offset register; the operation should still succeed. 
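      // (This relies on AArch64's "top byte ignore" behaviour: bits 63:56 of
      // a data address are not used for the access, so a tagged base plus a
      // tagged offset - even if the tag bytes produce a carry - still refers
      // to the same untagged location.)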
      for (int o = 0; o < tag_count; o++) {
        uint64_t offset_base = CPU::SetPointerTag(UINT64_C(0), tags[o]);
        int data_length = 0;

        for (int k = 0; k < kMaxDataLength; k++) {
          dst[k] = 0;
        }

        SETUP();
        START();

        __ Mov(x0, src_tagged);
        __ Mov(x1, dst_tagged);

        __ Mov(x10, offset_base + data_length);
        {
          ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
          __ ldr(x2, MemOperand(x0, x10));
          __ str(x2, MemOperand(x1, x10));
        }
        data_length += kXRegSizeInBytes;

        __ Mov(x10, offset_base + data_length);
        {
          ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
          __ ldr(d0, MemOperand(x0, x10));
          __ str(d0, MemOperand(x1, x10));
        }
        data_length += kDRegSizeInBytes;

        __ Mov(x10, offset_base + data_length);
        {
          ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
          __ ldr(w2, MemOperand(x0, x10));
          __ str(w2, MemOperand(x1, x10));
        }
        data_length += kWRegSizeInBytes;

        __ Mov(x10, offset_base + data_length);
        {
          ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
          __ ldr(s0, MemOperand(x0, x10));
          __ str(s0, MemOperand(x1, x10));
        }
        data_length += kSRegSizeInBytes;

        __ Mov(x10, offset_base + data_length);
        {
          ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
          __ ldrh(w2, MemOperand(x0, x10));
          __ strh(w2, MemOperand(x1, x10));
        }
        data_length += 2;

        __ Mov(x10, offset_base + data_length);
        {
          ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
          __ ldrsh(w2, MemOperand(x0, x10));
          __ strh(w2, MemOperand(x1, x10));
        }
        data_length += 2;

        __ Mov(x10, offset_base + data_length);
        {
          ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
          __ ldrb(w2, MemOperand(x0, x10));
          __ strb(w2, MemOperand(x1, x10));
        }
        data_length += 1;

        __ Mov(x10, offset_base + data_length);
        {
          ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
          __ ldrsb(w2, MemOperand(x0, x10));
          __ strb(w2, MemOperand(x1, x10));
        }
        data_length += 1;

        VIXL_ASSERT(kMaxDataLength >= data_length);

        END();
        RUN();

        // Register-offset addressing performs no writeback, so check that
        // the base and offset registers were not updated by the loads and
        // stores, and that the tags were preserved.
        ASSERT_EQUAL_64(src_tagged, x0);
        ASSERT_EQUAL_64(dst_tagged, x1);
        ASSERT_EQUAL_64(offset_base + data_length - 1, x10);

        for (int k = 0; k < data_length; k++) {
          VIXL_CHECK(src[k] == dst[k]);
        }

        TEARDOWN();
      }
    }
  }
}


TEST(load_store_tagged_register_postindex) {
  uint64_t src[] = {0x0706050403020100, 0x0f0e0d0c0b0a0908};
  uint64_t tags[] = {0x00, 0x1, 0x55, 0xff};
  int tag_count = sizeof(tags) / sizeof(tags[0]);

  for (int j = 0; j < tag_count; j++) {
    for (int i = 0; i < tag_count; i++) {
      SETUP();
      uint64_t src_base = reinterpret_cast<uint64_t>(src);
      uint64_t src_tagged = CPU::SetPointerTag(src_base, tags[i]);
      uint64_t offset_tagged = CPU::SetPointerTag(UINT64_C(0), tags[j]);

      START();
      __ Mov(x10, src_tagged);
      __ Mov(x11, offset_tagged);
      __ Ld1(v0.V16B(), MemOperand(x10, x11, PostIndex));
      // TODO: add other instructions (ld2-4, st1-4) as they become available.
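      // Note that the register post-index form writes back base + offset in
      // full, tag bits included, so x10 is expected to become
      // src_tagged + offset_tagged (checked after RUN() below).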
17053 END(); 17054 17055 RUN(); 17056 17057 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q0); 17058 ASSERT_EQUAL_64(src_tagged + offset_tagged, x10); 17059 17060 TEARDOWN(); 17061 } 17062 } 17063 } 17064 17065 17066 TEST(branch_tagged) { 17067 SETUP(); 17068 START(); 17069 17070 Label loop, loop_entry, done; 17071 __ Adr(x0, &loop); 17072 __ Mov(x1, 0); 17073 __ B(&loop_entry); 17074 17075 __ Bind(&loop); 17076 __ Add(x1, x1, 1); // Count successful jumps. 17077 17078 // Advance to the next tag, then bail out if we've come back around to tag 0. 17079 __ Add(x0, x0, UINT64_C(1) << kAddressTagOffset); 17080 __ Tst(x0, kAddressTagMask); 17081 __ B(eq, &done); 17082 17083 __ Bind(&loop_entry); 17084 __ Br(x0); 17085 17086 __ Bind(&done); 17087 17088 END(); 17089 RUN(); 17090 17091 ASSERT_EQUAL_64(1 << kAddressTagWidth, x1); 17092 17093 TEARDOWN(); 17094 } 17095 17096 17097 TEST(branch_and_link_tagged) { 17098 SETUP(); 17099 START(); 17100 17101 Label loop, loop_entry, done; 17102 __ Adr(x0, &loop); 17103 __ Mov(x1, 0); 17104 __ B(&loop_entry); 17105 17106 __ Bind(&loop); 17107 17108 // Bail out (before counting a successful jump) if lr appears to be tagged. 17109 __ Tst(lr, kAddressTagMask); 17110 __ B(ne, &done); 17111 17112 __ Add(x1, x1, 1); // Count successful jumps. 17113 17114 // Advance to the next tag, then bail out if we've come back around to tag 0. 17115 __ Add(x0, x0, UINT64_C(1) << kAddressTagOffset); 17116 __ Tst(x0, kAddressTagMask); 17117 __ B(eq, &done); 17118 17119 __ Bind(&loop_entry); 17120 __ Blr(x0); 17121 17122 __ Bind(&done); 17123 17124 END(); 17125 RUN(); 17126 17127 ASSERT_EQUAL_64(1 << kAddressTagWidth, x1); 17128 17129 TEARDOWN(); 17130 } 17131 17132 17133 TEST(branch_tagged_and_adr_adrp) { 17134 SETUP_CUSTOM(kPageSize, PageOffsetDependentCode); 17135 START(); 17136 17137 Label loop, loop_entry, done; 17138 __ Adr(x0, &loop); 17139 __ Mov(x1, 0); 17140 __ B(&loop_entry); 17141 17142 __ Bind(&loop); 17143 17144 // Bail out (before counting a successful jump) if `adr x10, ...` is tagged. 17145 __ Adr(x10, &done); 17146 __ Tst(x10, kAddressTagMask); 17147 __ B(ne, &done); 17148 17149 // Bail out (before counting a successful jump) if `adrp x11, ...` is tagged. 17150 __ Adrp(x11, &done); 17151 __ Tst(x11, kAddressTagMask); 17152 __ B(ne, &done); 17153 17154 __ Add(x1, x1, 1); // Count successful iterations. 17155 17156 // Advance to the next tag, then bail out if we've come back around to tag 0. 
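  // (kAddressTagOffset is the bit position of the tag byte, so the Add below
  // steps x0 to the next tag; after 1 << kAddressTagWidth successful branches
  // the tag wraps back to zero and the loop exits, which is the count checked
  // at the end.)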
17157 __ Add(x0, x0, UINT64_C(1) << kAddressTagOffset); 17158 __ Tst(x0, kAddressTagMask); 17159 __ B(eq, &done); 17160 17161 __ Bind(&loop_entry); 17162 __ Br(x0); 17163 17164 __ Bind(&done); 17165 17166 END(); 17167 RUN_CUSTOM(); 17168 17169 ASSERT_EQUAL_64(1 << kAddressTagWidth, x1); 17170 17171 TEARDOWN_CUSTOM(); 17172 } 17173 17174 TEST(neon_3same_addp) { 17175 SETUP(); 17176 17177 START(); 17178 17179 __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa); 17180 __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff); 17181 __ Addp(v16.V16B(), v0.V16B(), v1.V16B()); 17182 17183 END(); 17184 17185 RUN(); 17186 ASSERT_EQUAL_128(0x00ff54ffff54aaff, 0xffffffffffffffff, q16); 17187 TEARDOWN(); 17188 } 17189 17190 TEST(neon_3same_sqdmulh_sqrdmulh) { 17191 SETUP(); 17192 17193 START(); 17194 17195 __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000); 17196 __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000); 17197 __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000); 17198 __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000); 17199 17200 __ Sqdmulh(v16.V4H(), v0.V4H(), v1.V4H()); 17201 __ Sqdmulh(v17.V4S(), v2.V4S(), v3.V4S()); 17202 __ Sqdmulh(h18, h0, h1); 17203 __ Sqdmulh(s19, s2, s3); 17204 17205 __ Sqrdmulh(v20.V4H(), v0.V4H(), v1.V4H()); 17206 __ Sqrdmulh(v21.V4S(), v2.V4S(), v3.V4S()); 17207 __ Sqrdmulh(h22, h0, h1); 17208 __ Sqrdmulh(s23, s2, s3); 17209 17210 END(); 17211 17212 RUN(); 17213 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000100007fff, q16); 17214 ASSERT_EQUAL_128(0x000000017fffffff, 0x000000007fffffff, q17); 17215 ASSERT_EQUAL_128(0, 0x7fff, q18); 17216 ASSERT_EQUAL_128(0, 0x7fffffff, q19); 17217 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000100017fff, q20); 17218 ASSERT_EQUAL_128(0x000000017fffffff, 0x000000017fffffff, q21); 17219 ASSERT_EQUAL_128(0, 0x7fff, q22); 17220 ASSERT_EQUAL_128(0, 0x7fffffff, q23); 17221 TEARDOWN(); 17222 } 17223 17224 TEST(neon_byelement_sqdmulh_sqrdmulh) { 17225 SETUP(); 17226 17227 START(); 17228 17229 __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000); 17230 __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000); 17231 __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000); 17232 __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000); 17233 17234 __ Sqdmulh(v16.V4H(), v0.V4H(), v1.H(), 1); 17235 __ Sqdmulh(v17.V4S(), v2.V4S(), v3.S(), 1); 17236 __ Sqdmulh(h18, h0, v1.H(), 0); 17237 __ Sqdmulh(s19, s2, v3.S(), 0); 17238 17239 __ Sqrdmulh(v20.V4H(), v0.V4H(), v1.H(), 1); 17240 __ Sqrdmulh(v21.V4S(), v2.V4S(), v3.S(), 1); 17241 __ Sqrdmulh(h22, h0, v1.H(), 0); 17242 __ Sqrdmulh(s23, s2, v3.S(), 0); 17243 17244 END(); 17245 17246 RUN(); 17247 ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000fff0, q16); 17248 ASSERT_EQUAL_128(0x00000000fffffff0, 0x00000000fffffff0, q17); 17249 ASSERT_EQUAL_128(0, 0x7fff, q18); 17250 ASSERT_EQUAL_128(0, 0x7fffffff, q19); 17251 ASSERT_EQUAL_128(0x0000000000000000, 0x000000010001fff0, q20); 17252 ASSERT_EQUAL_128(0x00000001fffffff0, 0x00000001fffffff0, q21); 17253 ASSERT_EQUAL_128(0, 0x7fff, q22); 17254 ASSERT_EQUAL_128(0, 0x7fffffff, q23); 17255 TEARDOWN(); 17256 } 17257 17258 17259 TEST(neon_2regmisc_saddlp) { 17260 SETUP(); 17261 17262 START(); 17263 17264 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081); 17265 17266 __ Saddlp(v16.V8H(), v0.V16B()); 17267 __ Saddlp(v17.V4H(), v0.V8B()); 17268 17269 __ Saddlp(v18.V4S(), v0.V8H()); 17270 __ Saddlp(v19.V2S(), v0.V4H()); 17271 17272 __ Saddlp(v20.V2D(), v0.V4S()); 17273 __ Saddlp(v21.V1D(), v0.V2S()); 17274 17275 
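  // As a worked example for the byte-to-halfword form: the lowest two bytes
  // of v0 are 0x81 (-127) and 0x80 (-128); sign-extending and adding gives
  // -255 = 0xff01, the lowest halfword expected in q16 below.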
END(); 17276 17277 RUN(); 17278 ASSERT_EQUAL_128(0x0080ffffff010080, 0xff01ffff0080ff01, q16); 17279 ASSERT_EQUAL_128(0x0000000000000000, 0xff01ffff0080ff01, q17); 17280 ASSERT_EQUAL_128(0x0000800000000081, 0xffff7f81ffff8200, q18); 17281 ASSERT_EQUAL_128(0x0000000000000000, 0xffff7f81ffff8200, q19); 17282 ASSERT_EQUAL_128(0x0000000000818000, 0xffffffff82017f81, q20); 17283 ASSERT_EQUAL_128(0x0000000000000000, 0xffffffff82017f81, q21); 17284 TEARDOWN(); 17285 } 17286 17287 TEST(neon_2regmisc_uaddlp) { 17288 SETUP(); 17289 17290 START(); 17291 17292 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081); 17293 17294 __ Uaddlp(v16.V8H(), v0.V16B()); 17295 __ Uaddlp(v17.V4H(), v0.V8B()); 17296 17297 __ Uaddlp(v18.V4S(), v0.V8H()); 17298 __ Uaddlp(v19.V2S(), v0.V4H()); 17299 17300 __ Uaddlp(v20.V2D(), v0.V4S()); 17301 __ Uaddlp(v21.V1D(), v0.V2S()); 17302 17303 END(); 17304 17305 RUN(); 17306 ASSERT_EQUAL_128(0x008000ff01010080, 0x010100ff00800101, q16); 17307 ASSERT_EQUAL_128(0x0000000000000000, 0x010100ff00800101, q17); 17308 ASSERT_EQUAL_128(0x0000800000010081, 0x00017f8100008200, q18); 17309 ASSERT_EQUAL_128(0x0000000000000000, 0x00017f8100008200, q19); 17310 ASSERT_EQUAL_128(0x0000000100818000, 0x0000000082017f81, q20); 17311 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000082017f81, q21); 17312 TEARDOWN(); 17313 } 17314 17315 TEST(neon_2regmisc_sadalp) { 17316 SETUP(); 17317 17318 START(); 17319 17320 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081); 17321 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff); 17322 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff); 17323 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff); 17324 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000); 17325 17326 __ Mov(v16.V16B(), v1.V16B()); 17327 __ Mov(v17.V16B(), v1.V16B()); 17328 __ Sadalp(v16.V8H(), v0.V16B()); 17329 __ Sadalp(v17.V4H(), v0.V8B()); 17330 17331 __ Mov(v18.V16B(), v2.V16B()); 17332 __ Mov(v19.V16B(), v2.V16B()); 17333 __ Sadalp(v18.V4S(), v1.V8H()); 17334 __ Sadalp(v19.V2S(), v1.V4H()); 17335 17336 __ Mov(v20.V16B(), v3.V16B()); 17337 __ Mov(v21.V16B(), v4.V16B()); 17338 __ Sadalp(v20.V2D(), v2.V4S()); 17339 __ Sadalp(v21.V1D(), v2.V2S()); 17340 17341 END(); 17342 17343 RUN(); 17344 ASSERT_EQUAL_128(0x80808000ff000080, 0xff00ffff00817f00, q16); 17345 ASSERT_EQUAL_128(0x0000000000000000, 0xff00ffff00817f00, q17); 17346 ASSERT_EQUAL_128(0x7fff0001fffffffe, 0xffffffff80007fff, q18); 17347 ASSERT_EQUAL_128(0x0000000000000000, 0xffffffff80007fff, q19); 17348 ASSERT_EQUAL_128(0x7fffffff80000000, 0x800000007ffffffe, q20); 17349 ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21); 17350 TEARDOWN(); 17351 } 17352 17353 TEST(neon_2regmisc_uadalp) { 17354 SETUP(); 17355 17356 START(); 17357 17358 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081); 17359 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff); 17360 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff); 17361 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff); 17362 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000); 17363 17364 __ Mov(v16.V16B(), v1.V16B()); 17365 __ Mov(v17.V16B(), v1.V16B()); 17366 __ Uadalp(v16.V8H(), v0.V16B()); 17367 __ Uadalp(v17.V4H(), v0.V8B()); 17368 17369 __ Mov(v18.V16B(), v2.V16B()); 17370 __ Mov(v19.V16B(), v2.V16B()); 17371 __ Uadalp(v18.V4S(), v1.V8H()); 17372 __ Uadalp(v19.V2S(), v1.V4H()); 17373 17374 __ Mov(v20.V16B(), v3.V16B()); 17375 __ Mov(v21.V16B(), v4.V16B()); 17376 __ Uadalp(v20.V2D(), v2.V4S()); 17377 __ 
Uadalp(v21.V1D(), v2.V2S()); 17378 17379 END(); 17380 17381 RUN(); 17382 ASSERT_EQUAL_128(0x8080810001000080, 0x010000ff00818100, q16); 17383 ASSERT_EQUAL_128(0x0000000000000000, 0x010000ff00818100, q17); 17384 ASSERT_EQUAL_128(0x800100010000fffe, 0x0000ffff80007fff, q18); 17385 ASSERT_EQUAL_128(0x0000000000000000, 0x0000ffff80007fff, q19); 17386 ASSERT_EQUAL_128(0x8000000180000000, 0x800000007ffffffe, q20); 17387 ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21); 17388 TEARDOWN(); 17389 } 17390 17391 TEST(neon_3same_mul) { 17392 SETUP(); 17393 17394 START(); 17395 17396 __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa); 17397 __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff); 17398 __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); 17399 __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); 17400 17401 __ Mla(v16.V16B(), v0.V16B(), v1.V16B()); 17402 __ Mls(v17.V16B(), v0.V16B(), v1.V16B()); 17403 __ Mul(v18.V16B(), v0.V16B(), v1.V16B()); 17404 17405 END(); 17406 17407 RUN(); 17408 ASSERT_EQUAL_128(0x0102757605b1b208, 0x5f0a61450db90f56, q16); 17409 ASSERT_EQUAL_128(0x01029192055b5c08, 0xb30ab5d30d630faa, q17); 17410 ASSERT_EQUAL_128(0x0000727200abab00, 0x5600563900ab0056, q18); 17411 TEARDOWN(); 17412 } 17413 17414 17415 TEST(neon_3same_absdiff) { 17416 SETUP(); 17417 17418 START(); 17419 17420 __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa); 17421 __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff); 17422 __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); 17423 __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); 17424 17425 __ Saba(v16.V16B(), v0.V16B(), v1.V16B()); 17426 __ Uaba(v17.V16B(), v0.V16B(), v1.V16B()); 17427 __ Sabd(v18.V16B(), v0.V16B(), v1.V16B()); 17428 __ Uabd(v19.V16B(), v0.V16B(), v1.V16B()); 17429 17430 END(); 17431 17432 RUN(); 17433 ASSERT_EQUAL_128(0x0202aeaf065c5d5e, 0x5e5f600c62646455, q16); 17434 ASSERT_EQUAL_128(0x0002585904b0b1b2, 0x5e5f600c62b86455, q17); 17435 ASSERT_EQUAL_128(0x0100abab01565656, 0x5555550055565555, q18); 17436 ASSERT_EQUAL_128(0xff005555ffaaaaaa, 0x5555550055aa5555, q19); 17437 TEARDOWN(); 17438 } 17439 17440 17441 TEST(neon_byelement_mul) { 17442 SETUP(); 17443 17444 START(); 17445 17446 __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa); 17447 __ Movi(v1.V2D(), 0x000155aaff55ff00, 0xaa55ff55555500ff); 17448 17449 17450 __ Mul(v16.V4H(), v0.V4H(), v1.H(), 0); 17451 __ Mul(v17.V8H(), v0.V8H(), v1.H(), 7); 17452 __ Mul(v18.V2S(), v0.V2S(), v1.S(), 0); 17453 __ Mul(v19.V4S(), v0.V4S(), v1.S(), 3); 17454 17455 __ Movi(v20.V2D(), 0x0000000000000000, 0x0001000200030004); 17456 __ Movi(v21.V2D(), 0x0005000600070008, 0x0001000200030004); 17457 __ Mla(v20.V4H(), v0.V4H(), v1.H(), 0); 17458 __ Mla(v21.V8H(), v0.V8H(), v1.H(), 7); 17459 17460 __ Movi(v22.V2D(), 0x0000000000000000, 0x0000000200000004); 17461 __ Movi(v23.V2D(), 0x0000000600000008, 0x0000000200000004); 17462 __ Mla(v22.V2S(), v0.V2S(), v1.S(), 0); 17463 __ Mla(v23.V4S(), v0.V4S(), v1.S(), 3); 17464 17465 __ Movi(v24.V2D(), 0x0000000000000000, 0x0100aaabfe015456); 17466 __ Movi(v25.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa); 17467 __ Mls(v24.V4H(), v0.V4H(), v1.H(), 0); 17468 __ Mls(v25.V8H(), v0.V8H(), v1.H(), 7); 17469 17470 __ Movi(v26.V2D(), 0x0000000000000000, 0xc8e2aaabe1c85456); 17471 __ Movi(v27.V2D(), 0x39545572c6aa54e4, 0x39545572c6aa54e4); 17472 __ Mls(v26.V2S(), v0.V2S(), v1.S(), 0); 17473 __ Mls(v27.V4S(), v0.V4S(), v1.S(), 3); 17474 17475 END(); 17476 17477 RUN(); 17478 
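  // Mul by element multiplies each lane of the first source by one selected
  // lane of the second. For q16, the selected lane 0 of v1 is 0x00ff and the
  // lowest halfword of v0 is 0x55aa; 0x55aa * 0x00ff = 0x555456, which
  // truncates to the 0x5456 seen below.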
ASSERT_EQUAL_128(0x0000000000000000, 0x0100aaabfe015456, q16); 17479 ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q17); 17480 ASSERT_EQUAL_128(0x0000000000000000, 0xc8e2aaabe1c85456, q18); 17481 ASSERT_EQUAL_128(0x39545572c6aa54e4, 0x39545572c6aa54e4, q19); 17482 17483 ASSERT_EQUAL_128(0x0000000000000000, 0x0101aaadfe04545a, q20); 17484 ASSERT_EQUAL_128(0xff05aa5b010655b2, 0xff01aa57010255ae, q21); 17485 ASSERT_EQUAL_128(0x0000000000000000, 0xc8e2aaade1c8545a, q22); 17486 ASSERT_EQUAL_128(0x39545578c6aa54ec, 0x39545574c6aa54e8, q23); 17487 17488 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24); 17489 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25); 17490 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q26); 17491 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q27); 17492 TEARDOWN(); 17493 } 17494 17495 17496 TEST(neon_byelement_mull) { 17497 SETUP(); 17498 17499 START(); 17500 17501 __ Movi(v0.V2D(), 0xaa55ff55555500ff, 0xff00aa5500ff55aa); 17502 __ Movi(v1.V2D(), 0x000155aaff55ff00, 0xaa55ff55555500ff); 17503 17504 17505 __ Smull(v16.V4S(), v0.V4H(), v1.H(), 7); 17506 __ Smull2(v17.V4S(), v0.V8H(), v1.H(), 0); 17507 __ Umull(v18.V4S(), v0.V4H(), v1.H(), 7); 17508 __ Umull2(v19.V4S(), v0.V8H(), v1.H(), 0); 17509 17510 __ Movi(v20.V2D(), 0x0000000100000002, 0x0000000200000001); 17511 __ Movi(v21.V2D(), 0x0000000100000002, 0x0000000200000001); 17512 __ Movi(v22.V2D(), 0x0000000100000002, 0x0000000200000001); 17513 __ Movi(v23.V2D(), 0x0000000100000002, 0x0000000200000001); 17514 17515 __ Smlal(v20.V4S(), v0.V4H(), v1.H(), 7); 17516 __ Smlal2(v21.V4S(), v0.V8H(), v1.H(), 0); 17517 __ Umlal(v22.V4S(), v0.V4H(), v1.H(), 7); 17518 __ Umlal2(v23.V4S(), v0.V8H(), v1.H(), 0); 17519 17520 __ Movi(v24.V2D(), 0xffffff00ffffaa55, 0x000000ff000055aa); 17521 __ Movi(v25.V2D(), 0xffaaaaabffff55ab, 0x0054ffab0000fe01); 17522 __ Movi(v26.V2D(), 0x0000ff000000aa55, 0x000000ff000055aa); 17523 __ Movi(v27.V2D(), 0x00a9aaab00fe55ab, 0x0054ffab0000fe01); 17524 17525 __ Smlsl(v24.V4S(), v0.V4H(), v1.H(), 7); 17526 __ Smlsl2(v25.V4S(), v0.V8H(), v1.H(), 0); 17527 __ Umlsl(v26.V4S(), v0.V4H(), v1.H(), 7); 17528 __ Umlsl2(v27.V4S(), v0.V8H(), v1.H(), 0); 17529 17530 END(); 17531 17532 RUN(); 17533 17534 ASSERT_EQUAL_128(0xffffff00ffffaa55, 0x000000ff000055aa, q16); 17535 ASSERT_EQUAL_128(0xffaaaaabffff55ab, 0x0054ffab0000fe01, q17); 17536 ASSERT_EQUAL_128(0x0000ff000000aa55, 0x000000ff000055aa, q18); 17537 ASSERT_EQUAL_128(0x00a9aaab00fe55ab, 0x0054ffab0000fe01, q19); 17538 17539 ASSERT_EQUAL_128(0xffffff01ffffaa57, 0x00000101000055ab, q20); 17540 ASSERT_EQUAL_128(0xffaaaaacffff55ad, 0x0054ffad0000fe02, q21); 17541 ASSERT_EQUAL_128(0x0000ff010000aa57, 0x00000101000055ab, q22); 17542 ASSERT_EQUAL_128(0x00a9aaac00fe55ad, 0x0054ffad0000fe02, q23); 17543 17544 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24); 17545 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25); 17546 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q26); 17547 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q27); 17548 17549 TEARDOWN(); 17550 } 17551 17552 17553 TEST(neon_byelement_sqdmull) { 17554 SETUP(); 17555 17556 START(); 17557 17558 __ Movi(v0.V2D(), 0xaa55ff55555500ff, 0xff00aa5500ff55aa); 17559 __ Movi(v1.V2D(), 0x000155aaff55ff00, 0xaa55ff55555500ff); 17560 17561 __ Sqdmull(v16.V4S(), v0.V4H(), v1.H(), 7); 17562 __ Sqdmull2(v17.V4S(), v0.V8H(), v1.H(), 0); 17563 __ Sqdmull(s18, h0, v1.H(), 7); 17564 17565 __ Movi(v20.V2D(), 
0x0000000100000002, 0x0000000200000001); 17566 __ Movi(v21.V2D(), 0x0000000100000002, 0x0000000200000001); 17567 __ Movi(v22.V2D(), 0x0000000100000002, 0x0000000200000001); 17568 17569 __ Sqdmlal(v20.V4S(), v0.V4H(), v1.H(), 7); 17570 __ Sqdmlal2(v21.V4S(), v0.V8H(), v1.H(), 0); 17571 __ Sqdmlal(s22, h0, v1.H(), 7); 17572 17573 __ Movi(v24.V2D(), 0xfffffe00ffff54aa, 0x000001fe0000ab54); 17574 __ Movi(v25.V2D(), 0xff555556fffeab56, 0x00a9ff560001fc02); 17575 __ Movi(v26.V2D(), 0x0000000000000000, 0x000000000000ab54); 17576 17577 __ Sqdmlsl(v24.V4S(), v0.V4H(), v1.H(), 7); 17578 __ Sqdmlsl2(v25.V4S(), v0.V8H(), v1.H(), 0); 17579 __ Sqdmlsl(s26, h0, v1.H(), 7); 17580 17581 END(); 17582 17583 RUN(); 17584 17585 ASSERT_EQUAL_128(0xfffffe00ffff54aa, 0x000001fe0000ab54, q16); 17586 ASSERT_EQUAL_128(0xff555556fffeab56, 0x00a9ff560001fc02, q17); 17587 ASSERT_EQUAL_128(0, 0x0000ab54, q18); 17588 17589 ASSERT_EQUAL_128(0xfffffe01ffff54ac, 0x000002000000ab55, q20); 17590 ASSERT_EQUAL_128(0xff555557fffeab58, 0x00a9ff580001fc03, q21); 17591 ASSERT_EQUAL_128(0, 0x0000ab55, q22); 17592 17593 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24); 17594 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25); 17595 ASSERT_EQUAL_128(0, 0x00000000, q26); 17596 17597 TEARDOWN(); 17598 } 17599 17600 17601 TEST(neon_3diff_absdiff) { 17602 SETUP(); 17603 17604 START(); 17605 17606 __ Movi(v0.V2D(), 0xff00aa5500ff55ab, 0xff00aa5500ff55aa); 17607 __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff); 17608 __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); 17609 __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); 17610 __ Movi(v18.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); 17611 __ Movi(v19.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); 17612 17613 __ Sabal(v16.V8H(), v0.V8B(), v1.V8B()); 17614 __ Uabal(v17.V8H(), v0.V8B(), v1.V8B()); 17615 __ Sabal2(v18.V8H(), v0.V16B(), v1.V16B()); 17616 __ Uabal2(v19.V8H(), v0.V16B(), v1.V16B()); 17617 17618 END(); 17619 17620 RUN(); 17621 ASSERT_EQUAL_128(0x01570359055b0708, 0x095f0b620d630f55, q16); 17622 ASSERT_EQUAL_128(0x01570359055b0708, 0x095f0bb60d630f55, q17); 17623 ASSERT_EQUAL_128(0x0103030405b107b3, 0x090b0b620d640f55, q18); 17624 ASSERT_EQUAL_128(0x02010304055b075d, 0x0a090bb60db80fab, q19); 17625 TEARDOWN(); 17626 } 17627 17628 17629 TEST(neon_3diff_sqdmull) { 17630 SETUP(); 17631 17632 START(); 17633 17634 __ Movi(v0.V2D(), 0x7fff7fff80008000, 0x80007fff7fff8000); 17635 __ Movi(v1.V2D(), 0x80007fff7fff8000, 0x7fff7fff80008000); 17636 __ Movi(v2.V2D(), 0x800000007fffffff, 0x7fffffff80000000); 17637 __ Movi(v3.V2D(), 0x8000000080000000, 0x8000000080000000); 17638 17639 __ Sqdmull(v16.V4S(), v0.V4H(), v1.V4H()); 17640 __ Sqdmull2(v17.V4S(), v0.V8H(), v1.V8H()); 17641 __ Sqdmull(v18.V2D(), v2.V2S(), v3.V2S()); 17642 __ Sqdmull2(v19.V2D(), v2.V4S(), v3.V4S()); 17643 __ Sqdmull(s20, h0, h1); 17644 __ Sqdmull(d21, s2, s3); 17645 17646 END(); 17647 17648 RUN(); 17649 ASSERT_EQUAL_128(0x800100007ffe0002, 0x800100007fffffff, q16); 17650 ASSERT_EQUAL_128(0x800100007ffe0002, 0x800100007fffffff, q17); 17651 ASSERT_EQUAL_128(0x8000000100000000, 0x7fffffffffffffff, q18); 17652 ASSERT_EQUAL_128(0x7fffffffffffffff, 0x8000000100000000, q19); 17653 ASSERT_EQUAL_128(0, 0x7fffffff, q20); 17654 ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q21); 17655 TEARDOWN(); 17656 } 17657 17658 17659 TEST(neon_3diff_sqdmlal) { 17660 SETUP(); 17661 17662 START(); 17663 17664 __ Movi(v0.V2D(), 0x7fff7fff80008000, 0x80007fff7fff8000); 17665 __ Movi(v1.V2D(), 
0x80007fff7fff8000, 0x7fff7fff80008000); 17666 __ Movi(v2.V2D(), 0x800000007fffffff, 0x7fffffff80000000); 17667 __ Movi(v3.V2D(), 0x8000000080000000, 0x8000000080000000); 17668 17669 __ Movi(v16.V2D(), 0xffffffff00000001, 0x8fffffff00000001); 17670 __ Movi(v17.V2D(), 0x00000001ffffffff, 0x00000001ffffffff); 17671 __ Movi(v18.V2D(), 0x8000000000000001, 0x0000000000000001); 17672 __ Movi(v19.V2D(), 0xffffffffffffffff, 0x7fffffffffffffff); 17673 __ Movi(v20.V2D(), 0, 0x00000001); 17674 __ Movi(v21.V2D(), 0, 0x00000001); 17675 17676 __ Sqdmlal(v16.V4S(), v0.V4H(), v1.V4H()); 17677 __ Sqdmlal2(v17.V4S(), v0.V8H(), v1.V8H()); 17678 __ Sqdmlal(v18.V2D(), v2.V2S(), v3.V2S()); 17679 __ Sqdmlal2(v19.V2D(), v2.V4S(), v3.V4S()); 17680 __ Sqdmlal(s20, h0, h1); 17681 __ Sqdmlal(d21, s2, s3); 17682 17683 END(); 17684 17685 RUN(); 17686 ASSERT_EQUAL_128(0x8000ffff7ffe0003, 0x800000007fffffff, q16); 17687 ASSERT_EQUAL_128(0x800100017ffe0001, 0x800100017ffffffe, q17); 17688 ASSERT_EQUAL_128(0x8000000000000000, 0x7fffffffffffffff, q18); 17689 ASSERT_EQUAL_128(0x7ffffffffffffffe, 0x00000000ffffffff, q19); 17690 ASSERT_EQUAL_128(0, 0x7fffffff, q20); 17691 ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q21); 17692 TEARDOWN(); 17693 } 17694 17695 17696 TEST(neon_3diff_sqdmlsl) { 17697 SETUP(); 17698 17699 START(); 17700 17701 __ Movi(v0.V2D(), 0x7fff7fff80008000, 0x80007fff7fff8000); 17702 __ Movi(v1.V2D(), 0x80007fff7fff8000, 0x7fff7fff80008000); 17703 __ Movi(v2.V2D(), 0x800000007fffffff, 0x7fffffff80000000); 17704 __ Movi(v3.V2D(), 0x8000000080000000, 0x8000000080000000); 17705 17706 __ Movi(v16.V2D(), 0xffffffff00000001, 0x7ffffffe80000001); 17707 __ Movi(v17.V2D(), 0x00000001ffffffff, 0x7ffffffe00000001); 17708 __ Movi(v18.V2D(), 0x8000000000000001, 0x8000000000000001); 17709 __ Movi(v19.V2D(), 0xfffffffffffffffe, 0x7fffffffffffffff); 17710 __ Movi(v20.V2D(), 0, 0x00000001); 17711 __ Movi(v21.V2D(), 0, 0x00000001); 17712 17713 __ Sqdmlsl(v16.V4S(), v0.V4H(), v1.V4H()); 17714 __ Sqdmlsl2(v17.V4S(), v0.V8H(), v1.V8H()); 17715 __ Sqdmlsl(v18.V2D(), v2.V2S(), v3.V2S()); 17716 __ Sqdmlsl2(v19.V2D(), v2.V4S(), v3.V4S()); 17717 __ Sqdmlsl(s20, h0, h1); 17718 __ Sqdmlsl(d21, s2, s3); 17719 17720 END(); 17721 17722 RUN(); 17723 ASSERT_EQUAL_128(0x7ffeffff8001ffff, 0x7fffffff80000000, q16); 17724 ASSERT_EQUAL_128(0x7fff00018001fffd, 0x7fffffff80000002, q17); 17725 ASSERT_EQUAL_128(0xffffffff00000001, 0x8000000000000000, q18); 17726 ASSERT_EQUAL_128(0x8000000000000000, 0x7fffffffffffffff, q19); 17727 ASSERT_EQUAL_128(0, 0x80000002, q20); 17728 ASSERT_EQUAL_128(0, 0x8000000000000002, q21); 17729 17730 TEARDOWN(); 17731 } 17732 17733 17734 TEST(neon_3diff_mla) { 17735 SETUP(); 17736 17737 START(); 17738 17739 __ Movi(v0.V2D(), 0xff00aa5500ff55ab, 0xff00aa5500ff55aa); 17740 __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff); 17741 __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); 17742 __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); 17743 __ Movi(v18.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); 17744 __ Movi(v19.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); 17745 17746 __ Smlal(v16.V8H(), v0.V8B(), v1.V8B()); 17747 __ Umlal(v17.V8H(), v0.V8B(), v1.V8B()); 17748 __ Smlal2(v18.V8H(), v0.V16B(), v1.V16B()); 17749 __ Umlal2(v19.V8H(), v0.V16B(), v1.V16B()); 17750 17751 END(); 17752 17753 RUN(); 17754 ASSERT_EQUAL_128(0x01580304055c2341, 0x090a0ab70d0e0f56, q16); 17755 ASSERT_EQUAL_128(0xaa580304ae5c2341, 0x090a5fb70d0eb856, q17); 17756 ASSERT_EQUAL_128(0x01020304e878ea7a, 0x090a0ab70cb90f00, q18); 
17757 ASSERT_EQUAL_128(0x010203043d783f7a, 0x090a5fb761b90f00, q19); 17758 TEARDOWN(); 17759 } 17760 17761 17762 TEST(neon_3diff_mls) { 17763 SETUP(); 17764 17765 START(); 17766 17767 __ Movi(v0.V2D(), 0xff00aa5500ff55ab, 0xff00aa5500ff55aa); 17768 __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff); 17769 __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); 17770 __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); 17771 __ Movi(v18.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); 17772 __ Movi(v19.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); 17773 17774 __ Smlsl(v16.V8H(), v0.V8B(), v1.V8B()); 17775 __ Umlsl(v17.V8H(), v0.V8B(), v1.V8B()); 17776 __ Smlsl2(v18.V8H(), v0.V16B(), v1.V16B()); 17777 __ Umlsl2(v19.V8H(), v0.V16B(), v1.V16B()); 17778 17779 END(); 17780 17781 RUN(); 17782 ASSERT_EQUAL_128(0x00ac030404b0eacf, 0x090a0b610d0e0eaa, q16); 17783 ASSERT_EQUAL_128(0x57ac03045bb0eacf, 0x090ab6610d0e65aa, q17); 17784 ASSERT_EQUAL_128(0x0102030421942396, 0x090a0b610d630f00, q18); 17785 ASSERT_EQUAL_128(0x01020304cc94ce96, 0x090ab661b8630f00, q19); 17786 TEARDOWN(); 17787 } 17788 17789 17790 TEST(neon_3same_compare) { 17791 SETUP(); 17792 17793 START(); 17794 17795 __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa); 17796 __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff); 17797 17798 __ Cmeq(v16.V16B(), v0.V16B(), v0.V16B()); 17799 __ Cmeq(v17.V16B(), v0.V16B(), v1.V16B()); 17800 __ Cmge(v18.V16B(), v0.V16B(), v0.V16B()); 17801 __ Cmge(v19.V16B(), v0.V16B(), v1.V16B()); 17802 __ Cmgt(v20.V16B(), v0.V16B(), v0.V16B()); 17803 __ Cmgt(v21.V16B(), v0.V16B(), v1.V16B()); 17804 __ Cmhi(v22.V16B(), v0.V16B(), v0.V16B()); 17805 __ Cmhi(v23.V16B(), v0.V16B(), v1.V16B()); 17806 __ Cmhs(v24.V16B(), v0.V16B(), v0.V16B()); 17807 __ Cmhs(v25.V16B(), v0.V16B(), v1.V16B()); 17808 17809 END(); 17810 17811 RUN(); 17812 ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q16); 17813 ASSERT_EQUAL_128(0x00ff000000000000, 0x000000ff00000000, q17); 17814 ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q18); 17815 ASSERT_EQUAL_128(0x00ff00ffff00ff00, 0xff0000ff0000ff00, q19); 17816 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20); 17817 ASSERT_EQUAL_128(0x000000ffff00ff00, 0xff0000000000ff00, q21); 17818 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q22); 17819 ASSERT_EQUAL_128(0xff00ff0000ff00ff, 0xff00000000ffff00, q23); 17820 ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q24); 17821 ASSERT_EQUAL_128(0xffffff0000ff00ff, 0xff0000ff00ffff00, q25); 17822 TEARDOWN(); 17823 } 17824 17825 17826 TEST(neon_3same_scalar_compare) { 17827 SETUP(); 17828 17829 START(); 17830 17831 __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa); 17832 __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff); 17833 17834 __ Cmeq(d16, d0, d0); 17835 __ Cmeq(d17, d0, d1); 17836 __ Cmeq(d18, d1, d0); 17837 __ Cmge(d19, d0, d0); 17838 __ Cmge(d20, d0, d1); 17839 __ Cmge(d21, d1, d0); 17840 __ Cmgt(d22, d0, d0); 17841 __ Cmgt(d23, d0, d1); 17842 __ Cmhi(d24, d0, d0); 17843 __ Cmhi(d25, d0, d1); 17844 __ Cmhs(d26, d0, d0); 17845 __ Cmhs(d27, d0, d1); 17846 __ Cmhs(d28, d1, d0); 17847 17848 END(); 17849 17850 RUN(); 17851 17852 ASSERT_EQUAL_128(0, 0xffffffffffffffff, q16); 17853 ASSERT_EQUAL_128(0, 0x0000000000000000, q17); 17854 ASSERT_EQUAL_128(0, 0x0000000000000000, q18); 17855 ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19); 17856 ASSERT_EQUAL_128(0, 0xffffffffffffffff, q20); 17857 ASSERT_EQUAL_128(0, 0x0000000000000000, q21); 17858 ASSERT_EQUAL_128(0, 
0x0000000000000000, q22);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q23);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q24);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q25);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q26);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q27);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q28);

  TEARDOWN();
}

TEST(neon_2regmisc_fcmeq) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // Zero.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000);  // < 0.
  __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000);  // > 0.

  __ Fcmeq(s16, s0, 0.0);
  __ Fcmeq(s17, s1, 0.0);
  __ Fcmeq(s18, s2, 0.0);
  __ Fcmeq(d19, d0, 0.0);
  __ Fcmeq(d20, d1, 0.0);
  __ Fcmeq(d21, d2, 0.0);
  __ Fcmeq(v22.V2S(), v0.V2S(), 0.0);
  __ Fcmeq(v23.V4S(), v1.V4S(), 0.0);
  __ Fcmeq(v24.V2D(), v1.V2D(), 0.0);
  __ Fcmeq(v25.V2D(), v2.V2D(), 0.0);

  END();

  RUN();
  ASSERT_EQUAL_128(0, 0xffffffff, q16);
  ASSERT_EQUAL_128(0, 0x00000000, q17);
  ASSERT_EQUAL_128(0, 0x00000000, q18);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q21);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q22);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
  TEARDOWN();
}

TEST(neon_2regmisc_fcmge) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // Zero.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000);  // < 0.
  __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000);  // > 0.

  __ Fcmge(s16, s0, 0.0);
  __ Fcmge(s17, s1, 0.0);
  __ Fcmge(s18, s2, 0.0);
  __ Fcmge(d19, d0, 0.0);
  __ Fcmge(d20, d1, 0.0);
  __ Fcmge(d21, d3, 0.0);
  __ Fcmge(v22.V2S(), v0.V2S(), 0.0);
  __ Fcmge(v23.V4S(), v1.V4S(), 0.0);
  __ Fcmge(v24.V2D(), v1.V2D(), 0.0);
  __ Fcmge(v25.V2D(), v3.V2D(), 0.0);

  END();

  RUN();
  ASSERT_EQUAL_128(0, 0xffffffff, q16);
  ASSERT_EQUAL_128(0, 0x00000000, q17);
  ASSERT_EQUAL_128(0, 0x00000000, q18);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q21);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q22);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
  TEARDOWN();
}


TEST(neon_2regmisc_fcmgt) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // Zero.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000);  // < 0.
  __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000);  // > 0.
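
  // As with the other Fcm* zero-compare forms, comparisons involving NaN
  // inputs are false, so all lanes sourced from v1 (q17, q20, q23 and q24)
  // are expected to be zero.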

  __ Fcmgt(s16, s0, 0.0);
  __ Fcmgt(s17, s1, 0.0);
  __ Fcmgt(s18, s2, 0.0);
  __ Fcmgt(d19, d0, 0.0);
  __ Fcmgt(d20, d1, 0.0);
  __ Fcmgt(d21, d3, 0.0);
  __ Fcmgt(v22.V2S(), v0.V2S(), 0.0);
  __ Fcmgt(v23.V4S(), v1.V4S(), 0.0);
  __ Fcmgt(v24.V2D(), v1.V2D(), 0.0);
  __ Fcmgt(v25.V2D(), v3.V2D(), 0.0);

  END();

  RUN();
  ASSERT_EQUAL_128(0, 0x00000000, q16);
  ASSERT_EQUAL_128(0, 0x00000000, q17);
  ASSERT_EQUAL_128(0, 0x00000000, q18);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q19);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q21);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q22);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
  TEARDOWN();
}

TEST(neon_2regmisc_fcmle) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // Zero.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000);  // < 0.
  __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000);  // > 0.

  __ Fcmle(s16, s0, 0.0);
  __ Fcmle(s17, s1, 0.0);
  __ Fcmle(s18, s3, 0.0);
  __ Fcmle(d19, d0, 0.0);
  __ Fcmle(d20, d1, 0.0);
  __ Fcmle(d21, d2, 0.0);
  __ Fcmle(v22.V2S(), v0.V2S(), 0.0);
  __ Fcmle(v23.V4S(), v1.V4S(), 0.0);
  __ Fcmle(v24.V2D(), v1.V2D(), 0.0);
  __ Fcmle(v25.V2D(), v2.V2D(), 0.0);

  END();

  RUN();
  ASSERT_EQUAL_128(0, 0xffffffff, q16);
  ASSERT_EQUAL_128(0, 0x00000000, q17);
  ASSERT_EQUAL_128(0, 0x00000000, q18);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q21);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q22);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
  TEARDOWN();
}


TEST(neon_2regmisc_fcmlt) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // Zero.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000);  // < 0.
  __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000);  // > 0.
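
  // Fcmlt against zero is true only for strictly negative inputs: the
  // negative lanes from v2 produce all ones, while the zero, NaN and
  // positive inputs all produce zero.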
18028 18029 __ Fcmlt(s16, s0, 0.0); 18030 __ Fcmlt(s17, s1, 0.0); 18031 __ Fcmlt(s18, s3, 0.0); 18032 __ Fcmlt(d19, d0, 0.0); 18033 __ Fcmlt(d20, d1, 0.0); 18034 __ Fcmlt(d21, d2, 0.0); 18035 __ Fcmlt(v22.V2S(), v0.V2S(), 0.0); 18036 __ Fcmlt(v23.V4S(), v1.V4S(), 0.0); 18037 __ Fcmlt(v24.V2D(), v1.V2D(), 0.0); 18038 __ Fcmlt(v25.V2D(), v2.V2D(), 0.0); 18039 18040 END(); 18041 18042 RUN(); 18043 ASSERT_EQUAL_128(0, 0x00000000, q16); 18044 ASSERT_EQUAL_128(0, 0x00000000, q17); 18045 ASSERT_EQUAL_128(0, 0x00000000, q18); 18046 ASSERT_EQUAL_128(0, 0x0000000000000000, q19); 18047 ASSERT_EQUAL_128(0, 0x0000000000000000, q20); 18048 ASSERT_EQUAL_128(0, 0xffffffffffffffff, q21); 18049 ASSERT_EQUAL_128(0, 0x0000000000000000, q22); 18050 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23); 18051 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24); 18052 ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25); 18053 TEARDOWN(); 18054 } 18055 18056 TEST(neon_2regmisc_cmeq) { 18057 SETUP(); 18058 18059 START(); 18060 18061 __ Movi(v0.V2D(), 0x0001000200030004, 0x0000000000000000); 18062 __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff); 18063 18064 __ Cmeq(v16.V8B(), v1.V8B(), 0); 18065 __ Cmeq(v17.V16B(), v1.V16B(), 0); 18066 __ Cmeq(v18.V4H(), v1.V4H(), 0); 18067 __ Cmeq(v19.V8H(), v1.V8H(), 0); 18068 __ Cmeq(v20.V2S(), v0.V2S(), 0); 18069 __ Cmeq(v21.V4S(), v0.V4S(), 0); 18070 __ Cmeq(d22, d0, 0); 18071 __ Cmeq(d23, d1, 0); 18072 __ Cmeq(v24.V2D(), v0.V2D(), 0); 18073 18074 END(); 18075 18076 RUN(); 18077 ASSERT_EQUAL_128(0x0000000000000000, 0xffff00000000ff00, q16); 18078 ASSERT_EQUAL_128(0xffff0000000000ff, 0xffff00000000ff00, q17); 18079 ASSERT_EQUAL_128(0x0000000000000000, 0xffff000000000000, q18); 18080 ASSERT_EQUAL_128(0xffff000000000000, 0xffff000000000000, q19); 18081 ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q20); 18082 ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q21); 18083 ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22); 18084 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23); 18085 ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24); 18086 TEARDOWN(); 18087 } 18088 18089 18090 TEST(neon_2regmisc_cmge) { 18091 SETUP(); 18092 18093 START(); 18094 18095 __ Movi(v0.V2D(), 0xff01000200030004, 0x0000000000000000); 18096 __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff); 18097 18098 __ Cmge(v16.V8B(), v1.V8B(), 0); 18099 __ Cmge(v17.V16B(), v1.V16B(), 0); 18100 __ Cmge(v18.V4H(), v1.V4H(), 0); 18101 __ Cmge(v19.V8H(), v1.V8H(), 0); 18102 __ Cmge(v20.V2S(), v0.V2S(), 0); 18103 __ Cmge(v21.V4S(), v0.V4S(), 0); 18104 __ Cmge(d22, d0, 0); 18105 __ Cmge(d23, d1, 0); 18106 __ Cmge(v24.V2D(), v0.V2D(), 0); 18107 18108 END(); 18109 18110 RUN(); 18111 ASSERT_EQUAL_128(0x0000000000000000, 0xffff00ffffffff00, q16); 18112 ASSERT_EQUAL_128(0xffffff0000ff00ff, 0xffff00ffffffff00, q17); 18113 ASSERT_EQUAL_128(0x0000000000000000, 0xffff0000ffffffff, q18); 18114 ASSERT_EQUAL_128(0xffffffff00000000, 0xffff0000ffffffff, q19); 18115 ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q20); 18116 ASSERT_EQUAL_128(0x00000000ffffffff, 0xffffffffffffffff, q21); 18117 ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22); 18118 ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q23); 18119 ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24); 18120 TEARDOWN(); 18121 } 18122 18123 18124 TEST(neon_2regmisc_cmlt) { 18125 SETUP(); 18126 18127 START(); 18128 18129 __ Movi(v0.V2D(), 
0x0001000200030004, 0xff00000000000000); 18130 __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff); 18131 18132 __ Cmlt(v16.V8B(), v1.V8B(), 0); 18133 __ Cmlt(v17.V16B(), v1.V16B(), 0); 18134 __ Cmlt(v18.V4H(), v1.V4H(), 0); 18135 __ Cmlt(v19.V8H(), v1.V8H(), 0); 18136 __ Cmlt(v20.V2S(), v1.V2S(), 0); 18137 __ Cmlt(v21.V4S(), v1.V4S(), 0); 18138 __ Cmlt(d22, d0, 0); 18139 __ Cmlt(d23, d1, 0); 18140 __ Cmlt(v24.V2D(), v0.V2D(), 0); 18141 18142 END(); 18143 18144 RUN(); 18145 ASSERT_EQUAL_128(0x0000000000000000, 0x0000ff00000000ff, q16); 18146 ASSERT_EQUAL_128(0x000000ffff00ff00, 0x0000ff00000000ff, q17); 18147 ASSERT_EQUAL_128(0x0000000000000000, 0x0000ffff00000000, q18); 18148 ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000ffff00000000, q19); 18149 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20); 18150 ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000000000000000, q21); 18151 ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22); 18152 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23); 18153 ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24); 18154 TEARDOWN(); 18155 } 18156 18157 18158 TEST(neon_2regmisc_cmle) { 18159 SETUP(); 18160 18161 START(); 18162 18163 __ Movi(v0.V2D(), 0x0001000200030004, 0x0000000000000000); 18164 __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff); 18165 18166 __ Cmle(v16.V8B(), v1.V8B(), 0); 18167 __ Cmle(v17.V16B(), v1.V16B(), 0); 18168 __ Cmle(v18.V4H(), v1.V4H(), 0); 18169 __ Cmle(v19.V8H(), v1.V8H(), 0); 18170 __ Cmle(v20.V2S(), v1.V2S(), 0); 18171 __ Cmle(v21.V4S(), v1.V4S(), 0); 18172 __ Cmle(d22, d0, 0); 18173 __ Cmle(d23, d1, 0); 18174 __ Cmle(v24.V2D(), v0.V2D(), 0); 18175 18176 END(); 18177 18178 RUN(); 18179 ASSERT_EQUAL_128(0x0000000000000000, 0xffffff000000ffff, q16); 18180 ASSERT_EQUAL_128(0xffff00ffff00ffff, 0xffffff000000ffff, q17); 18181 ASSERT_EQUAL_128(0x0000000000000000, 0xffffffff00000000, q18); 18182 ASSERT_EQUAL_128(0xffff0000ffffffff, 0xffffffff00000000, q19); 18183 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20); 18184 ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000000000000000, q21); 18185 ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22); 18186 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23); 18187 ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24); 18188 TEARDOWN(); 18189 } 18190 18191 18192 TEST(neon_2regmisc_cmgt) { 18193 SETUP(); 18194 18195 START(); 18196 18197 __ Movi(v0.V2D(), 0x0001000200030004, 0x0000000000000000); 18198 __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff); 18199 18200 __ Cmgt(v16.V8B(), v1.V8B(), 0); 18201 __ Cmgt(v17.V16B(), v1.V16B(), 0); 18202 __ Cmgt(v18.V4H(), v1.V4H(), 0); 18203 __ Cmgt(v19.V8H(), v1.V8H(), 0); 18204 __ Cmgt(v20.V2S(), v0.V2S(), 0); 18205 __ Cmgt(v21.V4S(), v0.V4S(), 0); 18206 __ Cmgt(d22, d0, 0); 18207 __ Cmgt(d23, d1, 0); 18208 __ Cmgt(v24.V2D(), v0.V2D(), 0); 18209 18210 END(); 18211 18212 RUN(); 18213 ASSERT_EQUAL_128(0x0000000000000000, 0x000000ffffff0000, q16); 18214 ASSERT_EQUAL_128(0x0000ff0000ff0000, 0x000000ffffff0000, q17); 18215 ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q18); 18216 ASSERT_EQUAL_128(0x0000ffff00000000, 0x00000000ffffffff, q19); 18217 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20); 18218 ASSERT_EQUAL_128(0xffffffffffffffff, 0x0000000000000000, q21); 18219 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q22); 18220 ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q23); 18221 ASSERT_EQUAL_128(0xffffffffffffffff, 
0x0000000000000000, q24); 18222 TEARDOWN(); 18223 } 18224 18225 18226 TEST(neon_2regmisc_neg) { 18227 SETUP(); 18228 18229 START(); 18230 18231 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081); 18232 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff); 18233 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff); 18234 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff); 18235 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000); 18236 18237 __ Neg(v16.V8B(), v0.V8B()); 18238 __ Neg(v17.V16B(), v0.V16B()); 18239 __ Neg(v18.V4H(), v1.V4H()); 18240 __ Neg(v19.V8H(), v1.V8H()); 18241 __ Neg(v20.V2S(), v2.V2S()); 18242 __ Neg(v21.V4S(), v2.V4S()); 18243 __ Neg(d22, d3); 18244 __ Neg(v23.V2D(), v3.V2D()); 18245 __ Neg(v24.V2D(), v4.V2D()); 18246 18247 END(); 18248 18249 RUN(); 18250 ASSERT_EQUAL_128(0x0000000000000000, 0x807f0100ff81807f, q16); 18251 ASSERT_EQUAL_128(0x81ff00017f8081ff, 0x807f0100ff81807f, q17); 18252 ASSERT_EQUAL_128(0x0000000000000000, 0x00010000ffff8001, q18); 18253 ASSERT_EQUAL_128(0x80007fff00010000, 0x00010000ffff8001, q19); 18254 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000001, q20); 18255 ASSERT_EQUAL_128(0x8000000000000001, 0x0000000080000001, q21); 18256 ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000000001, q22); 18257 ASSERT_EQUAL_128(0x7fffffffffffffff, 0x8000000000000001, q23); 18258 ASSERT_EQUAL_128(0x8000000000000000, 0x0000000000000000, q24); 18259 18260 TEARDOWN(); 18261 } 18262 18263 18264 TEST(neon_2regmisc_sqneg) { 18265 SETUP(); 18266 18267 START(); 18268 18269 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081); 18270 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff); 18271 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff); 18272 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff); 18273 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000); 18274 18275 __ Sqneg(v16.V8B(), v0.V8B()); 18276 __ Sqneg(v17.V16B(), v0.V16B()); 18277 __ Sqneg(v18.V4H(), v1.V4H()); 18278 __ Sqneg(v19.V8H(), v1.V8H()); 18279 __ Sqneg(v20.V2S(), v2.V2S()); 18280 __ Sqneg(v21.V4S(), v2.V4S()); 18281 __ Sqneg(v22.V2D(), v3.V2D()); 18282 __ Sqneg(v23.V2D(), v4.V2D()); 18283 18284 __ Sqneg(b24, b0); 18285 __ Sqneg(h25, h1); 18286 __ Sqneg(s26, s2); 18287 __ Sqneg(d27, d3); 18288 18289 END(); 18290 18291 RUN(); 18292 ASSERT_EQUAL_128(0x0000000000000000, 0x7f7f0100ff817f7f, q16); 18293 ASSERT_EQUAL_128(0x81ff00017f7f81ff, 0x7f7f0100ff817f7f, q17); 18294 ASSERT_EQUAL_128(0x0000000000000000, 0x00010000ffff8001, q18); 18295 ASSERT_EQUAL_128(0x7fff7fff00010000, 0x00010000ffff8001, q19); 18296 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000001, q20); 18297 ASSERT_EQUAL_128(0x7fffffff00000001, 0x0000000080000001, q21); 18298 ASSERT_EQUAL_128(0x7fffffffffffffff, 0x8000000000000001, q22); 18299 ASSERT_EQUAL_128(0x7fffffffffffffff, 0x0000000000000000, q23); 18300 18301 ASSERT_EQUAL_128(0, 0x7f, q24); 18302 ASSERT_EQUAL_128(0, 0x8001, q25); 18303 ASSERT_EQUAL_128(0, 0x80000001, q26); 18304 ASSERT_EQUAL_128(0, 0x8000000000000001, q27); 18305 18306 TEARDOWN(); 18307 } 18308 18309 18310 TEST(neon_2regmisc_abs) { 18311 SETUP(); 18312 18313 START(); 18314 18315 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081); 18316 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff); 18317 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff); 18318 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff); 18319 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000); 18320 18321 __ Abs(v16.V8B(), v0.V8B()); 18322 __ 
Abs(v17.V16B(), v0.V16B()); 18323 __ Abs(v18.V4H(), v1.V4H()); 18324 __ Abs(v19.V8H(), v1.V8H()); 18325 __ Abs(v20.V2S(), v2.V2S()); 18326 __ Abs(v21.V4S(), v2.V4S()); 18327 __ Abs(d22, d3); 18328 __ Abs(v23.V2D(), v3.V2D()); 18329 __ Abs(v24.V2D(), v4.V2D()); 18330 18331 END(); 18332 18333 RUN(); 18334 ASSERT_EQUAL_128(0x0000000000000000, 0x807f0100017f807f, q16); 18335 ASSERT_EQUAL_128(0x7f0100017f807f01, 0x807f0100017f807f, q17); 18336 ASSERT_EQUAL_128(0x0000000000000000, 0x0001000000017fff, q18); 18337 ASSERT_EQUAL_128(0x80007fff00010000, 0x0001000000017fff, q19); 18338 ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20); 18339 ASSERT_EQUAL_128(0x8000000000000001, 0x000000007fffffff, q21); 18340 ASSERT_EQUAL_128(0x0000000000000000, 0x7fffffffffffffff, q22); 18341 ASSERT_EQUAL_128(0x7fffffffffffffff, 0x7fffffffffffffff, q23); 18342 ASSERT_EQUAL_128(0x8000000000000000, 0x0000000000000000, q24); 18343 18344 TEARDOWN(); 18345 } 18346 18347 18348 TEST(neon_2regmisc_sqabs) { 18349 SETUP(); 18350 18351 START(); 18352 18353 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081); 18354 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff); 18355 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff); 18356 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff); 18357 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000); 18358 18359 __ Sqabs(v16.V8B(), v0.V8B()); 18360 __ Sqabs(v17.V16B(), v0.V16B()); 18361 __ Sqabs(v18.V4H(), v1.V4H()); 18362 __ Sqabs(v19.V8H(), v1.V8H()); 18363 __ Sqabs(v20.V2S(), v2.V2S()); 18364 __ Sqabs(v21.V4S(), v2.V4S()); 18365 __ Sqabs(v22.V2D(), v3.V2D()); 18366 __ Sqabs(v23.V2D(), v4.V2D()); 18367 18368 __ Sqabs(b24, b0); 18369 __ Sqabs(h25, h1); 18370 __ Sqabs(s26, s2); 18371 __ Sqabs(d27, d3); 18372 18373 END(); 18374 18375 RUN(); 18376 ASSERT_EQUAL_128(0x0000000000000000, 0x7f7f0100017f7f7f, q16); 18377 ASSERT_EQUAL_128(0x7f0100017f7f7f01, 0x7f7f0100017f7f7f, q17); 18378 ASSERT_EQUAL_128(0x0000000000000000, 0x0001000000017fff, q18); 18379 ASSERT_EQUAL_128(0x7fff7fff00010000, 0x0001000000017fff, q19); 18380 ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20); 18381 ASSERT_EQUAL_128(0x7fffffff00000001, 0x000000007fffffff, q21); 18382 ASSERT_EQUAL_128(0x7fffffffffffffff, 0x7fffffffffffffff, q22); 18383 ASSERT_EQUAL_128(0x7fffffffffffffff, 0x0000000000000000, q23); 18384 18385 ASSERT_EQUAL_128(0, 0x7f, q24); 18386 ASSERT_EQUAL_128(0, 0x7fff, q25); 18387 ASSERT_EQUAL_128(0, 0x7fffffff, q26); 18388 ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q27); 18389 18390 TEARDOWN(); 18391 } 18392 18393 TEST(neon_2regmisc_suqadd) { 18394 SETUP(); 18395 18396 START(); 18397 18398 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081); 18399 __ Movi(v1.V2D(), 0x017f8081ff00017f, 0x010080ff7f0180ff); 18400 18401 __ Movi(v2.V2D(), 0x80008001ffff0000, 0xffff000000017ffd); 18402 __ Movi(v3.V2D(), 0xffff000080008001, 0x00017fffffff0001); 18403 18404 __ Movi(v4.V2D(), 0x80000000fffffffe, 0xfffffff17ffffffe); 18405 __ Movi(v5.V2D(), 0xffffffff80000000, 0x7fffffff00000002); 18406 18407 __ Movi(v6.V2D(), 0x8000000000000001, 0x7fffffffffffffff); 18408 __ Movi(v7.V2D(), 0x8000000000000000, 0x8000000000000002); 18409 18410 __ Mov(v16.V2D(), v0.V2D()); 18411 __ Mov(v17.V2D(), v0.V2D()); 18412 __ Mov(v18.V2D(), v2.V2D()); 18413 __ Mov(v19.V2D(), v2.V2D()); 18414 __ Mov(v20.V2D(), v4.V2D()); 18415 __ Mov(v21.V2D(), v4.V2D()); 18416 __ Mov(v22.V2D(), v6.V2D()); 18417 18418 __ Mov(v23.V2D(), v0.V2D()); 18419 __ Mov(v24.V2D(), v2.V2D()); 18420 __ Mov(v25.V2D(), 
v4.V2D()); 18421 __ Mov(v26.V2D(), v6.V2D()); 18422 18423 __ Suqadd(v16.V8B(), v1.V8B()); 18424 __ Suqadd(v17.V16B(), v1.V16B()); 18425 __ Suqadd(v18.V4H(), v3.V4H()); 18426 __ Suqadd(v19.V8H(), v3.V8H()); 18427 __ Suqadd(v20.V2S(), v5.V2S()); 18428 __ Suqadd(v21.V4S(), v5.V4S()); 18429 __ Suqadd(v22.V2D(), v7.V2D()); 18430 18431 __ Suqadd(b23, b1); 18432 __ Suqadd(h24, h3); 18433 __ Suqadd(s25, s5); 18434 __ Suqadd(d26, d7); 18435 18436 END(); 18437 18438 RUN(); 18439 ASSERT_EQUAL_128(0x0000000000000000, 0x81817f7f7f7f007f, q16); 18440 ASSERT_EQUAL_128(0x7f7f7f7f7f807f7f, 0x81817f7f7f7f007f, q17); 18441 ASSERT_EQUAL_128(0x0000000000000000, 0x00007fff7fff7ffe, q18); 18442 ASSERT_EQUAL_128(0x7fff80017fff7fff, 0x00007fff7fff7ffe, q19); 18443 ASSERT_EQUAL_128(0x0000000000000000, 0x7ffffff07fffffff, q20); 18444 ASSERT_EQUAL_128(0x7fffffff7ffffffe, 0x7ffffff07fffffff, q21); 18445 ASSERT_EQUAL_128(0x0000000000000001, 0x7fffffffffffffff, q22); 18446 18447 ASSERT_EQUAL_128(0, 0x7f, q23); 18448 ASSERT_EQUAL_128(0, 0x7ffe, q24); 18449 ASSERT_EQUAL_128(0, 0x7fffffff, q25); 18450 ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q26); 18451 TEARDOWN(); 18452 } 18453 18454 TEST(neon_2regmisc_usqadd) { 18455 SETUP(); 18456 18457 START(); 18458 18459 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f7ffe); 18460 __ Movi(v1.V2D(), 0x017f8081ff00017f, 0x010080ff7f018002); 18461 18462 __ Movi(v2.V2D(), 0x80008001fffe0000, 0xffff000000017ffd); 18463 __ Movi(v3.V2D(), 0xffff000000028001, 0x00017fffffff0001); 18464 18465 __ Movi(v4.V2D(), 0x80000000fffffffe, 0x00000001fffffffe); 18466 __ Movi(v5.V2D(), 0xffffffff80000000, 0xfffffffe00000002); 18467 18468 __ Movi(v6.V2D(), 0x8000000000000002, 0x7fffffffffffffff); 18469 __ Movi(v7.V2D(), 0x7fffffffffffffff, 0x8000000000000000); 18470 18471 __ Mov(v16.V2D(), v0.V2D()); 18472 __ Mov(v17.V2D(), v0.V2D()); 18473 __ Mov(v18.V2D(), v2.V2D()); 18474 __ Mov(v19.V2D(), v2.V2D()); 18475 __ Mov(v20.V2D(), v4.V2D()); 18476 __ Mov(v21.V2D(), v4.V2D()); 18477 __ Mov(v22.V2D(), v6.V2D()); 18478 18479 __ Mov(v23.V2D(), v0.V2D()); 18480 __ Mov(v24.V2D(), v2.V2D()); 18481 __ Mov(v25.V2D(), v4.V2D()); 18482 __ Mov(v26.V2D(), v6.V2D()); 18483 18484 __ Usqadd(v16.V8B(), v1.V8B()); 18485 __ Usqadd(v17.V16B(), v1.V16B()); 18486 __ Usqadd(v18.V4H(), v3.V4H()); 18487 __ Usqadd(v19.V8H(), v3.V8H()); 18488 __ Usqadd(v20.V2S(), v5.V2S()); 18489 __ Usqadd(v21.V4S(), v5.V4S()); 18490 __ Usqadd(v22.V2D(), v7.V2D()); 18491 18492 __ Usqadd(b23, b1); 18493 __ Usqadd(h24, h3); 18494 __ Usqadd(s25, s5); 18495 __ Usqadd(d26, d7); 18496 18497 END(); 18498 18499 RUN(); 18500 ASSERT_EQUAL_128(0x0000000000000000, 0x81817f00808000ff, q16); 18501 ASSERT_EQUAL_128(0x8080008080808080, 0x81817f00808000ff, q17); 18502 ASSERT_EQUAL_128(0x0000000000000000, 0xffff7fff00007ffe, q18); 18503 ASSERT_EQUAL_128(0x7fff8001ffff0000, 0xffff7fff00007ffe, q19); 18504 ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q20); 18505 ASSERT_EQUAL_128(0x7fffffff7ffffffe, 0x00000000ffffffff, q21); 18506 ASSERT_EQUAL_128(0xffffffffffffffff, 0x0000000000000000, q22); 18507 18508 ASSERT_EQUAL_128(0, 0xff, q23); 18509 ASSERT_EQUAL_128(0, 0x7ffe, q24); 18510 ASSERT_EQUAL_128(0, 0xffffffff, q25); 18511 ASSERT_EQUAL_128(0, 0x0000000000000000, q26); 18512 TEARDOWN(); 18513 } 18514 18515 18516 TEST(system_sys) { 18517 SETUP(); 18518 const char* msg = "SYS test!"; 18519 uintptr_t msg_addr = reinterpret_cast<uintptr_t>(msg); 18520 18521 START(); 18522 __ Mov(x4, msg_addr); 18523 __ Sys(3, 0x7, 0x5, 1, x4); 18524 __ Mov(x3, x4); 18525 __ 

TEST(system_sys) {
  SETUP();
  const char* msg = "SYS test!";
  uintptr_t msg_addr = reinterpret_cast<uintptr_t>(msg);

  START();
  __ Mov(x4, msg_addr);
  __ Sys(3, 0x7, 0x5, 1, x4);
  __ Mov(x3, x4);
  __ Sys(3, 0x7, 0xa, 1, x3);
  __ Mov(x2, x3);
  __ Sys(3, 0x7, 0xb, 1, x2);
  __ Mov(x1, x2);
  __ Sys(3, 0x7, 0xe, 1, x1);
  // TODO: Add tests to check ZVA equivalent.
  END();

  RUN();

  TEARDOWN();
}


TEST(system_ic) {
  SETUP();
  const char* msg = "IC test!";
  uintptr_t msg_addr = reinterpret_cast<uintptr_t>(msg);

  START();
  __ Mov(x11, msg_addr);
  __ Ic(IVAU, x11);
  END();

  RUN();

  TEARDOWN();
}


TEST(system_dc) {
  SETUP();
  const char* msg = "DC test!";
  uintptr_t msg_addr = reinterpret_cast<uintptr_t>(msg);

  START();
  __ Mov(x20, msg_addr);
  __ Dc(CVAC, x20);
  __ Mov(x21, x20);
  __ Dc(CVAU, x21);
  __ Mov(x22, x21);
  __ Dc(CIVAC, x22);
  // TODO: Add tests to check ZVA.
  END();

  RUN();

  TEARDOWN();
}
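
// Narrowing moves: XTN (and XTN2 for the upper half) simply truncates each
// element to half its width. SQXTN saturates to the signed range of the
// narrower type, UQXTN saturates to the unsigned range, and SQXTUN takes
// signed source elements and saturates them to the unsigned range.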

TEST(neon_2regmisc_xtn) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Xtn(v16.V8B(), v0.V8H());
  __ Xtn2(v16.V16B(), v1.V8H());
  __ Xtn(v17.V4H(), v1.V4S());
  __ Xtn2(v17.V8H(), v2.V4S());
  __ Xtn(v18.V2S(), v3.V2D());
  __ Xtn2(v18.V4S(), v4.V2D());

  END();

  RUN();
  ASSERT_EQUAL_128(0x0001ff00ff0001ff, 0x01ff800181007f81, q16);
  ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x8001000000007fff, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00000001ffffffff, q18);
  TEARDOWN();
}


TEST(neon_2regmisc_sqxtn) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x7f01007a81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Sqxtn(v16.V8B(), v0.V8H());
  __ Sqxtn2(v16.V16B(), v1.V8H());
  __ Sqxtn(v17.V4H(), v1.V4S());
  __ Sqxtn2(v17.V8H(), v2.V4S());
  __ Sqxtn(v18.V2S(), v3.V2D());
  __ Sqxtn2(v18.V4S(), v4.V2D());
  __ Sqxtn(b19, h0);
  __ Sqxtn(h20, s0);
  __ Sqxtn(s21, d0);

  END();

  RUN();
  ASSERT_EQUAL_128(0x8080ff00ff00017f, 0x7f7a807f80807f80, q16);
  ASSERT_EQUAL_128(0x8000ffff00007fff, 0x8000800080007fff, q17);
  ASSERT_EQUAL_128(0x8000000000000000, 0x800000007fffffff, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000080, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000007fff, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000000, q21);
  TEARDOWN();
}


TEST(neon_2regmisc_uqxtn) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x7f01007a81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Uqxtn(v16.V8B(), v0.V8H());
  __ Uqxtn2(v16.V16B(), v1.V8H());
  __ Uqxtn(v17.V4H(), v1.V4S());
  __ Uqxtn2(v17.V8H(), v2.V4S());
  __ Uqxtn(v18.V2S(), v3.V2D());
  __ Uqxtn2(v18.V4S(), v4.V2D());
  __ Uqxtn(b19, h0);
  __ Uqxtn(h20, s0);
  __ Uqxtn(s21, d0);

  END();

  RUN();
  ASSERT_EQUAL_128(0xffffff00ff0001ff, 0xff7affffffffffff, q16);
  ASSERT_EQUAL_128(0xffffffff0000ffff, 0xffffffffffffffff, q17);
  ASSERT_EQUAL_128(0xffffffff00000000, 0xffffffffffffffff, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00000000000000ff, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000ffff, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q21);
  TEARDOWN();
}


TEST(neon_2regmisc_sqxtun) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x7f01007a81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Sqxtun(v16.V8B(), v0.V8H());
  __ Sqxtun2(v16.V16B(), v1.V8H());
  __ Sqxtun(v17.V4H(), v1.V4S());
  __ Sqxtun2(v17.V8H(), v2.V4S());
  __ Sqxtun(v18.V2S(), v3.V2D());
  __ Sqxtun2(v18.V4S(), v4.V2D());
  __ Sqxtun(b19, h0);
  __ Sqxtun(h20, s0);
  __ Sqxtun(s21, d0);

  END();

  RUN();
  ASSERT_EQUAL_128(0x00000000000001ff, 0xff7a00ff0000ff00, q16);
  ASSERT_EQUAL_128(0x000000000000ffff, 0x000000000000ffff, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000ffff, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q21);
  TEARDOWN();
}

TEST(neon_3same_and) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x00aa55aaff55ff00, 0xaa55ff00555500ff);

  __ And(v16.V16B(), v0.V16B(), v0.V16B());  // self test
  __ And(v17.V16B(), v0.V16B(), v1.V16B());  // all combinations
  __ And(v24.V8B(), v0.V8B(), v0.V8B());     // self test
  __ And(v25.V8B(), v0.V8B(), v1.V8B());     // all combinations
  END();

  RUN();
  ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q16);
  ASSERT_EQUAL_128(0x0000000000555500, 0xaa00aa00005500aa, q17);
  ASSERT_EQUAL_128(0, 0xff00aa5500ff55aa, q24);
  ASSERT_EQUAL_128(0, 0xaa00aa00005500aa, q25);
  TEARDOWN();
}

TEST(neon_3same_bic) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x00ffaa00aa55aaff, 0xffff005500ff00ff);

  __ Bic(v16.V16B(), v0.V16B(), v0.V16B());  // self test
  __ Bic(v17.V16B(), v0.V16B(), v1.V16B());  // all combinations
  __ Bic(v24.V8B(), v0.V8B(), v0.V8B());     // self test
  __ Bic(v25.V8B(), v0.V8B(), v1.V8B());     // all combinations
  END();

  RUN();
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q16);
  ASSERT_EQUAL_128(0xff00005500aa5500, 0x0000aa0000005500, q17);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q24);
  ASSERT_EQUAL_128(0, 0x0000aa0000005500, q25);
  TEARDOWN();
}

TEST(neon_3same_orr) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x00aa55aaff55ff00, 0xaa55ff00555500ff);

  __ Orr(v16.V16B(), v0.V16B(), v0.V16B());  // self test
  __ Orr(v17.V16B(), v0.V16B(), v1.V16B());  // all combinations
  __ Orr(v24.V8B(), v0.V8B(), v0.V8B());     // self test
  __ Orr(v25.V8B(), v0.V8B(), v1.V8B());     // all combinations
  END();

  RUN();
  ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q16);
  ASSERT_EQUAL_128(0xffaaffffffffffaa, 0xff55ff5555ff55ff, q17);
  ASSERT_EQUAL_128(0, 0xff00aa5500ff55aa, q24);
  ASSERT_EQUAL_128(0, 0xff55ff5555ff55ff, q25);
  TEARDOWN();
}

TEST(neon_3same_mov) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);

  __ Mov(v16.V16B(), v0.V16B());
  __ Mov(v17.V8H(), v0.V8H());
  __ Mov(v18.V4S(), v0.V4S());
  __ Mov(v19.V2D(), v0.V2D());

  __ Mov(v24.V8B(), v0.V8B());
  __ Mov(v25.V4H(), v0.V4H());
  __ Mov(v26.V2S(), v0.V2S());
  END();

  RUN();

  ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q16);
  ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q17);
  ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q18);
  ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q19);

  ASSERT_EQUAL_128(0x0, 0xff00aa5500ff55aa, q24);
  ASSERT_EQUAL_128(0x0, 0xff00aa5500ff55aa, q25);
  ASSERT_EQUAL_128(0x0, 0xff00aa5500ff55aa, q26);

  TEARDOWN();
}

TEST(neon_3same_orn) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x00aa55aaff55ff00, 0xaa55ff00555500ff);

  __ Orn(v16.V16B(), v0.V16B(), v0.V16B());  // self test
  __ Orn(v17.V16B(), v0.V16B(), v1.V16B());  // all combinations
  __ Orn(v24.V8B(), v0.V8B(), v0.V8B());     // self test
  __ Orn(v25.V8B(), v0.V8B(), v1.V8B());     // all combinations
  END();

  RUN();
  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q16);
  ASSERT_EQUAL_128(0xff55aa5500ff55ff, 0xffaaaaffaaffffaa, q17);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q24);
  ASSERT_EQUAL_128(0, 0xffaaaaffaaffffaa, q25);
  TEARDOWN();
}

TEST(neon_3same_eor) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x00ffaa00aa55aaff, 0xffff005500ff00ff);

  __ Eor(v16.V16B(), v0.V16B(), v0.V16B());  // self test
  __ Eor(v17.V16B(), v0.V16B(), v1.V16B());  // all combinations
  __ Eor(v24.V8B(), v0.V8B(), v0.V8B());     // self test
  __ Eor(v25.V8B(), v0.V8B(), v1.V8B());     // all combinations
  END();

  RUN();
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q16);
  ASSERT_EQUAL_128(0xffff0055aaaaff55, 0x00ffaa0000005555, q17);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q24);
  ASSERT_EQUAL_128(0, 0x00ffaa0000005555, q25);
  TEARDOWN();
}
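
// BIF, BIT and BSL are bitwise insert operations distinguished by which
// operand supplies the selection mask: BSL selects between the two source
// operands using the original destination bits as the mask, BIT inserts bits
// of the first source where the second source mask is set, and BIF inserts
// them where that mask is clear.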

TEST(neon_3same_bif) {
  SETUP();

  START();

  __ Movi(v16.V2D(), 0xffff0000ff00ffff, 0xffff00000000aaaa);
  __ Movi(v0.V2D(), 0xff00ff00ff005555, 0xaaaa5555aaaaaaaa);
  __ Movi(v1.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);

  __ Movi(v17.V2D(), 0x5555aa55cccccccc, 0x33333333f0f0f0f0);
  __ Movi(v2.V2D(), 0x555555aaff00ff00, 0xff00ff00ff00ff00);
  __ Movi(v3.V2D(), 0xaa55aa5500ffff00, 0x00ffff0000ffff00);

  __ Movi(v18.V2D(), 0, 0xf0f0f0f00f0f0f0f);
  __ Movi(v4.V2D(), 0, 0xf0f0f0f0f0f0f0f0);
  __ Movi(v5.V2D(), 0, 0x00ffff0000ffff00);

  __ Bif(v16.V16B(), v0.V16B(), v1.V16B());
  __ Bif(v17.V16B(), v2.V16B(), v3.V16B());
  __ Bif(v18.V8B(), v4.V8B(), v5.V8B());
  END();

  RUN();

  ASSERT_EQUAL_128(0xffffff00ff0055ff, 0xffaa0055aa00aaaa, q16);
  ASSERT_EQUAL_128(0x5555ffffffcccc00, 0xff333300fff0f000, q17);
  ASSERT_EQUAL_128(0, 0xf0f0f0f0f00f0ff0, q18);
  TEARDOWN();
}

TEST(neon_3same_bit) {
  SETUP();

  START();

  __ Movi(v16.V2D(), 0xffff0000ff00ffff, 0xffff00000000aaaa);
  __ Movi(v0.V2D(), 0xff00ff00ff005555, 0xaaaa5555aaaaaaaa);
  __ Movi(v1.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);

  __ Movi(v17.V2D(), 0x5555aa55cccccccc, 0x33333333f0f0f0f0);
  __ Movi(v2.V2D(), 0x555555aaff00ff00, 0xff00ff00ff00ff00);
  __ Movi(v3.V2D(), 0xaa55aa5500ffff00, 0x00ffff0000ffff00);

  __ Movi(v18.V2D(), 0, 0xf0f0f0f00f0f0f0f);
  __ Movi(v4.V2D(), 0, 0xf0f0f0f0f0f0f0f0);
  __ Movi(v5.V2D(), 0, 0x00ffff0000ffff00);

  __ Bit(v16.V16B(), v0.V16B(), v1.V16B());
  __ Bit(v17.V16B(), v2.V16B(), v3.V16B());
  __ Bit(v18.V8B(), v4.V8B(), v5.V8B());
  END();

  RUN();

  ASSERT_EQUAL_128(0xff000000ff00ff55, 0xaaff550000aaaaaa, q16);
  ASSERT_EQUAL_128(0x55550000cc00ffcc, 0x3300ff33f000fff0, q17);
  ASSERT_EQUAL_128(0, 0xf0f0f0f00ff0f00f, q18);
  TEARDOWN();
}

TEST(neon_3same_bsl) {
  SETUP();

  START();

  __ Movi(v16.V2D(), 0xffff0000ff00ffff, 0xffff00000000aaaa);
  __ Movi(v0.V2D(), 0xff00ff00ff005555, 0xaaaa5555aaaaaaaa);
  __ Movi(v1.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);

  __ Movi(v17.V2D(), 0x5555aa55cccccccc, 0x33333333f0f0f0f0);
  __ Movi(v2.V2D(), 0x555555aaff00ff00, 0xff00ff00ff00ff00);
  __ Movi(v3.V2D(), 0xaa55aa5500ffff00, 0x00ffff0000ffff00);

  __ Movi(v18.V2D(), 0, 0xf0f0f0f00f0f0f0f);
  __ Movi(v4.V2D(), 0, 0xf0f0f0f0f0f0f0f0);
  __ Movi(v5.V2D(), 0, 0x00ffff0000ffff00);

  __ Bsl(v16.V16B(), v0.V16B(), v1.V16B());
  __ Bsl(v17.V16B(), v2.V16B(), v3.V16B());
  __ Bsl(v18.V8B(), v4.V8B(), v5.V8B());
  END();

  RUN();

  ASSERT_EQUAL_128(0xff0000ffff005555, 0xaaaa55aa55aaffaa, q16);
  ASSERT_EQUAL_128(0xff550000cc33ff00, 0x33ccff00f00fff00, q17);
  ASSERT_EQUAL_128(0, 0xf0fffff000f0f000, q18);
  TEARDOWN();
}


TEST(neon_3same_smax) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);

  __ Smax(v16.V8B(), v0.V8B(), v1.V8B());
  __ Smax(v18.V4H(), v0.V4H(), v1.V4H());
  __ Smax(v20.V2S(), v0.V2S(), v1.V2S());

  __ Smax(v17.V16B(), v0.V16B(), v1.V16B());
  __ Smax(v19.V8H(), v0.V8H(), v1.V8H());
  __ Smax(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0x0000000000005555, q16);
  ASSERT_EQUAL_128(0x0, 0x00000000000055ff, q18);
  ASSERT_EQUAL_128(0x0, 0x000000000000aa55, q20);
  ASSERT_EQUAL_128(0x55aa555555555555, 0x0000000000005555, q17);
  ASSERT_EQUAL_128(0x55aa555555555555, 0x00000000000055ff, q19);
  ASSERT_EQUAL_128(0x55aa555555555555, 0x000000000000aa55, q21);
  TEARDOWN();
}
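
// Pairwise and across-lanes reductions. SMAXP compares adjacent element
// pairs, drawn first from the first source and then from the second, and
// writes each pairwise maximum to the destination. ADDP, ADDV, SADDLV and
// UADDLV sum elements instead, the *ADDLV forms widening the result to
// avoid overflow.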

TEST(neon_3same_smaxp) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);

  __ Smaxp(v16.V8B(), v0.V8B(), v1.V8B());
  __ Smaxp(v18.V4H(), v0.V4H(), v1.V4H());
  __ Smaxp(v20.V2S(), v0.V2S(), v1.V2S());

  __ Smaxp(v17.V16B(), v0.V16B(), v1.V16B());
  __ Smaxp(v19.V8H(), v0.V8H(), v1.V8H());
  __ Smaxp(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0x0000ff55ffff0055, q16);
  ASSERT_EQUAL_128(0x0, 0x000055ffffff0000, q18);
  ASSERT_EQUAL_128(0x0, 0x000000000000aa55, q20);
  ASSERT_EQUAL_128(0x5555aaaa0000ff55, 0xaaaa5555ffff0055, q17);
  ASSERT_EQUAL_128(0x55aaaaaa000055ff, 0xaaaa5555ffff0000, q19);
  ASSERT_EQUAL_128(0x55aa555500000000, 0x555555550000aa55, q21);
  TEARDOWN();
}


TEST(neon_addp_scalar) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Addp(d16, v0.V2D());
  __ Addp(d17, v1.V2D());
  __ Addp(d18, v2.V2D());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0x00224466ef66fa80, q16);
  ASSERT_EQUAL_128(0x0, 0x55aa5556aa5500a9, q17);
  ASSERT_EQUAL_128(0x0, 0xaaaaaaa96655ff55, q18);
  TEARDOWN();
}

TEST(neon_acrosslanes_addv) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Addv(b16, v0.V8B());
  __ Addv(b17, v0.V16B());
  __ Addv(h18, v1.V4H());
  __ Addv(h19, v1.V8H());
  __ Addv(s20, v2.V4S());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0xc7, q16);
  ASSERT_EQUAL_128(0x0, 0x99, q17);
  ASSERT_EQUAL_128(0x0, 0x55a9, q18);
  ASSERT_EQUAL_128(0x0, 0x55fc, q19);
  ASSERT_EQUAL_128(0x0, 0x1100a9fe, q20);
  TEARDOWN();
}


TEST(neon_acrosslanes_saddlv) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Saddlv(h16, v0.V8B());
  __ Saddlv(h17, v0.V16B());
  __ Saddlv(s18, v1.V4H());
  __ Saddlv(s19, v1.V8H());
  __ Saddlv(d20, v2.V4S());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0xffc7, q16);
  ASSERT_EQUAL_128(0x0, 0xff99, q17);
  ASSERT_EQUAL_128(0x0, 0x000055a9, q18);
  ASSERT_EQUAL_128(0x0, 0x000055fc, q19);
  ASSERT_EQUAL_128(0x0, 0x0000001100a9fe, q20);
  TEARDOWN();
}


TEST(neon_acrosslanes_uaddlv) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Uaddlv(h16, v0.V8B());
  __ Uaddlv(h17, v0.V16B());
  __ Uaddlv(s18, v1.V4H());
  __ Uaddlv(s19, v1.V8H());
  __ Uaddlv(d20, v2.V4S());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0x02c7, q16);
  ASSERT_EQUAL_128(0x0, 0x0599, q17);
  ASSERT_EQUAL_128(0x0, 0x000155a9, q18);
  ASSERT_EQUAL_128(0x0, 0x000355fc, q19);
  ASSERT_EQUAL_128(0x0, 0x000000021100a9fe, q20);
  TEARDOWN();
}

TEST(neon_acrosslanes_smaxv) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Smaxv(b16, v0.V8B());
  __ Smaxv(b17, v0.V16B());
  __ Smaxv(h18, v1.V4H());
  __ Smaxv(h19, v1.V8H());
  __ Smaxv(s20, v2.V4S());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0x33, q16);
  ASSERT_EQUAL_128(0x0, 0x44, q17);
  ASSERT_EQUAL_128(0x0, 0x55ff, q18);
  ASSERT_EQUAL_128(0x0, 0x55ff, q19);
  ASSERT_EQUAL_128(0x0, 0x66555555, q20);
  TEARDOWN();
}


TEST(neon_acrosslanes_sminv) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0xfffa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Sminv(b16, v0.V8B());
  __ Sminv(b17, v0.V16B());
  __ Sminv(h18, v1.V4H());
  __ Sminv(h19, v1.V8H());
  __ Sminv(s20, v2.V4S());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0xaa, q16);
  ASSERT_EQUAL_128(0x0, 0x80, q17);
  ASSERT_EQUAL_128(0x0, 0xffaa, q18);
  ASSERT_EQUAL_128(0x0, 0xaaaa, q19);
  ASSERT_EQUAL_128(0x0, 0xaaaaaaaa, q20);
  TEARDOWN();
}

TEST(neon_acrosslanes_umaxv) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaffab, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Umaxv(b16, v0.V8B());
  __ Umaxv(b17, v0.V16B());
  __ Umaxv(h18, v1.V4H());
  __ Umaxv(h19, v1.V8H());
  __ Umaxv(s20, v2.V4S());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0xfc, q16);
  ASSERT_EQUAL_128(0x0, 0xfe, q17);
  ASSERT_EQUAL_128(0x0, 0xffaa, q18);
  ASSERT_EQUAL_128(0x0, 0xffab, q19);
  ASSERT_EQUAL_128(0x0, 0xffffffff, q20);
  TEARDOWN();
}


TEST(neon_acrosslanes_uminv) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x02112233aabbfc01);
  __ Movi(v1.V2D(), 0xfffa5555aaaa0000, 0x00010003ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Uminv(b16, v0.V8B());
  __ Uminv(b17, v0.V16B());
  __ Uminv(h18, v1.V4H());
  __ Uminv(h19, v1.V8H());
  __ Uminv(s20, v2.V4S());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0x01, q16);
  ASSERT_EQUAL_128(0x0, 0x00, q17);
  ASSERT_EQUAL_128(0x0, 0x0001, q18);
  ASSERT_EQUAL_128(0x0, 0x0000, q19);
  ASSERT_EQUAL_128(0x0, 0x0000aa00, q20);
  TEARDOWN();
}

TEST(neon_3same_smin) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);

  __ Smin(v16.V8B(), v0.V8B(), v1.V8B());
  __ Smin(v18.V4H(), v0.V4H(), v1.V4H());
  __ Smin(v20.V2S(), v0.V2S(), v1.V2S());

  __ Smin(v17.V16B(), v0.V16B(), v1.V16B());
  __ Smin(v19.V8H(), v0.V8H(), v1.V8H());
  __ Smin(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0xffffffffffaaaaff, q16);
  ASSERT_EQUAL_128(0x0, 0xffffffffffaaaa55, q18);
  ASSERT_EQUAL_128(0x0, 0xffffffffffaa55ff, q20);
  ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaaaaff, q17);
  ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaaaa55, q19);
  ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaa55ff, q21);
  TEARDOWN();
}


TEST(neon_3same_umax) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);

  __ Umax(v16.V8B(), v0.V8B(), v1.V8B());
  __ Umax(v18.V4H(), v0.V4H(), v1.V4H());
  __ Umax(v20.V2S(), v0.V2S(), v1.V2S());

  __ Umax(v17.V16B(), v0.V16B(), v1.V16B());
  __ Umax(v19.V8H(), v0.V8H(), v1.V8H());
  __ Umax(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0xffffffffffaaaaff, q16);
  ASSERT_EQUAL_128(0x0, 0xffffffffffaaaa55, q18);
  ASSERT_EQUAL_128(0x0, 0xffffffffffaa55ff, q20);
  ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaaaaff, q17);
  ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaaaa55, q19);
  ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaa55ff, q21);
  TEARDOWN();
}


TEST(neon_3same_umin) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);

  __ Umin(v16.V8B(), v0.V8B(), v1.V8B());
  __ Umin(v18.V4H(), v0.V4H(), v1.V4H());
  __ Umin(v20.V2S(), v0.V2S(), v1.V2S());

  __ Umin(v17.V16B(), v0.V16B(), v1.V16B());
  __ Umin(v19.V8H(), v0.V8H(), v1.V8H());
  __ Umin(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0x0000000000005555, q16);
  ASSERT_EQUAL_128(0x0, 0x00000000000055ff, q18);
  ASSERT_EQUAL_128(0x0, 0x000000000000aa55, q20);
  ASSERT_EQUAL_128(0x55aa555555555555, 0x0000000000005555, q17);
  ASSERT_EQUAL_128(0x55aa555555555555, 0x00000000000055ff, q19);
  ASSERT_EQUAL_128(0x55aa555555555555, 0x000000000000aa55, q21);
  TEARDOWN();
}


TEST(neon_2regmisc_mvn) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);

  __ Mvn(v16.V16B(), v0.V16B());
  __ Mvn(v17.V8H(), v0.V8H());
  __ Mvn(v18.V4S(), v0.V4S());
  __ Mvn(v19.V2D(), v0.V2D());

  __ Mvn(v24.V8B(), v0.V8B());
  __ Mvn(v25.V4H(), v0.V4H());
  __ Mvn(v26.V2S(), v0.V2S());

  END();

  RUN();

  ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q16);
  ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q17);
  ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q18);
  ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q19);

  ASSERT_EQUAL_128(0x0, 0xaa55aa55aa55aa55, q24);
  ASSERT_EQUAL_128(0x0, 0xaa55aa55aa55aa55, q25);
  ASSERT_EQUAL_128(0x0, 0xaa55aa55aa55aa55, q26);
  TEARDOWN();
}


TEST(neon_2regmisc_not) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);
  __ Movi(v1.V2D(), 0, 0x00ffff0000ffff00);

  __ Not(v16.V16B(), v0.V16B());
  __ Not(v17.V8B(), v1.V8B());
  END();

  RUN();

  ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q16);
  ASSERT_EQUAL_128(0x0, 0xff0000ffff0000ff, q17);
  TEARDOWN();
}
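
// CLS counts leading sign bits (excluding the sign bit itself), CLZ counts
// leading zero bits, and CNT counts the set bits within each byte.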

TEST(neon_2regmisc_cls_clz_cnt) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);

  __ Cls(v16.V8B(), v1.V8B());
  __ Cls(v17.V16B(), v1.V16B());
  __ Cls(v18.V4H(), v1.V4H());
  __ Cls(v19.V8H(), v1.V8H());
  __ Cls(v20.V2S(), v1.V2S());
  __ Cls(v21.V4S(), v1.V4S());

  __ Clz(v22.V8B(), v0.V8B());
  __ Clz(v23.V16B(), v0.V16B());
  __ Clz(v24.V4H(), v0.V4H());
  __ Clz(v25.V8H(), v0.V8H());
  __ Clz(v26.V2S(), v0.V2S());
  __ Clz(v27.V4S(), v0.V4S());

  __ Cnt(v28.V8B(), v0.V8B());
  __ Cnt(v29.V16B(), v1.V16B());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x0601000000000102, q16);
  ASSERT_EQUAL_128(0x0601000000000102, 0x0601000000000102, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0006000000000001, q18);
  ASSERT_EQUAL_128(0x0006000000000001, 0x0006000000000001, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000600000000, q20);
  ASSERT_EQUAL_128(0x0000000600000000, 0x0000000600000000, q21);

  ASSERT_EQUAL_128(0x0000000000000000, 0x0404040404040404, q22);
  ASSERT_EQUAL_128(0x0807060605050505, 0x0404040404040404, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0004000400040004, q24);
  ASSERT_EQUAL_128(0x000f000600050005, 0x0004000400040004, q25);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000400000004, q26);
  ASSERT_EQUAL_128(0x0000000f00000005, 0x0000000400000004, q27);

  ASSERT_EQUAL_128(0x0000000000000000, 0x0102020302030304, q28);
  ASSERT_EQUAL_128(0x0705050305030301, 0x0103030503050507, q29);

  TEARDOWN();
}

TEST(neon_2regmisc_rev) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);

  __ Rev16(v16.V8B(), v0.V8B());
  __ Rev16(v17.V16B(), v0.V16B());

  __ Rev32(v18.V8B(), v0.V8B());
  __ Rev32(v19.V16B(), v0.V16B());
  __ Rev32(v20.V4H(), v0.V4H());
  __ Rev32(v21.V8H(), v0.V8H());

  __ Rev64(v22.V8B(), v0.V8B());
  __ Rev64(v23.V16B(), v0.V16B());
  __ Rev64(v24.V4H(), v0.V4H());
  __ Rev64(v25.V8H(), v0.V8H());
  __ Rev64(v26.V2S(), v0.V2S());
  __ Rev64(v27.V4S(), v0.V4S());

  __ Rbit(v28.V8B(), v1.V8B());
  __ Rbit(v29.V16B(), v1.V16B());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x09080b0a0d0c0f0e, q16);
  ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q17);

  ASSERT_EQUAL_128(0x0000000000000000, 0x0b0a09080f0e0d0c, q18);
  ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0a0b08090e0f0c0d, q20);
  ASSERT_EQUAL_128(0x0203000106070405, 0x0a0b08090e0f0c0d, q21);

  ASSERT_EQUAL_128(0x0000000000000000, 0x0f0e0d0c0b0a0908, q22);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0e0f0c0d0a0b0809, q24);
  ASSERT_EQUAL_128(0x0607040502030001, 0x0e0f0c0d0a0b0809, q25);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0c0d0e0f08090a0b, q26);
  ASSERT_EQUAL_128(0x0405060700010203, 0x0c0d0e0f08090a0b, q27);

  ASSERT_EQUAL_128(0x0000000000000000, 0x80c4a2e691d5b3f7, q28);
  ASSERT_EQUAL_128(0x7f3b5d196e2a4c08, 0x80c4a2e691d5b3f7, q29);

  TEARDOWN();
}
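
// SLI and SRI shift each source element left or right and insert the result
// into the destination, leaving the bits not covered by the shifted value
// (the low bits for SLI, the high bits for SRI) unchanged.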


TEST(neon_sli) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);

  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v0.V2D());
  __ Mov(v19.V2D(), v0.V2D());
  __ Mov(v20.V2D(), v0.V2D());
  __ Mov(v21.V2D(), v0.V2D());
  __ Mov(v22.V2D(), v0.V2D());
  __ Mov(v23.V2D(), v0.V2D());

  __ Sli(v16.V8B(), v1.V8B(), 4);
  __ Sli(v17.V16B(), v1.V16B(), 7);
  __ Sli(v18.V4H(), v1.V4H(), 8);
  __ Sli(v19.V8H(), v1.V8H(), 15);
  __ Sli(v20.V2S(), v1.V2S(), 0);
  __ Sli(v21.V4S(), v1.V4S(), 31);
  __ Sli(v22.V2D(), v1.V2D(), 48);

  __ Sli(d23, d1, 48);

  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x18395a7b9cbddeff, q16);
  ASSERT_EQUAL_128(0x0001020304050607, 0x88898a8b8c8d8e8f, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x2309670bab0def0f, q18);
  ASSERT_EQUAL_128(0x0001020304050607, 0x88098a0b8c0d8e0f, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0123456789abcdef, q20);
  ASSERT_EQUAL_128(0x0001020304050607, 0x88090a0b8c0d0e0f, q21);
  ASSERT_EQUAL_128(0x3210020304050607, 0xcdef0a0b0c0d0e0f, q22);

  ASSERT_EQUAL_128(0x0000000000000000, 0xcdef0a0b0c0d0e0f, q23);


  TEARDOWN();
}


TEST(neon_sri) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);

  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v0.V2D());
  __ Mov(v19.V2D(), v0.V2D());
  __ Mov(v20.V2D(), v0.V2D());
  __ Mov(v21.V2D(), v0.V2D());
  __ Mov(v22.V2D(), v0.V2D());
  __ Mov(v23.V2D(), v0.V2D());

  __ Sri(v16.V8B(), v1.V8B(), 4);
  __ Sri(v17.V16B(), v1.V16B(), 7);
  __ Sri(v18.V4H(), v1.V4H(), 8);
  __ Sri(v19.V8H(), v1.V8H(), 15);
  __ Sri(v20.V2S(), v1.V2S(), 1);
  __ Sri(v21.V4S(), v1.V4S(), 31);
  __ Sri(v22.V2D(), v1.V2D(), 48);

  __ Sri(d23, d1, 48);

  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x00020406080a0c0e, q16);
  ASSERT_EQUAL_128(0x0101030304040606, 0x08080a0a0d0d0f0f, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x08010a450c890ecd, q18);
  ASSERT_EQUAL_128(0x0001020304040606, 0x08080a0a0c0d0e0f, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0091a2b344d5e6f7, q20);
  ASSERT_EQUAL_128(0x0001020304050606, 0x08090a0a0c0d0e0f, q21);
  ASSERT_EQUAL_128(0x000102030405fedc, 0x08090a0b0c0d0123, q22);

  ASSERT_EQUAL_128(0x0000000000000000, 0x08090a0b0c0d0123, q23);


  TEARDOWN();
}
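
// Shift-right-narrow family: SHRN shifts right and truncates to half width;
// the R variants round instead of truncating, the Q variants saturate, and
// the UN variants (SQSHRUN, SQRSHRUN) narrow signed sources to an unsigned
// result. The "2" forms write the upper half of the destination.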

TEST(neon_shrn) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Shrn(v16.V8B(), v0.V8H(), 8);
  __ Shrn2(v16.V16B(), v1.V8H(), 1);
  __ Shrn(v17.V4H(), v1.V4S(), 16);
  __ Shrn2(v17.V8H(), v2.V4S(), 1);
  __ Shrn(v18.V2S(), v3.V2D(), 32);
  __ Shrn2(v18.V4S(), v3.V2D(), 1);

  END();

  RUN();
  ASSERT_EQUAL_128(0x0000ff00ff0000ff, 0x7f00817f80ff0180, q16);
  ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x8000ffffffff0001, q17);
  ASSERT_EQUAL_128(0x00000000ffffffff, 0x800000007fffffff, q18);
  TEARDOWN();
}


TEST(neon_rshrn) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Rshrn(v16.V8B(), v0.V8H(), 8);
  __ Rshrn2(v16.V16B(), v1.V8H(), 1);
  __ Rshrn(v17.V4H(), v1.V4S(), 16);
  __ Rshrn2(v17.V8H(), v2.V4S(), 1);
  __ Rshrn(v18.V2S(), v3.V2D(), 32);
  __ Rshrn2(v18.V4S(), v3.V2D(), 1);

  END();

  RUN();
  ASSERT_EQUAL_128(0x0001000000000100, 0x7f01827f81ff0181, q16);
  ASSERT_EQUAL_128(0x0000000000000000, 0x8001ffffffff0001, q17);
  ASSERT_EQUAL_128(0x0000000100000000, 0x8000000080000000, q18);
  TEARDOWN();
}


TEST(neon_uqshrn) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Uqshrn(v16.V8B(), v0.V8H(), 8);
  __ Uqshrn2(v16.V16B(), v1.V8H(), 1);
  __ Uqshrn(v17.V4H(), v1.V4S(), 16);
  __ Uqshrn2(v17.V8H(), v2.V4S(), 1);
  __ Uqshrn(v18.V2S(), v3.V2D(), 32);
  __ Uqshrn2(v18.V4S(), v3.V2D(), 1);

  __ Uqshrn(b19, h0, 8);
  __ Uqshrn(h20, s1, 16);
  __ Uqshrn(s21, d3, 32);

  END();

  RUN();
  ASSERT_EQUAL_128(0xffffff00ff0000ff, 0x7f00817f80ff0180, q16);
  ASSERT_EQUAL_128(0xffffffff0000ffff, 0x8000ffffffff0001, q17);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0x800000007fffffff, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000080, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  TEARDOWN();
}


TEST(neon_uqrshrn) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Uqrshrn(v16.V8B(), v0.V8H(), 8);
  __ Uqrshrn2(v16.V16B(), v1.V8H(), 1);
  __ Uqrshrn(v17.V4H(), v1.V4S(), 16);
  __ Uqrshrn2(v17.V8H(), v2.V4S(), 1);
  __ Uqrshrn(v18.V2S(), v3.V2D(), 32);
  __ Uqrshrn2(v18.V4S(), v3.V2D(), 1);

  __ Uqrshrn(b19, h0, 8);
  __ Uqrshrn(h20, s1, 16);
  __ Uqrshrn(s21, d3, 32);

  END();

  RUN();
  ASSERT_EQUAL_128(0xffffff00ff0001ff, 0x7f01827f81ff0181, q16);
  ASSERT_EQUAL_128(0xffffffff0000ffff, 0x8001ffffffff0001, q17);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0x8000000080000000, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000081, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000000, q21);
  TEARDOWN();
}

TEST(neon_sqshrn) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Sqshrn(v16.V8B(), v0.V8H(), 8);
  __ Sqshrn2(v16.V16B(), v1.V8H(), 1);
  __ Sqshrn(v17.V4H(), v1.V4S(), 16);
  __ Sqshrn2(v17.V8H(), v2.V4S(), 1);
  __ Sqshrn(v18.V2S(), v3.V2D(), 32);
  __ Sqshrn2(v18.V4S(), v3.V2D(), 1);

  __ Sqshrn(b19, h0, 8);
  __ Sqshrn(h20, s1, 16);
  __ Sqshrn(s21, d3, 32);

  END();

  RUN();
  ASSERT_EQUAL_128(0x8080ff00ff00007f, 0x7f00817f80ff0180, q16);
  ASSERT_EQUAL_128(0x8000ffff00007fff, 0x8000ffffffff0001, q17);
  ASSERT_EQUAL_128(0x800000007fffffff, 0x800000007fffffff, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000080, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  TEARDOWN();
}


TEST(neon_sqrshrn) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Sqrshrn(v16.V8B(), v0.V8H(), 8);
  __ Sqrshrn2(v16.V16B(), v1.V8H(), 1);
  __ Sqrshrn(v17.V4H(), v1.V4S(), 16);
  __ Sqrshrn2(v17.V8H(), v2.V4S(), 1);
  __ Sqrshrn(v18.V2S(), v3.V2D(), 32);
  __ Sqrshrn2(v18.V4S(), v3.V2D(), 1);

  __ Sqrshrn(b19, h0, 8);
  __ Sqrshrn(h20, s1, 16);
  __ Sqrshrn(s21, d3, 32);

  END();

  RUN();
  ASSERT_EQUAL_128(0x808000000000017f, 0x7f01827f81ff0181, q16);
  ASSERT_EQUAL_128(0x8000000000007fff, 0x8001ffffffff0001, q17);
  ASSERT_EQUAL_128(0x800000007fffffff, 0x800000007fffffff, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000081, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  TEARDOWN();
}


TEST(neon_sqshrun) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Sqshrun(v16.V8B(), v0.V8H(), 8);
  __ Sqshrun2(v16.V16B(), v1.V8H(), 1);
  __ Sqshrun(v17.V4H(), v1.V4S(), 16);
  __ Sqshrun2(v17.V8H(), v2.V4S(), 1);
  __ Sqshrun(v18.V2S(), v3.V2D(), 32);
  __ Sqshrun2(v18.V4S(), v3.V2D(), 1);

  __ Sqshrun(b19, h0, 8);
  __ Sqshrun(h20, s1, 16);
  __ Sqshrun(s21, d3, 32);

  END();

  RUN();
  ASSERT_EQUAL_128(0x00000000000001ff, 0x7f00007f00000100, q16);
  ASSERT_EQUAL_128(0x000000000000ffff, 0x0000000000000001, q17);
  ASSERT_EQUAL_128(0x00000000ffffffff, 0x000000007fffffff, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  TEARDOWN();
}

TEST(neon_sqrshrun) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Sqrshrun(v16.V8B(), v0.V8H(), 8);
  __ Sqrshrun2(v16.V16B(), v1.V8H(), 1);
  __ Sqrshrun(v17.V4H(), v1.V4S(), 16);
  __ Sqrshrun2(v17.V8H(), v2.V4S(), 1);
  __ Sqrshrun(v18.V2S(), v3.V2D(), 32);
  __ Sqrshrun2(v18.V4S(), v3.V2D(), 1);

  __ Sqrshrun(b19, h0, 8);
  __ Sqrshrun(h20, s1, 16);
  __ Sqrshrun(s21, d3, 32);

  END();

  RUN();
  ASSERT_EQUAL_128(0x00000000000001ff, 0x7f01007f00000100, q16);
  ASSERT_EQUAL_128(0x000000000000ffff, 0x0000000000000001, q17);
  ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000000080000000, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000000, q21);
  TEARDOWN();
}

TEST(neon_modimm_bic) {
  SETUP();

  START();

  __ Movi(v16.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v17.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v18.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v19.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v20.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v21.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v22.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v23.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v24.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v25.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v26.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v27.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);

  __ Bic(v16.V4H(), 0x00, 0);
  __ Bic(v17.V4H(), 0xff, 8);
  __ Bic(v18.V8H(), 0x00, 0);
  __ Bic(v19.V8H(), 0xff, 8);

  __ Bic(v20.V2S(), 0x00, 0);
  __ Bic(v21.V2S(), 0xff, 8);
  __ Bic(v22.V2S(), 0x00, 16);
  __ Bic(v23.V2S(), 0xff, 24);

  __ Bic(v24.V4S(), 0xff, 0);
  __ Bic(v25.V4S(), 0x00, 8);
  __ Bic(v26.V4S(), 0xff, 16);
  __ Bic(v27.V4S(), 0x00, 24);

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q16);
  ASSERT_EQUAL_128(0x0, 0x005500ff000000aa, q17);
  ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q18);
  ASSERT_EQUAL_128(0x00aa0055000000aa, 0x005500ff000000aa, q19);

  ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q20);
  ASSERT_EQUAL_128(0x0, 0x555500ff000000aa, q21);
  ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q22);
  ASSERT_EQUAL_128(0x0, 0x0055ffff0000aaaa, q23);

  ASSERT_EQUAL_128(0x00aaff00ff005500, 0x5555ff000000aa00, q24);
  ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q25);
  ASSERT_EQUAL_128(0x0000ff55ff0055aa, 0x5500ffff0000aaaa, q26);
  ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q27);

  TEARDOWN();
}
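
// MOVI and MVNI build vector constants from an 8-bit immediate and an
// optional shift; the assembler picks a suitable modified-immediate encoding
// for each of the 16-bit, 32-bit and 64-bit patterns used below.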

TEST(neon_modimm_movi_16bit_any) {
  SETUP();

  START();

  __ Movi(v0.V4H(), 0xabab);
  __ Movi(v1.V4H(), 0xab00);
  __ Movi(v2.V4H(), 0xabff);
  __ Movi(v3.V8H(), 0x00ab);
  __ Movi(v4.V8H(), 0xffab);
  __ Movi(v5.V8H(), 0xabcd);

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0xabababababababab, q0);
  ASSERT_EQUAL_128(0x0, 0xab00ab00ab00ab00, q1);
  ASSERT_EQUAL_128(0x0, 0xabffabffabffabff, q2);
  ASSERT_EQUAL_128(0x00ab00ab00ab00ab, 0x00ab00ab00ab00ab, q3);
  ASSERT_EQUAL_128(0xffabffabffabffab, 0xffabffabffabffab, q4);
  ASSERT_EQUAL_128(0xabcdabcdabcdabcd, 0xabcdabcdabcdabcd, q5);

  TEARDOWN();
}


TEST(neon_modimm_movi_32bit_any) {
  SETUP();

  START();

  __ Movi(v0.V2S(), 0x000000ab);
  __ Movi(v1.V2S(), 0x0000ab00);
  __ Movi(v2.V4S(), 0x00ab0000);
  __ Movi(v3.V4S(), 0xab000000);

  __ Movi(v4.V2S(), 0xffffffab);
  __ Movi(v5.V2S(), 0xffffabff);
  __ Movi(v6.V4S(), 0xffabffff);
  __ Movi(v7.V4S(), 0xabffffff);

  __ Movi(v16.V2S(), 0x0000abff);
  __ Movi(v17.V2S(), 0x00abffff);
  __ Movi(v18.V4S(), 0xffab0000);
  __ Movi(v19.V4S(), 0xffffab00);

  __ Movi(v20.V4S(), 0xabababab);
  __ Movi(v21.V4S(), 0xabcdabcd);
  __ Movi(v22.V4S(), 0xabcdef01);
  __ Movi(v23.V4S(), 0x00ffff00);

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0x000000ab000000ab, q0);
  ASSERT_EQUAL_128(0x0, 0x0000ab000000ab00, q1);
  ASSERT_EQUAL_128(0x00ab000000ab0000, 0x00ab000000ab0000, q2);
  ASSERT_EQUAL_128(0xab000000ab000000, 0xab000000ab000000, q3);

  ASSERT_EQUAL_128(0x0, 0xffffffabffffffab, q4);
  ASSERT_EQUAL_128(0x0, 0xffffabffffffabff, q5);
  ASSERT_EQUAL_128(0xffabffffffabffff, 0xffabffffffabffff, q6);
  ASSERT_EQUAL_128(0xabffffffabffffff, 0xabffffffabffffff, q7);

  ASSERT_EQUAL_128(0x0, 0x0000abff0000abff, q16);
  ASSERT_EQUAL_128(0x0, 0x00abffff00abffff, q17);
  ASSERT_EQUAL_128(0xffab0000ffab0000, 0xffab0000ffab0000, q18);
  ASSERT_EQUAL_128(0xffffab00ffffab00, 0xffffab00ffffab00, q19);

  ASSERT_EQUAL_128(0xabababababababab, 0xabababababababab, q20);
  ASSERT_EQUAL_128(0xabcdabcdabcdabcd, 0xabcdabcdabcdabcd, q21);
  ASSERT_EQUAL_128(0xabcdef01abcdef01, 0xabcdef01abcdef01, q22);
  ASSERT_EQUAL_128(0x00ffff0000ffff00, 0x00ffff0000ffff00, q23);
  TEARDOWN();
}


TEST(neon_modimm_movi_64bit_any) {
  SETUP();

  START();

  __ Movi(v0.V1D(), 0x00ffff0000ffffff);
  __ Movi(v1.V2D(), 0xabababababababab);
  __ Movi(v2.V2D(), 0xabcdabcdabcdabcd);
  __ Movi(v3.V2D(), 0xabcdef01abcdef01);
  __ Movi(v4.V1D(), 0xabcdef0123456789);
  __ Movi(v5.V2D(), 0xabcdef0123456789);

  END();

  RUN();

  ASSERT_EQUAL_64(0x00ffff0000ffffff, d0);
  ASSERT_EQUAL_128(0xabababababababab, 0xabababababababab, q1);
  ASSERT_EQUAL_128(0xabcdabcdabcdabcd, 0xabcdabcdabcdabcd, q2);
  ASSERT_EQUAL_128(0xabcdef01abcdef01, 0xabcdef01abcdef01, q3);
  ASSERT_EQUAL_64(0xabcdef0123456789, d4);
  ASSERT_EQUAL_128(0xabcdef0123456789, 0xabcdef0123456789, q5);

  TEARDOWN();
}
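
// The explicit shift forms: LSL shifts the 8-bit immediate left by 0, 8, 16
// or 24 bits (element size permitting), while MSL is a "shift ones" variant
// that fills the vacated low-order bits with ones rather than zeros.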

TEST(neon_modimm_movi) {
  SETUP();

  START();

  __ Movi(v0.V8B(), 0xaa);
  __ Movi(v1.V16B(), 0x55);

  __ Movi(d2, 0x00ffff0000ffffff);
  __ Movi(v3.V2D(), 0x00ffff0000ffffff);

  __ Movi(v16.V4H(), 0x00, LSL, 0);
  __ Movi(v17.V4H(), 0xff, LSL, 8);
  __ Movi(v18.V8H(), 0x00, LSL, 0);
  __ Movi(v19.V8H(), 0xff, LSL, 8);

  __ Movi(v20.V2S(), 0x00, LSL, 0);
  __ Movi(v21.V2S(), 0xff, LSL, 8);
  __ Movi(v22.V2S(), 0x00, LSL, 16);
  __ Movi(v23.V2S(), 0xff, LSL, 24);

  __ Movi(v24.V4S(), 0xff, LSL, 0);
  __ Movi(v25.V4S(), 0x00, LSL, 8);
  __ Movi(v26.V4S(), 0xff, LSL, 16);
  __ Movi(v27.V4S(), 0x00, LSL, 24);

  __ Movi(v28.V2S(), 0xaa, MSL, 8);
  __ Movi(v29.V2S(), 0x55, MSL, 16);
  __ Movi(v30.V4S(), 0xff, MSL, 8);
  __ Movi(v31.V4S(), 0x00, MSL, 16);

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0xaaaaaaaaaaaaaaaa, q0);
  ASSERT_EQUAL_128(0x5555555555555555, 0x5555555555555555, q1);

  ASSERT_EQUAL_128(0x0, 0x00ffff0000ffffff, q2);
  ASSERT_EQUAL_128(0x00ffff0000ffffff, 0x00ffff0000ffffff, q3);

  ASSERT_EQUAL_128(0x0, 0x0000000000000000, q16);
  ASSERT_EQUAL_128(0x0, 0xff00ff00ff00ff00, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q18);
  ASSERT_EQUAL_128(0xff00ff00ff00ff00, 0xff00ff00ff00ff00, q19);

  ASSERT_EQUAL_128(0x0, 0x0000000000000000, q20);
  ASSERT_EQUAL_128(0x0, 0x0000ff000000ff00, q21);
  ASSERT_EQUAL_128(0x0, 0x0000000000000000, q22);
  ASSERT_EQUAL_128(0x0, 0xff000000ff000000, q23);

  ASSERT_EQUAL_128(0x000000ff000000ff, 0x000000ff000000ff, q24);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
  ASSERT_EQUAL_128(0x00ff000000ff0000, 0x00ff000000ff0000, q26);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q27);

  ASSERT_EQUAL_128(0x0, 0x0000aaff0000aaff, q28);
  ASSERT_EQUAL_128(0x0, 0x0055ffff0055ffff, q29);
  ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x0000ffff0000ffff, q30);
  ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x0000ffff0000ffff, q31);

  TEARDOWN();
}


TEST(neon_modimm_mvni) {
  SETUP();

  START();

  __ Mvni(v16.V4H(), 0x00, LSL, 0);
  __ Mvni(v17.V4H(), 0xff, LSL, 8);
  __ Mvni(v18.V8H(), 0x00, LSL, 0);
  __ Mvni(v19.V8H(), 0xff, LSL, 8);

  __ Mvni(v20.V2S(), 0x00, LSL, 0);
  __ Mvni(v21.V2S(), 0xff, LSL, 8);
  __ Mvni(v22.V2S(), 0x00, LSL, 16);
  __ Mvni(v23.V2S(), 0xff, LSL, 24);

  __ Mvni(v24.V4S(), 0xff, LSL, 0);
  __ Mvni(v25.V4S(), 0x00, LSL, 8);
  __ Mvni(v26.V4S(), 0xff, LSL, 16);
  __ Mvni(v27.V4S(), 0x00, LSL, 24);

  __ Mvni(v28.V2S(), 0xaa, MSL, 8);
  __ Mvni(v29.V2S(), 0x55, MSL, 16);
  __ Mvni(v30.V4S(), 0xff, MSL, 8);
  __ Mvni(v31.V4S(), 0x00, MSL, 16);

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0xffffffffffffffff, q16);
  ASSERT_EQUAL_128(0x0, 0x00ff00ff00ff00ff, q17);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q18);
  ASSERT_EQUAL_128(0x00ff00ff00ff00ff, 0x00ff00ff00ff00ff, q19);

  ASSERT_EQUAL_128(0x0, 0xffffffffffffffff, q20);
  ASSERT_EQUAL_128(0x0, 0xffff00ffffff00ff, q21);
  ASSERT_EQUAL_128(0x0, 0xffffffffffffffff, q22);
  ASSERT_EQUAL_128(0x0, 0x00ffffff00ffffff, q23);

  ASSERT_EQUAL_128(0xffffff00ffffff00, 0xffffff00ffffff00, q24);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
  ASSERT_EQUAL_128(0xff00ffffff00ffff, 0xff00ffffff00ffff, q26);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q27);

  ASSERT_EQUAL_128(0x0, 0xffff5500ffff5500, q28);
  ASSERT_EQUAL_128(0x0, 0xffaa0000ffaa0000, q29);
  ASSERT_EQUAL_128(0xffff0000ffff0000, 0xffff0000ffff0000, q30);
  ASSERT_EQUAL_128(0xffff0000ffff0000, 0xffff0000ffff0000, q31);

  TEARDOWN();
}

TEST(neon_modimm_orr) {
  SETUP();

  START();

  __ Movi(v16.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v17.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v18.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v19.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v20.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v21.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v22.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v23.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v24.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v25.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v26.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v27.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);

  __ Orr(v16.V4H(), 0x00, 0);
  __ Orr(v17.V4H(), 0xff, 8);
  __ Orr(v18.V8H(), 0x00, 0);
  __ Orr(v19.V8H(), 0xff, 8);

  __ Orr(v20.V2S(), 0x00, 0);
  __ Orr(v21.V2S(), 0xff, 8);
  __ Orr(v22.V2S(), 0x00, 16);
  __ Orr(v23.V2S(), 0xff, 24);

  __ Orr(v24.V4S(), 0xff, 0);
  __ Orr(v25.V4S(), 0x00, 8);
  __ Orr(v26.V4S(), 0xff, 16);
  __ Orr(v27.V4S(), 0x00, 24);

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q16);
  ASSERT_EQUAL_128(0x0, 0xff55ffffff00ffaa, q17);
  ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q18);
  ASSERT_EQUAL_128(0xffaaff55ff00ffaa, 0xff55ffffff00ffaa, q19);

  ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q20);
  ASSERT_EQUAL_128(0x0, 0x5555ffff0000ffaa, q21);
  ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q22);
  ASSERT_EQUAL_128(0x0, 0xff55ffffff00aaaa, q23);

  ASSERT_EQUAL_128(0x00aaffffff0055ff, 0x5555ffff0000aaff, q24);
  ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q25);
  ASSERT_EQUAL_128(0x00ffff55ffff55aa, 0x55ffffff00ffaaaa, q26);
  ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q27);

  TEARDOWN();
}
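
// FMOV (vector, immediate) can only encode floating-point values of the form
// +/- n/16 * 2^r with 16 <= n <= 31 and -3 <= r <= 4. Zero and the
// non-encodable constants in the test below have to be materialised by the
// MacroAssembler through other sequences.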

// TODO: add arbitrary values once load literal to Q registers is supported.
TEST(neon_modimm_fmov) {
  SETUP();

  // Immediates which can be encoded in the instructions.
  const float kOne = 1.0f;
  const float kPointFive = 0.5f;
  const double kMinusThirteen = -13.0;
  // Immediates which cannot be encoded in the instructions.
  const float kNonImmFP32 = 255.0f;
  const double kNonImmFP64 = 12.3456;

  START();
  __ Fmov(v11.V2S(), kOne);
  __ Fmov(v12.V4S(), kPointFive);
  __ Fmov(v22.V2D(), kMinusThirteen);
  __ Fmov(v13.V2S(), kNonImmFP32);
  __ Fmov(v14.V4S(), kNonImmFP32);
  __ Fmov(v23.V2D(), kNonImmFP64);
  __ Fmov(v1.V2S(), 0.0);
  __ Fmov(v2.V4S(), 0.0);
  __ Fmov(v3.V2D(), 0.0);
  __ Fmov(v4.V2S(), kFP32PositiveInfinity);
  __ Fmov(v5.V4S(), kFP32PositiveInfinity);
  __ Fmov(v6.V2D(), kFP64PositiveInfinity);
  END();

  RUN();

  const uint64_t kOne1S = FloatToRawbits(1.0);
  const uint64_t kOne2S = (kOne1S << 32) | kOne1S;
  const uint64_t kPointFive1S = FloatToRawbits(0.5);
  const uint64_t kPointFive2S = (kPointFive1S << 32) | kPointFive1S;
  const uint64_t kMinusThirteen1D = DoubleToRawbits(-13.0);
  const uint64_t kNonImmFP321S = FloatToRawbits(kNonImmFP32);
  const uint64_t kNonImmFP322S = (kNonImmFP321S << 32) | kNonImmFP321S;
  const uint64_t kNonImmFP641D = DoubleToRawbits(kNonImmFP64);
  const uint64_t kFP32Inf1S = FloatToRawbits(kFP32PositiveInfinity);
  const uint64_t kFP32Inf2S = (kFP32Inf1S << 32) | kFP32Inf1S;
  const uint64_t kFP64Inf1D = DoubleToRawbits(kFP64PositiveInfinity);

  ASSERT_EQUAL_128(0x0, kOne2S, q11);
  ASSERT_EQUAL_128(kPointFive2S, kPointFive2S, q12);
  ASSERT_EQUAL_128(kMinusThirteen1D, kMinusThirteen1D, q22);
  ASSERT_EQUAL_128(0x0, kNonImmFP322S, q13);
  ASSERT_EQUAL_128(kNonImmFP322S, kNonImmFP322S, q14);
  ASSERT_EQUAL_128(kNonImmFP641D, kNonImmFP641D, q23);
  ASSERT_EQUAL_128(0x0, 0x0, q1);
  ASSERT_EQUAL_128(0x0, 0x0, q2);
  ASSERT_EQUAL_128(0x0, 0x0, q3);
  ASSERT_EQUAL_128(0x0, kFP32Inf2S, q4);
  ASSERT_EQUAL_128(kFP32Inf2S, kFP32Inf2S, q5);
  ASSERT_EQUAL_128(kFP64Inf1D, kFP64Inf1D, q6);

  TEARDOWN();
}
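
// TRN1/TRN2 interleave corresponding even/odd elements of the two sources,
// ZIP1/ZIP2 interleave the low/high halves element by element, and UZP1/UZP2
// de-interleave the even/odd-indexed elements.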

TEST(neon_perm) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);

  __ Trn1(v16.V16B(), v0.V16B(), v1.V16B());
  __ Trn2(v17.V16B(), v0.V16B(), v1.V16B());
  __ Zip1(v18.V16B(), v0.V16B(), v1.V16B());
  __ Zip2(v19.V16B(), v0.V16B(), v1.V16B());
  __ Uzp1(v20.V16B(), v0.V16B(), v1.V16B());
  __ Uzp2(v21.V16B(), v0.V16B(), v1.V16B());

  END();

  RUN();

  ASSERT_EQUAL_128(0x1101130315051707, 0x19091b0b1d0d1f0f, q16);
  ASSERT_EQUAL_128(0x1000120214041606, 0x18081a0a1c0c1e0e, q17);
  ASSERT_EQUAL_128(0x180819091a0a1b0b, 0x1c0c1d0d1e0e1f0f, q18);
  ASSERT_EQUAL_128(0x1000110112021303, 0x1404150516061707, q19);
  ASSERT_EQUAL_128(0x11131517191b1d1f, 0x01030507090b0d0f, q20);
  ASSERT_EQUAL_128(0x10121416181a1c1e, 0x00020406080a0c0e, q21);

  TEARDOWN();
}


TEST(neon_copy_dup_element) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100);
  __ Movi(v2.V2D(), 0xffeddccbbaae9988, 0x0011223344556677);
  __ Movi(v3.V2D(), 0x7766554433221100, 0x8899aabbccddeeff);
  __ Movi(v4.V2D(), 0x7766554433221100, 0x0123456789abcdef);
  __ Movi(v5.V2D(), 0x0011223344556677, 0x0123456789abcdef);

  __ Dup(v16.V16B(), v0.B(), 0);
  __ Dup(v17.V8H(), v1.H(), 7);
  __ Dup(v18.V4S(), v1.S(), 3);
  __ Dup(v19.V2D(), v0.D(), 0);

  __ Dup(v20.V8B(), v0.B(), 0);
  __ Dup(v21.V4H(), v1.H(), 7);
  __ Dup(v22.V2S(), v1.S(), 3);

  __ Dup(v23.B(), v0.B(), 0);
  __ Dup(v24.H(), v1.H(), 7);
  __ Dup(v25.S(), v1.S(), 3);
  __ Dup(v26.D(), v0.D(), 0);

  __ Dup(v2.V16B(), v2.B(), 0);
  __ Dup(v3.V8H(), v3.H(), 7);
  __ Dup(v4.V4S(), v4.S(), 0);
  __ Dup(v5.V2D(), v5.D(), 1);

  END();

  RUN();

  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q16);
  ASSERT_EQUAL_128(0xffedffedffedffed, 0xffedffedffedffed, q17);
  ASSERT_EQUAL_128(0xffeddccbffeddccb, 0xffeddccbffeddccb, q18);
  ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x8899aabbccddeeff, q19);

  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q20);
  ASSERT_EQUAL_128(0, 0xffedffedffedffed, q21);
  ASSERT_EQUAL_128(0, 0xffeddccbffeddccb, q22);

  ASSERT_EQUAL_128(0, 0x00000000000000ff, q23);
  ASSERT_EQUAL_128(0, 0x000000000000ffed, q24);
  ASSERT_EQUAL_128(0, 0x00000000ffeddccb, q25);
  ASSERT_EQUAL_128(0, 0x8899aabbccddeeff, q26);

  ASSERT_EQUAL_128(0x7777777777777777, 0x7777777777777777, q2);
  ASSERT_EQUAL_128(0x7766776677667766, 0x7766776677667766, q3);
  ASSERT_EQUAL_128(0x89abcdef89abcdef, 0x89abcdef89abcdef, q4);
  ASSERT_EQUAL_128(0x0011223344556677, 0x0011223344556677, q5);
  TEARDOWN();
}


TEST(neon_copy_dup_general) {
  SETUP();

  START();

  __ Mov(x0, 0x0011223344556677);

  __ Dup(v16.V16B(), w0);
  __ Dup(v17.V8H(), w0);
  __ Dup(v18.V4S(), w0);
  __ Dup(v19.V2D(), x0);

  __ Dup(v20.V8B(), w0);
  __ Dup(v21.V4H(), w0);
  __ Dup(v22.V2S(), w0);

  __ Dup(v2.V16B(), wzr);
  __ Dup(v3.V8H(), wzr);
  __ Dup(v4.V4S(), wzr);
  __ Dup(v5.V2D(), xzr);

  END();

  RUN();

  ASSERT_EQUAL_128(0x7777777777777777, 0x7777777777777777, q16);
  ASSERT_EQUAL_128(0x6677667766776677, 0x6677667766776677, q17);
  ASSERT_EQUAL_128(0x4455667744556677, 0x4455667744556677, q18);
  ASSERT_EQUAL_128(0x0011223344556677, 0x0011223344556677, q19);

  ASSERT_EQUAL_128(0, 0x7777777777777777, q20);
  ASSERT_EQUAL_128(0, 0x6677667766776677, q21);
  ASSERT_EQUAL_128(0, 0x4455667744556677, q22);

  ASSERT_EQUAL_128(0, 0, q2);
  ASSERT_EQUAL_128(0, 0, q3);
  ASSERT_EQUAL_128(0, 0, q4);
  ASSERT_EQUAL_128(0, 0, q5);
  TEARDOWN();
}


TEST(neon_copy_ins_element) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100);
  __ Movi(v16.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);
  __ Movi(v17.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
  __ Movi(v18.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v19.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);

  __ Movi(v2.V2D(), 0, 0x0011223344556677);
  __ Movi(v3.V2D(), 0, 0x8899aabbccddeeff);
  __ Movi(v4.V2D(), 0, 0x0123456789abcdef);
  __ Movi(v5.V2D(), 0, 0x0123456789abcdef);

  __ Ins(v16.V16B(), 15, v0.V16B(), 0);
  __ Ins(v17.V8H(), 0, v1.V8H(), 7);
  __ Ins(v18.V4S(), 3, v1.V4S(), 0);
  __ Ins(v19.V2D(), 1, v0.V2D(), 0);

  __ Ins(v2.V16B(), 2, v2.V16B(), 0);
  __ Ins(v3.V8H(), 0, v3.V8H(), 7);
  __ Ins(v4.V4S(), 3, v4.V4S(), 0);
  __ Ins(v5.V2D(), 0, v5.V2D(), 1);

  END();

  RUN();

  ASSERT_EQUAL_128(0xff23456789abcdef, 0xfedcba9876543210, q16);
  ASSERT_EQUAL_128(0xfedcba9876543210, 0x0123456789abffed, q17);
  ASSERT_EQUAL_128(0x3322110044556677, 0x8899aabbccddeeff, q18);
  ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x8899aabbccddeeff, q19);

  ASSERT_EQUAL_128(0, 0x0011223344776677, q2);
  ASSERT_EQUAL_128(0, 0x8899aabbccdd0000, q3);
  ASSERT_EQUAL_128(0x89abcdef00000000, 0x0123456789abcdef, q4);
  ASSERT_EQUAL_128(0, 0, q5);
  TEARDOWN();
}
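
// Mov (element) is an alias of Ins (element), so the following test mirrors
// the Ins test above and expects identical results.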
Ins(v5.V2D(), 0, v5.V2D(), 1); 20358 20359 END(); 20360 20361 RUN(); 20362 20363 ASSERT_EQUAL_128(0xff23456789abcdef, 0xfedcba9876543210, q16); 20364 ASSERT_EQUAL_128(0xfedcba9876543210, 0x0123456789abffed, q17); 20365 ASSERT_EQUAL_128(0x3322110044556677, 0x8899aabbccddeeff, q18); 20366 ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x8899aabbccddeeff, q19); 20367 20368 ASSERT_EQUAL_128(0, 0x0011223344776677, q2); 20369 ASSERT_EQUAL_128(0, 0x8899aabbccdd0000, q3); 20370 ASSERT_EQUAL_128(0x89abcdef00000000, 0x0123456789abcdef, q4); 20371 ASSERT_EQUAL_128(0, 0, q5); 20372 TEARDOWN(); 20373 } 20374 20375 20376 TEST(neon_copy_mov_element) { 20377 SETUP(); 20378 20379 START(); 20380 20381 __ Movi(v0.V2D(), 0x0011223344556677, 0x8899aabbccddeeff); 20382 __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100); 20383 __ Movi(v16.V2D(), 0x0123456789abcdef, 0xfedcba9876543210); 20384 __ Movi(v17.V2D(), 0xfedcba9876543210, 0x0123456789abcdef); 20385 __ Movi(v18.V2D(), 0x0011223344556677, 0x8899aabbccddeeff); 20386 __ Movi(v19.V2D(), 0x0011223344556677, 0x8899aabbccddeeff); 20387 20388 __ Movi(v2.V2D(), 0, 0x0011223344556677); 20389 __ Movi(v3.V2D(), 0, 0x8899aabbccddeeff); 20390 __ Movi(v4.V2D(), 0, 0x0123456789abcdef); 20391 __ Movi(v5.V2D(), 0, 0x0123456789abcdef); 20392 20393 __ Mov(v16.V16B(), 15, v0.V16B(), 0); 20394 __ Mov(v17.V8H(), 0, v1.V8H(), 7); 20395 __ Mov(v18.V4S(), 3, v1.V4S(), 0); 20396 __ Mov(v19.V2D(), 1, v0.V2D(), 0); 20397 20398 __ Mov(v2.V16B(), 2, v2.V16B(), 0); 20399 __ Mov(v3.V8H(), 0, v3.V8H(), 7); 20400 __ Mov(v4.V4S(), 3, v4.V4S(), 0); 20401 __ Mov(v5.V2D(), 0, v5.V2D(), 1); 20402 20403 END(); 20404 20405 RUN(); 20406 20407 ASSERT_EQUAL_128(0xff23456789abcdef, 0xfedcba9876543210, q16); 20408 ASSERT_EQUAL_128(0xfedcba9876543210, 0x0123456789abffed, q17); 20409 ASSERT_EQUAL_128(0x3322110044556677, 0x8899aabbccddeeff, q18); 20410 ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x8899aabbccddeeff, q19); 20411 20412 ASSERT_EQUAL_128(0, 0x0011223344776677, q2); 20413 ASSERT_EQUAL_128(0, 0x8899aabbccdd0000, q3); 20414 ASSERT_EQUAL_128(0x89abcdef00000000, 0x0123456789abcdef, q4); 20415 ASSERT_EQUAL_128(0, 0, q5); 20416 TEARDOWN(); 20417 } 20418 20419 20420 TEST(neon_copy_smov) { 20421 SETUP(); 20422 20423 START(); 20424 20425 __ Movi(v0.V2D(), 0x0123456789abcdef, 0xfedcba9876543210); 20426 20427 __ Smov(w0, v0.B(), 7); 20428 __ Smov(w1, v0.B(), 15); 20429 20430 __ Smov(w2, v0.H(), 0); 20431 __ Smov(w3, v0.H(), 3); 20432 20433 __ Smov(x4, v0.B(), 7); 20434 __ Smov(x5, v0.B(), 15); 20435 20436 __ Smov(x6, v0.H(), 0); 20437 __ Smov(x7, v0.H(), 3); 20438 20439 __ Smov(x16, v0.S(), 0); 20440 __ Smov(x17, v0.S(), 1); 20441 20442 END(); 20443 20444 RUN(); 20445 20446 ASSERT_EQUAL_32(0xfffffffe, w0); 20447 ASSERT_EQUAL_32(0x00000001, w1); 20448 ASSERT_EQUAL_32(0x00003210, w2); 20449 ASSERT_EQUAL_32(0xfffffedc, w3); 20450 ASSERT_EQUAL_64(0xfffffffffffffffe, x4); 20451 ASSERT_EQUAL_64(0x0000000000000001, x5); 20452 ASSERT_EQUAL_64(0x0000000000003210, x6); 20453 ASSERT_EQUAL_64(0xfffffffffffffedc, x7); 20454 ASSERT_EQUAL_64(0x0000000076543210, x16); 20455 ASSERT_EQUAL_64(0xfffffffffedcba98, x17); 20456 20457 TEARDOWN(); 20458 } 20459 20460 20461 TEST(neon_copy_umov_mov) { 20462 SETUP(); 20463 20464 START(); 20465 20466 __ Movi(v0.V2D(), 0x0123456789abcdef, 0xfedcba9876543210); 20467 20468 __ Umov(w0, v0.B(), 15); 20469 __ Umov(w1, v0.H(), 0); 20470 __ Umov(w2, v0.S(), 3); 20471 __ Umov(x3, v0.D(), 1); 20472 20473 __ Mov(w4, v0.S(), 3); 20474 __ Mov(x5, v0.D(), 1); 20475 20476 END(); 20477 20478 RUN(); 20479 
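  // Umov zero-extends the selected lane into the general-purpose register,
  // unlike the sign-extending Smov above. For word and doubleword lanes,
  // Mov (to general) is an alias of Umov, so w4 and x5 must match w2 and x3.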
ASSERT_EQUAL_32(0x00000001, w0);
  ASSERT_EQUAL_32(0x00003210, w1);
  ASSERT_EQUAL_32(0x01234567, w2);
  ASSERT_EQUAL_64(0x0123456789abcdef, x3);
  ASSERT_EQUAL_32(0x01234567, w4);
  ASSERT_EQUAL_64(0x0123456789abcdef, x5);

  TEARDOWN();
}


TEST(neon_copy_ins_general) {
  SETUP();

  START();

  __ Mov(x0, 0x0011223344556677);
  __ Movi(v16.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);
  __ Movi(v17.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
  __ Movi(v18.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v19.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);

  __ Movi(v2.V2D(), 0, 0x0011223344556677);
  __ Movi(v3.V2D(), 0, 0x8899aabbccddeeff);
  __ Movi(v4.V2D(), 0, 0x0123456789abcdef);
  __ Movi(v5.V2D(), 0, 0x0123456789abcdef);

  __ Ins(v16.V16B(), 15, w0);
  __ Ins(v17.V8H(), 0, w0);
  __ Ins(v18.V4S(), 3, w0);
  __ Ins(v19.V2D(), 0, x0);

  __ Ins(v2.V16B(), 2, w0);
  __ Ins(v3.V8H(), 0, w0);
  __ Ins(v4.V4S(), 3, w0);
  __ Ins(v5.V2D(), 1, x0);

  END();

  RUN();

  ASSERT_EQUAL_128(0x7723456789abcdef, 0xfedcba9876543210, q16);
  ASSERT_EQUAL_128(0xfedcba9876543210, 0x0123456789ab6677, q17);
  ASSERT_EQUAL_128(0x4455667744556677, 0x8899aabbccddeeff, q18);
  ASSERT_EQUAL_128(0x0011223344556677, 0x0011223344556677, q19);

  ASSERT_EQUAL_128(0, 0x0011223344776677, q2);
  ASSERT_EQUAL_128(0, 0x8899aabbccdd6677, q3);
  ASSERT_EQUAL_128(0x4455667700000000, 0x0123456789abcdef, q4);
  ASSERT_EQUAL_128(0x0011223344556677, 0x0123456789abcdef, q5);
  TEARDOWN();
}


TEST(neon_extract_ext) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100);

  __ Movi(v2.V2D(), 0, 0x0011223344556677);
  __ Movi(v3.V2D(), 0, 0x8899aabbccddeeff);

  __ Ext(v16.V16B(), v0.V16B(), v1.V16B(), 0);
  __ Ext(v17.V16B(), v0.V16B(), v1.V16B(), 15);
  __ Ext(v1.V16B(), v0.V16B(), v1.V16B(), 8);  // Dest is the same as one source.
  __ Ext(v0.V16B(), v0.V16B(), v0.V16B(), 8);  // All registers are the same.

  __ Ext(v18.V8B(), v2.V8B(), v3.V8B(), 0);
  __ Ext(v19.V8B(), v2.V8B(), v3.V8B(), 7);
  __ Ext(v2.V8B(), v2.V8B(), v3.V8B(), 4);  // Dest is the same as one source.
  __ Ext(v3.V8B(), v3.V8B(), v3.V8B(), 4);  // All registers are the same.

  END();

  RUN();

  ASSERT_EQUAL_128(0x0011223344556677, 0x8899aabbccddeeff, q16);
  ASSERT_EQUAL_128(0xeddccbbaae998877, 0x6655443322110000, q17);
  ASSERT_EQUAL_128(0x7766554433221100, 0x0011223344556677, q1);
  ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x0011223344556677, q0);

  ASSERT_EQUAL_128(0, 0x0011223344556677, q18);
  ASSERT_EQUAL_128(0, 0x99aabbccddeeff00, q19);
  ASSERT_EQUAL_128(0, 0xccddeeff00112233, q2);
  ASSERT_EQUAL_128(0, 0xccddeeff8899aabb, q3);
  TEARDOWN();
}


TEST(neon_3different_uaddl) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v1.V2D(), 0, 0x00010280810e0fff);
  __ Movi(v2.V2D(), 0, 0x0101010101010101);

  __ Movi(v3.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v4.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v5.V2D(), 0, 0x0000000180008001);
  __
Movi(v6.V2D(), 0, 0x000e000ff000ffff); 20585 __ Movi(v7.V2D(), 0, 0x0001000100010001); 20586 20587 __ Movi(v16.V2D(), 0x0000000000000000, 0x0000000000000000); 20588 __ Movi(v17.V2D(), 0x0000000000000000, 0x0000000000000000); 20589 __ Movi(v18.V2D(), 0, 0x0000000000000001); 20590 __ Movi(v19.V2D(), 0, 0x80000001ffffffff); 20591 __ Movi(v20.V2D(), 0, 0x0000000100000001); 20592 20593 __ Uaddl(v0.V8H(), v1.V8B(), v2.V8B()); 20594 20595 __ Uaddl(v3.V4S(), v5.V4H(), v7.V4H()); 20596 __ Uaddl(v4.V4S(), v6.V4H(), v7.V4H()); 20597 20598 __ Uaddl(v16.V2D(), v18.V2S(), v20.V2S()); 20599 __ Uaddl(v17.V2D(), v19.V2S(), v20.V2S()); 20600 20601 20602 END(); 20603 20604 RUN(); 20605 20606 ASSERT_EQUAL_128(0x0001000200030081, 0x0082000f00100100, q0); 20607 ASSERT_EQUAL_128(0x0000000100000002, 0x0000800100008002, q3); 20608 ASSERT_EQUAL_128(0x0000000f00000010, 0x0000f00100010000, q4); 20609 ASSERT_EQUAL_128(0x0000000000000001, 0x0000000000000002, q16); 20610 ASSERT_EQUAL_128(0x0000000080000002, 0x0000000100000000, q17); 20611 TEARDOWN(); 20612 } 20613 20614 20615 TEST(neon_3different_addhn_subhn) { 20616 SETUP(); 20617 20618 START(); 20619 20620 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081); 20621 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff); 20622 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff); 20623 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff); 20624 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000); 20625 20626 __ Addhn(v16.V8B(), v0.V8H(), v1.V8H()); 20627 __ Addhn2(v16.V16B(), v2.V8H(), v3.V8H()); 20628 __ Raddhn(v17.V8B(), v0.V8H(), v1.V8H()); 20629 __ Raddhn2(v17.V16B(), v2.V8H(), v3.V8H()); 20630 __ Subhn(v18.V8B(), v0.V8H(), v1.V8H()); 20631 __ Subhn2(v18.V16B(), v2.V8H(), v3.V8H()); 20632 __ Rsubhn(v19.V8B(), v0.V8H(), v1.V8H()); 20633 __ Rsubhn2(v19.V16B(), v2.V8H(), v3.V8H()); 20634 20635 END(); 20636 20637 RUN(); 20638 20639 ASSERT_EQUAL_128(0x0000ff007fff7fff, 0xff81817f80ff0100, q16); 20640 ASSERT_EQUAL_128(0x0000000080008000, 0xff81817f81ff0201, q17); 20641 ASSERT_EQUAL_128(0x0000ffff80008000, 0xff80817f80ff0100, q18); 20642 ASSERT_EQUAL_128(0x0000000080008000, 0xff81827f81ff0101, q19); 20643 TEARDOWN(); 20644 } 20645 20646 TEST(neon_d_only_scalar) { 20647 SETUP(); 20648 20649 START(); 20650 20651 __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0); 20652 __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f); 20653 __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x1000000010001010); 20654 __ Movi(v3.V2D(), 0xffffffffffffffff, 2); 20655 __ Movi(v4.V2D(), 0xffffffffffffffff, -2); 20656 20657 __ Add(d16, d0, d0); 20658 __ Add(d17, d1, d1); 20659 __ Add(d18, d2, d2); 20660 __ Sub(d19, d0, d0); 20661 __ Sub(d20, d0, d1); 20662 __ Sub(d21, d1, d0); 20663 __ Ushl(d22, d0, d3); 20664 __ Ushl(d23, d0, d4); 20665 __ Sshl(d24, d0, d3); 20666 __ Sshl(d25, d0, d4); 20667 __ Ushr(d26, d0, 1); 20668 __ Sshr(d27, d0, 3); 20669 __ Shl(d28, d0, 0); 20670 __ Shl(d29, d0, 16); 20671 20672 END(); 20673 20674 RUN(); 20675 20676 ASSERT_EQUAL_128(0, 0xe0000001e001e1e0, q16); 20677 ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q17); 20678 ASSERT_EQUAL_128(0, 0x2000000020002020, q18); 20679 ASSERT_EQUAL_128(0, 0, q19); 20680 ASSERT_EQUAL_128(0, 0x7000000170017171, q20); 20681 ASSERT_EQUAL_128(0, 0x8ffffffe8ffe8e8f, q21); 20682 ASSERT_EQUAL_128(0, 0xc0000003c003c3c0, q22); 20683 ASSERT_EQUAL_128(0, 0x3c0000003c003c3c, q23); 20684 ASSERT_EQUAL_128(0, 0xc0000003c003c3c0, q24); 20685 ASSERT_EQUAL_128(0, 0xfc0000003c003c3c, q25); 20686 ASSERT_EQUAL_128(0, 
0x7800000078007878, q26); 20687 ASSERT_EQUAL_128(0, 0xfe0000001e001e1e, q27); 20688 ASSERT_EQUAL_128(0, 0xf0000000f000f0f0, q28); 20689 ASSERT_EQUAL_128(0, 0x0000f000f0f00000, q29); 20690 20691 TEARDOWN(); 20692 } 20693 20694 20695 TEST(neon_sqshl_imm_scalar) { 20696 SETUP(); 20697 20698 START(); 20699 20700 __ Movi(v0.V2D(), 0x0, 0x7f); 20701 __ Movi(v1.V2D(), 0x0, 0x80); 20702 __ Movi(v2.V2D(), 0x0, 0x01); 20703 __ Sqshl(b16, b0, 1); 20704 __ Sqshl(b17, b1, 1); 20705 __ Sqshl(b18, b2, 1); 20706 20707 __ Movi(v0.V2D(), 0x0, 0x7fff); 20708 __ Movi(v1.V2D(), 0x0, 0x8000); 20709 __ Movi(v2.V2D(), 0x0, 0x0001); 20710 __ Sqshl(h19, h0, 1); 20711 __ Sqshl(h20, h1, 1); 20712 __ Sqshl(h21, h2, 1); 20713 20714 __ Movi(v0.V2D(), 0x0, 0x7fffffff); 20715 __ Movi(v1.V2D(), 0x0, 0x80000000); 20716 __ Movi(v2.V2D(), 0x0, 0x00000001); 20717 __ Sqshl(s22, s0, 1); 20718 __ Sqshl(s23, s1, 1); 20719 __ Sqshl(s24, s2, 1); 20720 20721 __ Movi(v0.V2D(), 0x0, 0x7fffffffffffffff); 20722 __ Movi(v1.V2D(), 0x0, 0x8000000000000000); 20723 __ Movi(v2.V2D(), 0x0, 0x0000000000000001); 20724 __ Sqshl(d25, d0, 1); 20725 __ Sqshl(d26, d1, 1); 20726 __ Sqshl(d27, d2, 1); 20727 20728 END(); 20729 20730 RUN(); 20731 20732 ASSERT_EQUAL_128(0, 0x7f, q16); 20733 ASSERT_EQUAL_128(0, 0x80, q17); 20734 ASSERT_EQUAL_128(0, 0x02, q18); 20735 20736 ASSERT_EQUAL_128(0, 0x7fff, q19); 20737 ASSERT_EQUAL_128(0, 0x8000, q20); 20738 ASSERT_EQUAL_128(0, 0x0002, q21); 20739 20740 ASSERT_EQUAL_128(0, 0x7fffffff, q22); 20741 ASSERT_EQUAL_128(0, 0x80000000, q23); 20742 ASSERT_EQUAL_128(0, 0x00000002, q24); 20743 20744 ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q25); 20745 ASSERT_EQUAL_128(0, 0x8000000000000000, q26); 20746 ASSERT_EQUAL_128(0, 0x0000000000000002, q27); 20747 20748 TEARDOWN(); 20749 } 20750 20751 20752 TEST(neon_uqshl_imm_scalar) { 20753 SETUP(); 20754 20755 START(); 20756 20757 __ Movi(v0.V2D(), 0x0, 0x7f); 20758 __ Movi(v1.V2D(), 0x0, 0x80); 20759 __ Movi(v2.V2D(), 0x0, 0x01); 20760 __ Uqshl(b16, b0, 1); 20761 __ Uqshl(b17, b1, 1); 20762 __ Uqshl(b18, b2, 1); 20763 20764 __ Movi(v0.V2D(), 0x0, 0x7fff); 20765 __ Movi(v1.V2D(), 0x0, 0x8000); 20766 __ Movi(v2.V2D(), 0x0, 0x0001); 20767 __ Uqshl(h19, h0, 1); 20768 __ Uqshl(h20, h1, 1); 20769 __ Uqshl(h21, h2, 1); 20770 20771 __ Movi(v0.V2D(), 0x0, 0x7fffffff); 20772 __ Movi(v1.V2D(), 0x0, 0x80000000); 20773 __ Movi(v2.V2D(), 0x0, 0x00000001); 20774 __ Uqshl(s22, s0, 1); 20775 __ Uqshl(s23, s1, 1); 20776 __ Uqshl(s24, s2, 1); 20777 20778 __ Movi(v0.V2D(), 0x0, 0x7fffffffffffffff); 20779 __ Movi(v1.V2D(), 0x0, 0x8000000000000000); 20780 __ Movi(v2.V2D(), 0x0, 0x0000000000000001); 20781 __ Uqshl(d25, d0, 1); 20782 __ Uqshl(d26, d1, 1); 20783 __ Uqshl(d27, d2, 1); 20784 20785 END(); 20786 20787 RUN(); 20788 20789 ASSERT_EQUAL_128(0, 0xfe, q16); 20790 ASSERT_EQUAL_128(0, 0xff, q17); 20791 ASSERT_EQUAL_128(0, 0x02, q18); 20792 20793 ASSERT_EQUAL_128(0, 0xfffe, q19); 20794 ASSERT_EQUAL_128(0, 0xffff, q20); 20795 ASSERT_EQUAL_128(0, 0x0002, q21); 20796 20797 ASSERT_EQUAL_128(0, 0xfffffffe, q22); 20798 ASSERT_EQUAL_128(0, 0xffffffff, q23); 20799 ASSERT_EQUAL_128(0, 0x00000002, q24); 20800 20801 ASSERT_EQUAL_128(0, 0xfffffffffffffffe, q25); 20802 ASSERT_EQUAL_128(0, 0xffffffffffffffff, q26); 20803 ASSERT_EQUAL_128(0, 0x0000000000000002, q27); 20804 20805 TEARDOWN(); 20806 } 20807 20808 20809 TEST(neon_sqshlu_scalar) { 20810 SETUP(); 20811 20812 START(); 20813 20814 __ Movi(v0.V2D(), 0x0, 0x7f); 20815 __ Movi(v1.V2D(), 0x0, 0x80); 20816 __ Movi(v2.V2D(), 0x0, 0x01); 20817 __ Sqshlu(b16, b0, 2); 
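  // Sqshlu shifts a signed value left but saturates to the unsigned range:
  // 0x7f << 2 saturates to 0xff, the negative 0x80 below clamps to zero,
  // and 0x01 << 2 simply gives 0x04.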
20818 __ Sqshlu(b17, b1, 2); 20819 __ Sqshlu(b18, b2, 2); 20820 20821 __ Movi(v0.V2D(), 0x0, 0x7fff); 20822 __ Movi(v1.V2D(), 0x0, 0x8000); 20823 __ Movi(v2.V2D(), 0x0, 0x0001); 20824 __ Sqshlu(h19, h0, 2); 20825 __ Sqshlu(h20, h1, 2); 20826 __ Sqshlu(h21, h2, 2); 20827 20828 __ Movi(v0.V2D(), 0x0, 0x7fffffff); 20829 __ Movi(v1.V2D(), 0x0, 0x80000000); 20830 __ Movi(v2.V2D(), 0x0, 0x00000001); 20831 __ Sqshlu(s22, s0, 2); 20832 __ Sqshlu(s23, s1, 2); 20833 __ Sqshlu(s24, s2, 2); 20834 20835 __ Movi(v0.V2D(), 0x0, 0x7fffffffffffffff); 20836 __ Movi(v1.V2D(), 0x0, 0x8000000000000000); 20837 __ Movi(v2.V2D(), 0x0, 0x0000000000000001); 20838 __ Sqshlu(d25, d0, 2); 20839 __ Sqshlu(d26, d1, 2); 20840 __ Sqshlu(d27, d2, 2); 20841 20842 END(); 20843 20844 RUN(); 20845 20846 ASSERT_EQUAL_128(0, 0xff, q16); 20847 ASSERT_EQUAL_128(0, 0x00, q17); 20848 ASSERT_EQUAL_128(0, 0x04, q18); 20849 20850 ASSERT_EQUAL_128(0, 0xffff, q19); 20851 ASSERT_EQUAL_128(0, 0x0000, q20); 20852 ASSERT_EQUAL_128(0, 0x0004, q21); 20853 20854 ASSERT_EQUAL_128(0, 0xffffffff, q22); 20855 ASSERT_EQUAL_128(0, 0x00000000, q23); 20856 ASSERT_EQUAL_128(0, 0x00000004, q24); 20857 20858 ASSERT_EQUAL_128(0, 0xffffffffffffffff, q25); 20859 ASSERT_EQUAL_128(0, 0x0000000000000000, q26); 20860 ASSERT_EQUAL_128(0, 0x0000000000000004, q27); 20861 20862 TEARDOWN(); 20863 } 20864 20865 20866 TEST(neon_sshll) { 20867 SETUP(); 20868 20869 START(); 20870 20871 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081); 20872 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff); 20873 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff); 20874 20875 __ Sshll(v16.V8H(), v0.V8B(), 4); 20876 __ Sshll2(v17.V8H(), v0.V16B(), 4); 20877 20878 __ Sshll(v18.V4S(), v1.V4H(), 8); 20879 __ Sshll2(v19.V4S(), v1.V8H(), 8); 20880 20881 __ Sshll(v20.V2D(), v2.V2S(), 16); 20882 __ Sshll2(v21.V2D(), v2.V4S(), 16); 20883 20884 END(); 20885 20886 RUN(); 20887 20888 ASSERT_EQUAL_128(0xf800f810fff00000, 0x001007f0f800f810, q16); 20889 ASSERT_EQUAL_128(0x07f000100000fff0, 0xf810f80007f00010, q17); 20890 ASSERT_EQUAL_128(0xffffff0000000000, 0x00000100007fff00, q18); 20891 ASSERT_EQUAL_128(0xff800000ff800100, 0xffffff0000000000, q19); 20892 ASSERT_EQUAL_128(0x0000000000000000, 0x00007fffffff0000, q20); 20893 ASSERT_EQUAL_128(0xffff800000000000, 0xffffffffffff0000, q21); 20894 TEARDOWN(); 20895 } 20896 20897 TEST(neon_shll) { 20898 SETUP(); 20899 20900 START(); 20901 20902 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081); 20903 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff); 20904 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff); 20905 20906 __ Shll(v16.V8H(), v0.V8B(), 8); 20907 __ Shll2(v17.V8H(), v0.V16B(), 8); 20908 20909 __ Shll(v18.V4S(), v1.V4H(), 16); 20910 __ Shll2(v19.V4S(), v1.V8H(), 16); 20911 20912 __ Shll(v20.V2D(), v2.V2S(), 32); 20913 __ Shll2(v21.V2D(), v2.V4S(), 32); 20914 20915 END(); 20916 20917 RUN(); 20918 20919 ASSERT_EQUAL_128(0x80008100ff000000, 0x01007f0080008100, q16); 20920 ASSERT_EQUAL_128(0x7f0001000000ff00, 0x810080007f000100, q17); 20921 ASSERT_EQUAL_128(0xffff000000000000, 0x000100007fff0000, q18); 20922 ASSERT_EQUAL_128(0x8000000080010000, 0xffff000000000000, q19); 20923 ASSERT_EQUAL_128(0x0000000000000000, 0x7fffffff00000000, q20); 20924 ASSERT_EQUAL_128(0x8000000000000000, 0xffffffff00000000, q21); 20925 TEARDOWN(); 20926 } 20927 20928 TEST(neon_ushll) { 20929 SETUP(); 20930 20931 START(); 20932 20933 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081); 20934 __ Movi(v1.V2D(), 
0x80008001ffff0000, 0xffff000000017fff); 20935 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff); 20936 20937 __ Ushll(v16.V8H(), v0.V8B(), 4); 20938 __ Ushll2(v17.V8H(), v0.V16B(), 4); 20939 20940 __ Ushll(v18.V4S(), v1.V4H(), 8); 20941 __ Ushll2(v19.V4S(), v1.V8H(), 8); 20942 20943 __ Ushll(v20.V2D(), v2.V2S(), 16); 20944 __ Ushll2(v21.V2D(), v2.V4S(), 16); 20945 20946 END(); 20947 20948 RUN(); 20949 20950 ASSERT_EQUAL_128(0x080008100ff00000, 0x001007f008000810, q16); 20951 ASSERT_EQUAL_128(0x07f0001000000ff0, 0x0810080007f00010, q17); 20952 ASSERT_EQUAL_128(0x00ffff0000000000, 0x00000100007fff00, q18); 20953 ASSERT_EQUAL_128(0x0080000000800100, 0x00ffff0000000000, q19); 20954 ASSERT_EQUAL_128(0x0000000000000000, 0x00007fffffff0000, q20); 20955 ASSERT_EQUAL_128(0x0000800000000000, 0x0000ffffffff0000, q21); 20956 TEARDOWN(); 20957 } 20958 20959 20960 TEST(neon_sxtl) { 20961 SETUP(); 20962 20963 START(); 20964 20965 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081); 20966 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff); 20967 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff); 20968 20969 __ Sxtl(v16.V8H(), v0.V8B()); 20970 __ Sxtl2(v17.V8H(), v0.V16B()); 20971 20972 __ Sxtl(v18.V4S(), v1.V4H()); 20973 __ Sxtl2(v19.V4S(), v1.V8H()); 20974 20975 __ Sxtl(v20.V2D(), v2.V2S()); 20976 __ Sxtl2(v21.V2D(), v2.V4S()); 20977 20978 END(); 20979 20980 RUN(); 20981 20982 ASSERT_EQUAL_128(0xff80ff81ffff0000, 0x0001007fff80ff81, q16); 20983 ASSERT_EQUAL_128(0x007f00010000ffff, 0xff81ff80007f0001, q17); 20984 ASSERT_EQUAL_128(0xffffffff00000000, 0x0000000100007fff, q18); 20985 ASSERT_EQUAL_128(0xffff8000ffff8001, 0xffffffff00000000, q19); 20986 ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20); 20987 ASSERT_EQUAL_128(0xffffffff80000000, 0xffffffffffffffff, q21); 20988 TEARDOWN(); 20989 } 20990 20991 20992 TEST(neon_uxtl) { 20993 SETUP(); 20994 20995 START(); 20996 20997 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081); 20998 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff); 20999 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff); 21000 21001 __ Uxtl(v16.V8H(), v0.V8B()); 21002 __ Uxtl2(v17.V8H(), v0.V16B()); 21003 21004 __ Uxtl(v18.V4S(), v1.V4H()); 21005 __ Uxtl2(v19.V4S(), v1.V8H()); 21006 21007 __ Uxtl(v20.V2D(), v2.V2S()); 21008 __ Uxtl2(v21.V2D(), v2.V4S()); 21009 21010 END(); 21011 21012 RUN(); 21013 21014 ASSERT_EQUAL_128(0x0080008100ff0000, 0x0001007f00800081, q16); 21015 ASSERT_EQUAL_128(0x007f0001000000ff, 0x00810080007f0001, q17); 21016 ASSERT_EQUAL_128(0x0000ffff00000000, 0x0000000100007fff, q18); 21017 ASSERT_EQUAL_128(0x0000800000008001, 0x0000ffff00000000, q19); 21018 ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20); 21019 ASSERT_EQUAL_128(0x0000000080000000, 0x00000000ffffffff, q21); 21020 TEARDOWN(); 21021 } 21022 21023 21024 TEST(neon_ssra) { 21025 SETUP(); 21026 21027 START(); 21028 21029 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081); 21030 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff); 21031 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff); 21032 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff); 21033 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000); 21034 21035 __ Mov(v16.V2D(), v0.V2D()); 21036 __ Mov(v17.V2D(), v0.V2D()); 21037 __ Mov(v18.V2D(), v1.V2D()); 21038 __ Mov(v19.V2D(), v1.V2D()); 21039 __ Mov(v20.V2D(), v2.V2D()); 21040 __ Mov(v21.V2D(), v2.V2D()); 21041 __ Mov(v22.V2D(), v3.V2D()); 21042 __ Mov(v23.V2D(), v4.V2D()); 21043 __ 
Mov(v24.V2D(), v3.V2D()); 21044 __ Mov(v25.V2D(), v4.V2D()); 21045 21046 __ Ssra(v16.V8B(), v0.V8B(), 4); 21047 __ Ssra(v17.V16B(), v0.V16B(), 4); 21048 21049 __ Ssra(v18.V4H(), v1.V4H(), 8); 21050 __ Ssra(v19.V8H(), v1.V8H(), 8); 21051 21052 __ Ssra(v20.V2S(), v2.V2S(), 16); 21053 __ Ssra(v21.V4S(), v2.V4S(), 16); 21054 21055 __ Ssra(v22.V2D(), v3.V2D(), 32); 21056 __ Ssra(v23.V2D(), v4.V2D(), 32); 21057 21058 __ Ssra(d24, d3, 48); 21059 21060 END(); 21061 21062 RUN(); 21063 21064 ASSERT_EQUAL_128(0x0000000000000000, 0x7879fe0001867879, q16); 21065 ASSERT_EQUAL_128(0x860100fe79788601, 0x7879fe0001867879, q17); 21066 ASSERT_EQUAL_128(0x0000000000000000, 0xfffe00000001807e, q18); 21067 ASSERT_EQUAL_128(0x7f807f81fffe0000, 0xfffe00000001807e, q19); 21068 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007ffe, q20); 21069 ASSERT_EQUAL_128(0x7fff8000fffffffe, 0x0000000080007ffe, q21); 21070 ASSERT_EQUAL_128(0x7fffffff80000001, 0x800000007ffffffe, q22); 21071 ASSERT_EQUAL_128(0x7fffffff80000000, 0x0000000000000000, q23); 21072 ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007ffe, q24); 21073 TEARDOWN(); 21074 } 21075 21076 TEST(neon_srsra) { 21077 SETUP(); 21078 21079 START(); 21080 21081 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081); 21082 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff); 21083 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff); 21084 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff); 21085 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000); 21086 21087 __ Mov(v16.V2D(), v0.V2D()); 21088 __ Mov(v17.V2D(), v0.V2D()); 21089 __ Mov(v18.V2D(), v1.V2D()); 21090 __ Mov(v19.V2D(), v1.V2D()); 21091 __ Mov(v20.V2D(), v2.V2D()); 21092 __ Mov(v21.V2D(), v2.V2D()); 21093 __ Mov(v22.V2D(), v3.V2D()); 21094 __ Mov(v23.V2D(), v4.V2D()); 21095 __ Mov(v24.V2D(), v3.V2D()); 21096 __ Mov(v25.V2D(), v4.V2D()); 21097 21098 __ Srsra(v16.V8B(), v0.V8B(), 4); 21099 __ Srsra(v17.V16B(), v0.V16B(), 4); 21100 21101 __ Srsra(v18.V4H(), v1.V4H(), 8); 21102 __ Srsra(v19.V8H(), v1.V8H(), 8); 21103 21104 __ Srsra(v20.V2S(), v2.V2S(), 16); 21105 __ Srsra(v21.V4S(), v2.V4S(), 16); 21106 21107 __ Srsra(v22.V2D(), v3.V2D(), 32); 21108 __ Srsra(v23.V2D(), v4.V2D(), 32); 21109 21110 __ Srsra(d24, d3, 48); 21111 21112 END(); 21113 21114 RUN(); 21115 21116 ASSERT_EQUAL_128(0x0000000000000000, 0x7879ff0001877879, q16); 21117 ASSERT_EQUAL_128(0x870100ff79788701, 0x7879ff0001877879, q17); 21118 ASSERT_EQUAL_128(0x0000000000000000, 0xffff00000001807f, q18); 21119 ASSERT_EQUAL_128(0x7f807f81ffff0000, 0xffff00000001807f, q19); 21120 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007fff, q20); 21121 ASSERT_EQUAL_128(0x7fff8000ffffffff, 0x0000000080007fff, q21); 21122 ASSERT_EQUAL_128(0x7fffffff80000001, 0x800000007fffffff, q22); 21123 ASSERT_EQUAL_128(0x7fffffff80000000, 0x0000000000000000, q23); 21124 ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007fff, q24); 21125 21126 TEARDOWN(); 21127 } 21128 21129 TEST(neon_usra) { 21130 SETUP(); 21131 21132 START(); 21133 21134 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081); 21135 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff); 21136 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff); 21137 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff); 21138 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000); 21139 21140 __ Mov(v16.V2D(), v0.V2D()); 21141 __ Mov(v17.V2D(), v0.V2D()); 21142 __ Mov(v18.V2D(), v1.V2D()); 21143 __ Mov(v19.V2D(), v1.V2D()); 21144 __ Mov(v20.V2D(), v2.V2D()); 21145 __ 
Mov(v21.V2D(), v2.V2D()); 21146 __ Mov(v22.V2D(), v3.V2D()); 21147 __ Mov(v23.V2D(), v4.V2D()); 21148 __ Mov(v24.V2D(), v3.V2D()); 21149 __ Mov(v25.V2D(), v4.V2D()); 21150 21151 __ Usra(v16.V8B(), v0.V8B(), 4); 21152 __ Usra(v17.V16B(), v0.V16B(), 4); 21153 21154 __ Usra(v18.V4H(), v1.V4H(), 8); 21155 __ Usra(v19.V8H(), v1.V8H(), 8); 21156 21157 __ Usra(v20.V2S(), v2.V2S(), 16); 21158 __ Usra(v21.V4S(), v2.V4S(), 16); 21159 21160 __ Usra(v22.V2D(), v3.V2D(), 32); 21161 __ Usra(v23.V2D(), v4.V2D(), 32); 21162 21163 __ Usra(d24, d3, 48); 21164 21165 END(); 21166 21167 RUN(); 21168 21169 ASSERT_EQUAL_128(0x0000000000000000, 0x88890e0001868889, q16); 21170 ASSERT_EQUAL_128(0x8601000e89888601, 0x88890e0001868889, q17); 21171 ASSERT_EQUAL_128(0x0000000000000000, 0x00fe00000001807e, q18); 21172 ASSERT_EQUAL_128(0x8080808100fe0000, 0x00fe00000001807e, q19); 21173 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007ffe, q20); 21174 ASSERT_EQUAL_128(0x800080000000fffe, 0x0000000080007ffe, q21); 21175 ASSERT_EQUAL_128(0x8000000080000001, 0x800000007ffffffe, q22); 21176 ASSERT_EQUAL_128(0x8000000080000000, 0x0000000000000000, q23); 21177 ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007ffe, q24); 21178 21179 TEARDOWN(); 21180 } 21181 21182 TEST(neon_ursra) { 21183 SETUP(); 21184 21185 START(); 21186 21187 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081); 21188 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff); 21189 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff); 21190 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff); 21191 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000); 21192 21193 __ Mov(v16.V2D(), v0.V2D()); 21194 __ Mov(v17.V2D(), v0.V2D()); 21195 __ Mov(v18.V2D(), v1.V2D()); 21196 __ Mov(v19.V2D(), v1.V2D()); 21197 __ Mov(v20.V2D(), v2.V2D()); 21198 __ Mov(v21.V2D(), v2.V2D()); 21199 __ Mov(v22.V2D(), v3.V2D()); 21200 __ Mov(v23.V2D(), v4.V2D()); 21201 __ Mov(v24.V2D(), v3.V2D()); 21202 __ Mov(v25.V2D(), v4.V2D()); 21203 21204 __ Ursra(v16.V8B(), v0.V8B(), 4); 21205 __ Ursra(v17.V16B(), v0.V16B(), 4); 21206 21207 __ Ursra(v18.V4H(), v1.V4H(), 8); 21208 __ Ursra(v19.V8H(), v1.V8H(), 8); 21209 21210 __ Ursra(v20.V2S(), v2.V2S(), 16); 21211 __ Ursra(v21.V4S(), v2.V4S(), 16); 21212 21213 __ Ursra(v22.V2D(), v3.V2D(), 32); 21214 __ Ursra(v23.V2D(), v4.V2D(), 32); 21215 21216 __ Ursra(d24, d3, 48); 21217 21218 END(); 21219 21220 RUN(); 21221 21222 ASSERT_EQUAL_128(0x0000000000000000, 0x88890f0001878889, q16); 21223 ASSERT_EQUAL_128(0x8701000f89888701, 0x88890f0001878889, q17); 21224 ASSERT_EQUAL_128(0x0000000000000000, 0x00ff00000001807f, q18); 21225 ASSERT_EQUAL_128(0x8080808100ff0000, 0x00ff00000001807f, q19); 21226 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007fff, q20); 21227 ASSERT_EQUAL_128(0x800080000000ffff, 0x0000000080007fff, q21); 21228 ASSERT_EQUAL_128(0x8000000080000001, 0x800000007fffffff, q22); 21229 ASSERT_EQUAL_128(0x8000000080000000, 0x0000000000000000, q23); 21230 ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007fff, q24); 21231 TEARDOWN(); 21232 } 21233 21234 21235 TEST(neon_uqshl_scalar) { 21236 SETUP(); 21237 21238 START(); 21239 21240 __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0); 21241 __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f); 21242 __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001); 21243 __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff); 21244 21245 __ Uqshl(b16, b0, b2); 21246 __ Uqshl(b17, b0, b3); 21247 __ Uqshl(b18, b1, b2); 21248 __ Uqshl(b19, b1, b3); 21249 __ 
Uqshl(h20, h0, h2); 21250 __ Uqshl(h21, h0, h3); 21251 __ Uqshl(h22, h1, h2); 21252 __ Uqshl(h23, h1, h3); 21253 __ Uqshl(s24, s0, s2); 21254 __ Uqshl(s25, s0, s3); 21255 __ Uqshl(s26, s1, s2); 21256 __ Uqshl(s27, s1, s3); 21257 __ Uqshl(d28, d0, d2); 21258 __ Uqshl(d29, d0, d3); 21259 __ Uqshl(d30, d1, d2); 21260 __ Uqshl(d31, d1, d3); 21261 21262 END(); 21263 21264 RUN(); 21265 21266 ASSERT_EQUAL_128(0, 0xff, q16); 21267 ASSERT_EQUAL_128(0, 0x78, q17); 21268 ASSERT_EQUAL_128(0, 0xfe, q18); 21269 ASSERT_EQUAL_128(0, 0x3f, q19); 21270 ASSERT_EQUAL_128(0, 0xffff, q20); 21271 ASSERT_EQUAL_128(0, 0x7878, q21); 21272 ASSERT_EQUAL_128(0, 0xfefe, q22); 21273 ASSERT_EQUAL_128(0, 0x3fbf, q23); 21274 ASSERT_EQUAL_128(0, 0xffffffff, q24); 21275 ASSERT_EQUAL_128(0, 0x78007878, q25); 21276 ASSERT_EQUAL_128(0, 0xfffefefe, q26); 21277 ASSERT_EQUAL_128(0, 0x3fffbfbf, q27); 21278 ASSERT_EQUAL_128(0, 0xffffffffffffffff, q28); 21279 ASSERT_EQUAL_128(0, 0x7800000078007878, q29); 21280 ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q30); 21281 ASSERT_EQUAL_128(0, 0x3fffffffbfffbfbf, q31); 21282 21283 TEARDOWN(); 21284 } 21285 21286 21287 TEST(neon_sqshl_scalar) { 21288 SETUP(); 21289 21290 START(); 21291 21292 __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xbfffffffbfffbfbf); 21293 __ Movi(v1.V2D(), 0x5555555555555555, 0x4000000040004040); 21294 __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001); 21295 __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff); 21296 21297 __ Sqshl(b16, b0, b2); 21298 __ Sqshl(b17, b0, b3); 21299 __ Sqshl(b18, b1, b2); 21300 __ Sqshl(b19, b1, b3); 21301 __ Sqshl(h20, h0, h2); 21302 __ Sqshl(h21, h0, h3); 21303 __ Sqshl(h22, h1, h2); 21304 __ Sqshl(h23, h1, h3); 21305 __ Sqshl(s24, s0, s2); 21306 __ Sqshl(s25, s0, s3); 21307 __ Sqshl(s26, s1, s2); 21308 __ Sqshl(s27, s1, s3); 21309 __ Sqshl(d28, d0, d2); 21310 __ Sqshl(d29, d0, d3); 21311 __ Sqshl(d30, d1, d2); 21312 __ Sqshl(d31, d1, d3); 21313 21314 END(); 21315 21316 RUN(); 21317 21318 ASSERT_EQUAL_128(0, 0x80, q16); 21319 ASSERT_EQUAL_128(0, 0xdf, q17); 21320 ASSERT_EQUAL_128(0, 0x7f, q18); 21321 ASSERT_EQUAL_128(0, 0x20, q19); 21322 ASSERT_EQUAL_128(0, 0x8000, q20); 21323 ASSERT_EQUAL_128(0, 0xdfdf, q21); 21324 ASSERT_EQUAL_128(0, 0x7fff, q22); 21325 ASSERT_EQUAL_128(0, 0x2020, q23); 21326 ASSERT_EQUAL_128(0, 0x80000000, q24); 21327 ASSERT_EQUAL_128(0, 0xdfffdfdf, q25); 21328 ASSERT_EQUAL_128(0, 0x7fffffff, q26); 21329 ASSERT_EQUAL_128(0, 0x20002020, q27); 21330 ASSERT_EQUAL_128(0, 0x8000000000000000, q28); 21331 ASSERT_EQUAL_128(0, 0xdfffffffdfffdfdf, q29); 21332 ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q30); 21333 ASSERT_EQUAL_128(0, 0x2000000020002020, q31); 21334 21335 TEARDOWN(); 21336 } 21337 21338 21339 TEST(neon_urshl_scalar) { 21340 SETUP(); 21341 21342 START(); 21343 21344 __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0); 21345 __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f); 21346 __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001); 21347 __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff); 21348 21349 __ Urshl(d28, d0, d2); 21350 __ Urshl(d29, d0, d3); 21351 __ Urshl(d30, d1, d2); 21352 __ Urshl(d31, d1, d3); 21353 21354 END(); 21355 21356 RUN(); 21357 21358 ASSERT_EQUAL_128(0, 0xe0000001e001e1e0, q28); 21359 ASSERT_EQUAL_128(0, 0x7800000078007878, q29); 21360 ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q30); 21361 ASSERT_EQUAL_128(0, 0x3fffffffbfffbfc0, q31); 21362 21363 TEARDOWN(); 21364 } 21365 21366 21367 TEST(neon_srshl_scalar) { 21368 SETUP(); 21369 21370 START(); 21371 21372 __ 
Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xbfffffffbfffbfbf); 21373 __ Movi(v1.V2D(), 0x5555555555555555, 0x4000000040004040); 21374 __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001); 21375 __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff); 21376 21377 __ Srshl(d28, d0, d2); 21378 __ Srshl(d29, d0, d3); 21379 __ Srshl(d30, d1, d2); 21380 __ Srshl(d31, d1, d3); 21381 21382 END(); 21383 21384 RUN(); 21385 21386 ASSERT_EQUAL_128(0, 0x7fffffff7fff7f7e, q28); 21387 ASSERT_EQUAL_128(0, 0xdfffffffdfffdfe0, q29); 21388 ASSERT_EQUAL_128(0, 0x8000000080008080, q30); 21389 ASSERT_EQUAL_128(0, 0x2000000020002020, q31); 21390 21391 TEARDOWN(); 21392 } 21393 21394 21395 TEST(neon_uqrshl_scalar) { 21396 SETUP(); 21397 21398 START(); 21399 21400 __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0); 21401 __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f); 21402 __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001); 21403 __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff); 21404 21405 __ Uqrshl(b16, b0, b2); 21406 __ Uqrshl(b17, b0, b3); 21407 __ Uqrshl(b18, b1, b2); 21408 __ Uqrshl(b19, b1, b3); 21409 __ Uqrshl(h20, h0, h2); 21410 __ Uqrshl(h21, h0, h3); 21411 __ Uqrshl(h22, h1, h2); 21412 __ Uqrshl(h23, h1, h3); 21413 __ Uqrshl(s24, s0, s2); 21414 __ Uqrshl(s25, s0, s3); 21415 __ Uqrshl(s26, s1, s2); 21416 __ Uqrshl(s27, s1, s3); 21417 __ Uqrshl(d28, d0, d2); 21418 __ Uqrshl(d29, d0, d3); 21419 __ Uqrshl(d30, d1, d2); 21420 __ Uqrshl(d31, d1, d3); 21421 21422 END(); 21423 21424 RUN(); 21425 21426 ASSERT_EQUAL_128(0, 0xff, q16); 21427 ASSERT_EQUAL_128(0, 0x78, q17); 21428 ASSERT_EQUAL_128(0, 0xfe, q18); 21429 ASSERT_EQUAL_128(0, 0x40, q19); 21430 ASSERT_EQUAL_128(0, 0xffff, q20); 21431 ASSERT_EQUAL_128(0, 0x7878, q21); 21432 ASSERT_EQUAL_128(0, 0xfefe, q22); 21433 ASSERT_EQUAL_128(0, 0x3fc0, q23); 21434 ASSERT_EQUAL_128(0, 0xffffffff, q24); 21435 ASSERT_EQUAL_128(0, 0x78007878, q25); 21436 ASSERT_EQUAL_128(0, 0xfffefefe, q26); 21437 ASSERT_EQUAL_128(0, 0x3fffbfc0, q27); 21438 ASSERT_EQUAL_128(0, 0xffffffffffffffff, q28); 21439 ASSERT_EQUAL_128(0, 0x7800000078007878, q29); 21440 ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q30); 21441 ASSERT_EQUAL_128(0, 0x3fffffffbfffbfc0, q31); 21442 21443 TEARDOWN(); 21444 } 21445 21446 21447 TEST(neon_sqrshl_scalar) { 21448 SETUP(); 21449 21450 START(); 21451 21452 __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xbfffffffbfffbfbf); 21453 __ Movi(v1.V2D(), 0x5555555555555555, 0x4000000040004040); 21454 __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001); 21455 __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff); 21456 21457 __ Sqrshl(b16, b0, b2); 21458 __ Sqrshl(b17, b0, b3); 21459 __ Sqrshl(b18, b1, b2); 21460 __ Sqrshl(b19, b1, b3); 21461 __ Sqrshl(h20, h0, h2); 21462 __ Sqrshl(h21, h0, h3); 21463 __ Sqrshl(h22, h1, h2); 21464 __ Sqrshl(h23, h1, h3); 21465 __ Sqrshl(s24, s0, s2); 21466 __ Sqrshl(s25, s0, s3); 21467 __ Sqrshl(s26, s1, s2); 21468 __ Sqrshl(s27, s1, s3); 21469 __ Sqrshl(d28, d0, d2); 21470 __ Sqrshl(d29, d0, d3); 21471 __ Sqrshl(d30, d1, d2); 21472 __ Sqrshl(d31, d1, d3); 21473 21474 END(); 21475 21476 RUN(); 21477 21478 ASSERT_EQUAL_128(0, 0x80, q16); 21479 ASSERT_EQUAL_128(0, 0xe0, q17); 21480 ASSERT_EQUAL_128(0, 0x7f, q18); 21481 ASSERT_EQUAL_128(0, 0x20, q19); 21482 ASSERT_EQUAL_128(0, 0x8000, q20); 21483 ASSERT_EQUAL_128(0, 0xdfe0, q21); 21484 ASSERT_EQUAL_128(0, 0x7fff, q22); 21485 ASSERT_EQUAL_128(0, 0x2020, q23); 21486 ASSERT_EQUAL_128(0, 0x80000000, q24); 21487 ASSERT_EQUAL_128(0, 0xdfffdfe0, q25); 21488 
ASSERT_EQUAL_128(0, 0x7fffffff, q26); 21489 ASSERT_EQUAL_128(0, 0x20002020, q27); 21490 ASSERT_EQUAL_128(0, 0x8000000000000000, q28); 21491 ASSERT_EQUAL_128(0, 0xdfffffffdfffdfe0, q29); 21492 ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q30); 21493 ASSERT_EQUAL_128(0, 0x2000000020002020, q31); 21494 21495 TEARDOWN(); 21496 } 21497 21498 21499 TEST(neon_uqadd_scalar) { 21500 SETUP(); 21501 21502 START(); 21503 21504 __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0); 21505 __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f); 21506 __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x1000000010001010); 21507 21508 __ Uqadd(b16, b0, b0); 21509 __ Uqadd(b17, b1, b1); 21510 __ Uqadd(b18, b2, b2); 21511 __ Uqadd(h19, h0, h0); 21512 __ Uqadd(h20, h1, h1); 21513 __ Uqadd(h21, h2, h2); 21514 __ Uqadd(s22, s0, s0); 21515 __ Uqadd(s23, s1, s1); 21516 __ Uqadd(s24, s2, s2); 21517 __ Uqadd(d25, d0, d0); 21518 __ Uqadd(d26, d1, d1); 21519 __ Uqadd(d27, d2, d2); 21520 21521 END(); 21522 21523 RUN(); 21524 21525 ASSERT_EQUAL_128(0, 0xff, q16); 21526 ASSERT_EQUAL_128(0, 0xfe, q17); 21527 ASSERT_EQUAL_128(0, 0x20, q18); 21528 ASSERT_EQUAL_128(0, 0xffff, q19); 21529 ASSERT_EQUAL_128(0, 0xfefe, q20); 21530 ASSERT_EQUAL_128(0, 0x2020, q21); 21531 ASSERT_EQUAL_128(0, 0xffffffff, q22); 21532 ASSERT_EQUAL_128(0, 0xfffefefe, q23); 21533 ASSERT_EQUAL_128(0, 0x20002020, q24); 21534 ASSERT_EQUAL_128(0, 0xffffffffffffffff, q25); 21535 ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q26); 21536 ASSERT_EQUAL_128(0, 0x2000000020002020, q27); 21537 21538 TEARDOWN(); 21539 } 21540 21541 21542 TEST(neon_sqadd_scalar) { 21543 SETUP(); 21544 21545 START(); 21546 21547 __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0x8000000180018181); 21548 __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f); 21549 __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x1000000010001010); 21550 21551 __ Sqadd(b16, b0, b0); 21552 __ Sqadd(b17, b1, b1); 21553 __ Sqadd(b18, b2, b2); 21554 __ Sqadd(h19, h0, h0); 21555 __ Sqadd(h20, h1, h1); 21556 __ Sqadd(h21, h2, h2); 21557 __ Sqadd(s22, s0, s0); 21558 __ Sqadd(s23, s1, s1); 21559 __ Sqadd(s24, s2, s2); 21560 __ Sqadd(d25, d0, d0); 21561 __ Sqadd(d26, d1, d1); 21562 __ Sqadd(d27, d2, d2); 21563 21564 END(); 21565 21566 RUN(); 21567 21568 ASSERT_EQUAL_128(0, 0x80, q16); 21569 ASSERT_EQUAL_128(0, 0x7f, q17); 21570 ASSERT_EQUAL_128(0, 0x20, q18); 21571 ASSERT_EQUAL_128(0, 0x8000, q19); 21572 ASSERT_EQUAL_128(0, 0x7fff, q20); 21573 ASSERT_EQUAL_128(0, 0x2020, q21); 21574 ASSERT_EQUAL_128(0, 0x80000000, q22); 21575 ASSERT_EQUAL_128(0, 0x7fffffff, q23); 21576 ASSERT_EQUAL_128(0, 0x20002020, q24); 21577 ASSERT_EQUAL_128(0, 0x8000000000000000, q25); 21578 ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q26); 21579 ASSERT_EQUAL_128(0, 0x2000000020002020, q27); 21580 21581 TEARDOWN(); 21582 } 21583 21584 21585 TEST(neon_uqsub_scalar) { 21586 SETUP(); 21587 21588 START(); 21589 21590 __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0); 21591 __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f); 21592 21593 __ Uqsub(b16, b0, b0); 21594 __ Uqsub(b17, b0, b1); 21595 __ Uqsub(b18, b1, b0); 21596 __ Uqsub(h19, h0, h0); 21597 __ Uqsub(h20, h0, h1); 21598 __ Uqsub(h21, h1, h0); 21599 __ Uqsub(s22, s0, s0); 21600 __ Uqsub(s23, s0, s1); 21601 __ Uqsub(s24, s1, s0); 21602 __ Uqsub(d25, d0, d0); 21603 __ Uqsub(d26, d0, d1); 21604 __ Uqsub(d27, d1, d0); 21605 21606 END(); 21607 21608 RUN(); 21609 21610 ASSERT_EQUAL_128(0, 0, q16); 21611 ASSERT_EQUAL_128(0, 0x71, q17); 21612 ASSERT_EQUAL_128(0, 0, q18); 21613 21614 ASSERT_EQUAL_128(0, 0, q19); 
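  // Uqsub saturates at zero, so whenever the second operand is larger than
  // the first the result is 0 rather than wrapping around.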
21615 ASSERT_EQUAL_128(0, 0x7171, q20); 21616 ASSERT_EQUAL_128(0, 0, q21); 21617 21618 ASSERT_EQUAL_128(0, 0, q22); 21619 ASSERT_EQUAL_128(0, 0x70017171, q23); 21620 ASSERT_EQUAL_128(0, 0, q24); 21621 21622 ASSERT_EQUAL_128(0, 0, q25); 21623 ASSERT_EQUAL_128(0, 0x7000000170017171, q26); 21624 ASSERT_EQUAL_128(0, 0, q27); 21625 21626 TEARDOWN(); 21627 } 21628 21629 21630 TEST(neon_sqsub_scalar) { 21631 SETUP(); 21632 21633 START(); 21634 21635 __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0); 21636 __ Movi(v1.V2D(), 0x5555555555555555, 0x7eeeeeee7eee7e7e); 21637 21638 __ Sqsub(b16, b0, b0); 21639 __ Sqsub(b17, b0, b1); 21640 __ Sqsub(b18, b1, b0); 21641 __ Sqsub(h19, h0, h0); 21642 __ Sqsub(h20, h0, h1); 21643 __ Sqsub(h21, h1, h0); 21644 __ Sqsub(s22, s0, s0); 21645 __ Sqsub(s23, s0, s1); 21646 __ Sqsub(s24, s1, s0); 21647 __ Sqsub(d25, d0, d0); 21648 __ Sqsub(d26, d0, d1); 21649 __ Sqsub(d27, d1, d0); 21650 21651 END(); 21652 21653 RUN(); 21654 21655 ASSERT_EQUAL_128(0, 0, q16); 21656 ASSERT_EQUAL_128(0, 0x80, q17); 21657 ASSERT_EQUAL_128(0, 0x7f, q18); 21658 21659 ASSERT_EQUAL_128(0, 0, q19); 21660 ASSERT_EQUAL_128(0, 0x8000, q20); 21661 ASSERT_EQUAL_128(0, 0x7fff, q21); 21662 21663 ASSERT_EQUAL_128(0, 0, q22); 21664 ASSERT_EQUAL_128(0, 0x80000000, q23); 21665 ASSERT_EQUAL_128(0, 0x7fffffff, q24); 21666 21667 ASSERT_EQUAL_128(0, 0, q25); 21668 ASSERT_EQUAL_128(0, 0x8000000000000000, q26); 21669 ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q27); 21670 21671 TEARDOWN(); 21672 } 21673 21674 21675 TEST(neon_fmla_fmls) { 21676 SETUP(); 21677 21678 START(); 21679 __ Movi(v0.V2D(), 0x3f80000040000000, 0x4100000000000000); 21680 __ Movi(v1.V2D(), 0x400000003f800000, 0x000000003f800000); 21681 __ Movi(v2.V2D(), 0x3f800000ffffffff, 0x7f800000ff800000); 21682 __ Mov(v16.V16B(), v0.V16B()); 21683 __ Mov(v17.V16B(), v0.V16B()); 21684 __ Mov(v18.V16B(), v0.V16B()); 21685 __ Mov(v19.V16B(), v0.V16B()); 21686 __ Mov(v20.V16B(), v0.V16B()); 21687 __ Mov(v21.V16B(), v0.V16B()); 21688 21689 __ Fmla(v16.V2S(), v1.V2S(), v2.V2S()); 21690 __ Fmla(v17.V4S(), v1.V4S(), v2.V4S()); 21691 __ Fmla(v18.V2D(), v1.V2D(), v2.V2D()); 21692 __ Fmls(v19.V2S(), v1.V2S(), v2.V2S()); 21693 __ Fmls(v20.V4S(), v1.V4S(), v2.V4S()); 21694 __ Fmls(v21.V2D(), v1.V2D(), v2.V2D()); 21695 END(); 21696 21697 RUN(); 21698 21699 ASSERT_EQUAL_128(0x0000000000000000, 0x7fc00000ff800000, q16); 21700 ASSERT_EQUAL_128(0x40400000ffffffff, 0x7fc00000ff800000, q17); 21701 ASSERT_EQUAL_128(0x3f9800015f8003f7, 0x41000000000000fe, q18); 21702 ASSERT_EQUAL_128(0x0000000000000000, 0x7fc000007f800000, q19); 21703 ASSERT_EQUAL_128(0xbf800000ffffffff, 0x7fc000007f800000, q20); 21704 ASSERT_EQUAL_128(0xbf8000023f0007ee, 0x40fffffffffffe04, q21); 21705 21706 TEARDOWN(); 21707 } 21708 21709 21710 TEST(neon_fmulx_scalar) { 21711 SETUP(); 21712 21713 START(); 21714 __ Fmov(s0, 2.0); 21715 __ Fmov(s1, 0.5); 21716 __ Fmov(s2, 0.0); 21717 __ Fmov(s3, -0.0); 21718 __ Fmov(s4, kFP32PositiveInfinity); 21719 __ Fmov(s5, kFP32NegativeInfinity); 21720 __ Fmulx(s16, s0, s1); 21721 __ Fmulx(s17, s2, s4); 21722 __ Fmulx(s18, s2, s5); 21723 __ Fmulx(s19, s3, s4); 21724 __ Fmulx(s20, s3, s5); 21725 21726 __ Fmov(d21, 2.0); 21727 __ Fmov(d22, 0.5); 21728 __ Fmov(d23, 0.0); 21729 __ Fmov(d24, -0.0); 21730 __ Fmov(d25, kFP64PositiveInfinity); 21731 __ Fmov(d26, kFP64NegativeInfinity); 21732 __ Fmulx(d27, d21, d22); 21733 __ Fmulx(d28, d23, d25); 21734 __ Fmulx(d29, d23, d26); 21735 __ Fmulx(d30, d24, d25); 21736 __ Fmulx(d31, d24, d26); 21737 END(); 21738 21739 RUN(); 
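  // Fmulx behaves like Fmul except that (+/-0.0 * +/-infinity) returns 2.0
  // with the exclusive-or of the operand signs rather than the default NaN;
  // that is what the s17-s20 and d28-d31 results below check.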
  ASSERT_EQUAL_FP32(1.0, s16);
  ASSERT_EQUAL_FP32(2.0, s17);
  ASSERT_EQUAL_FP32(-2.0, s18);
  ASSERT_EQUAL_FP32(-2.0, s19);
  ASSERT_EQUAL_FP32(2.0, s20);
  ASSERT_EQUAL_FP64(1.0, d27);
  ASSERT_EQUAL_FP64(2.0, d28);
  ASSERT_EQUAL_FP64(-2.0, d29);
  ASSERT_EQUAL_FP64(-2.0, d30);
  ASSERT_EQUAL_FP64(2.0, d31);

  TEARDOWN();
}


// We currently disable tests for CRC32 instructions when running natively.
// Support for this family of instructions is optional, and so native
// platforms may simply fail to execute the test.
// TODO: Run the test on native platforms where the CRC32 instructions are
// available.
#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
TEST(crc32b) {
  SETUP();
  START();

  __ Mov(w0, 0);
  __ Mov(w1, 0);
  __ Crc32b(w10, w0, w1);

  __ Mov(w0, 0x1);
  __ Mov(w1, 0x138);
  __ Crc32b(w11, w0, w1);

  __ Mov(w0, 0x1);
  __ Mov(w1, 0x38);
  __ Crc32b(w12, w0, w1);

  __ Mov(w0, 0);
  __ Mov(w1, 128);
  __ Crc32b(w13, w0, w1);

  __ Mov(w0, UINT32_MAX);
  __ Mov(w1, 255);
  __ Crc32b(w14, w0, w1);

  __ Mov(w0, 0x00010001);
  __ Mov(w1, 0x10001000);
  __ Crc32b(w15, w0, w1);

  END();
  RUN();

  ASSERT_EQUAL_64(0x0, x10);
  ASSERT_EQUAL_64(0x5f058808, x11);
  ASSERT_EQUAL_64(0x5f058808, x12);
  ASSERT_EQUAL_64(0xedb88320, x13);
  ASSERT_EQUAL_64(0x00ffffff, x14);
  ASSERT_EQUAL_64(0x77073196, x15);

  TEARDOWN();
}


TEST(crc32h) {
  SETUP();
  START();

  __ Mov(w0, 0);
  __ Mov(w1, 0);
  __ Crc32h(w10, w0, w1);

  __ Mov(w0, 0x1);
  __ Mov(w1, 0x10038);
  __ Crc32h(w11, w0, w1);

  __ Mov(w0, 0x1);
  __ Mov(w1, 0x38);
  __ Crc32h(w12, w0, w1);

  __ Mov(w0, 0);
  __ Mov(w1, 128);
  __ Crc32h(w13, w0, w1);

  __ Mov(w0, UINT32_MAX);
  __ Mov(w1, 255);
  __ Crc32h(w14, w0, w1);

  __ Mov(w0, 0x00010001);
  __ Mov(w1, 0x10001000);
  __ Crc32h(w15, w0, w1);

  END();
  RUN();

  ASSERT_EQUAL_64(0x0, x10);
  ASSERT_EQUAL_64(0x0e848dba, x11);
  ASSERT_EQUAL_64(0x0e848dba, x12);
  ASSERT_EQUAL_64(0x3b83984b, x13);
  ASSERT_EQUAL_64(0x2d021072, x14);
  ASSERT_EQUAL_64(0x04ac2124, x15);

  TEARDOWN();
}


TEST(crc32w) {
  SETUP();
  START();

  __ Mov(w0, 0);
  __ Mov(w1, 0);
  __ Crc32w(w10, w0, w1);

  __ Mov(w0, 0x1);
  __ Mov(w1, 0x80000031);
  __ Crc32w(w11, w0, w1);

  __ Mov(w0, 0);
  __ Mov(w1, 128);
  __ Crc32w(w13, w0, w1);

  __ Mov(w0, UINT32_MAX);
  __ Mov(w1, 255);
  __ Crc32w(w14, w0, w1);

  __ Mov(w0, 0x00010001);
  __ Mov(w1, 0x10001000);
  __ Crc32w(w15, w0, w1);

  END();
  RUN();

  ASSERT_EQUAL_64(0x0, x10);
  ASSERT_EQUAL_64(0x1d937b81, x11);
  ASSERT_EQUAL_64(0xed59b63b, x13);
  ASSERT_EQUAL_64(0x00be2612, x14);
  ASSERT_EQUAL_64(0xa036e530, x15);

  TEARDOWN();
}


TEST(crc32x) {
  SETUP();
  START();

  __ Mov(w0, 0);
  __ Mov(x1, 0);
  __ Crc32x(w10, w0, x1);

  __ Mov(w0, 0x1);
  __ Mov(x1, UINT64_C(0x0000000800000031));
  __ Crc32x(w11, w0, x1);

  __ Mov(w0, 0);
  __ Mov(x1,
128); 21897 __ Crc32x(w13, w0, x1); 21898 21899 __ Mov(w0, UINT32_MAX); 21900 __ Mov(x1, 255); 21901 __ Crc32x(w14, w0, x1); 21902 21903 __ Mov(w0, 0x00010001); 21904 __ Mov(x1, UINT64_C(0x1000100000000000)); 21905 __ Crc32x(w15, w0, x1); 21906 21907 END(); 21908 RUN(); 21909 21910 ASSERT_EQUAL_64(0x0, x10); 21911 ASSERT_EQUAL_64(0x40797b92, x11); 21912 ASSERT_EQUAL_64(0x533b85da, x13); 21913 ASSERT_EQUAL_64(0xbc962670, x14); 21914 ASSERT_EQUAL_64(0x0667602f, x15); 21915 21916 TEARDOWN(); 21917 } 21918 21919 21920 TEST(crc32cb) { 21921 SETUP(); 21922 START(); 21923 21924 __ Mov(w0, 0); 21925 __ Mov(w1, 0); 21926 __ Crc32cb(w10, w0, w1); 21927 21928 __ Mov(w0, 0x1); 21929 __ Mov(w1, 0x138); 21930 __ Crc32cb(w11, w0, w1); 21931 21932 __ Mov(w0, 0x1); 21933 __ Mov(w1, 0x38); 21934 __ Crc32cb(w12, w0, w1); 21935 21936 __ Mov(w0, 0); 21937 __ Mov(w1, 128); 21938 __ Crc32cb(w13, w0, w1); 21939 21940 __ Mov(w0, UINT32_MAX); 21941 __ Mov(w1, 255); 21942 __ Crc32cb(w14, w0, w1); 21943 21944 __ Mov(w0, 0x00010001); 21945 __ Mov(w1, 0x10001000); 21946 __ Crc32cb(w15, w0, w1); 21947 21948 END(); 21949 RUN(); 21950 21951 ASSERT_EQUAL_64(0x0, x10); 21952 ASSERT_EQUAL_64(0x4851927d, x11); 21953 ASSERT_EQUAL_64(0x4851927d, x12); 21954 ASSERT_EQUAL_64(0x82f63b78, x13); 21955 ASSERT_EQUAL_64(0x00ffffff, x14); 21956 ASSERT_EQUAL_64(0xf26b8203, x15); 21957 21958 TEARDOWN(); 21959 } 21960 21961 21962 TEST(crc32ch) { 21963 SETUP(); 21964 START(); 21965 21966 __ Mov(w0, 0); 21967 __ Mov(w1, 0); 21968 __ Crc32ch(w10, w0, w1); 21969 21970 __ Mov(w0, 0x1); 21971 __ Mov(w1, 0x10038); 21972 __ Crc32ch(w11, w0, w1); 21973 21974 __ Mov(w0, 0x1); 21975 __ Mov(w1, 0x38); 21976 __ Crc32ch(w12, w0, w1); 21977 21978 __ Mov(w0, 0); 21979 __ Mov(w1, 128); 21980 __ Crc32ch(w13, w0, w1); 21981 21982 __ Mov(w0, UINT32_MAX); 21983 __ Mov(w1, 255); 21984 __ Crc32ch(w14, w0, w1); 21985 21986 __ Mov(w0, 0x00010001); 21987 __ Mov(w1, 0x10001000); 21988 __ Crc32ch(w15, w0, w1); 21989 21990 END(); 21991 RUN(); 21992 21993 ASSERT_EQUAL_64(0x0, x10); 21994 ASSERT_EQUAL_64(0xcef8494c, x11); 21995 ASSERT_EQUAL_64(0xcef8494c, x12); 21996 ASSERT_EQUAL_64(0xfbc3faf9, x13); 21997 ASSERT_EQUAL_64(0xad7dacae, x14); 21998 ASSERT_EQUAL_64(0x03fc5f19, x15); 21999 22000 TEARDOWN(); 22001 } 22002 22003 22004 TEST(crc32cw) { 22005 SETUP(); 22006 START(); 22007 22008 __ Mov(w0, 0); 22009 __ Mov(w1, 0); 22010 __ Crc32cw(w10, w0, w1); 22011 22012 __ Mov(w0, 0x1); 22013 __ Mov(w1, 0x80000031); 22014 __ Crc32cw(w11, w0, w1); 22015 22016 __ Mov(w0, 0); 22017 __ Mov(w1, 128); 22018 __ Crc32cw(w13, w0, w1); 22019 22020 __ Mov(w0, UINT32_MAX); 22021 __ Mov(w1, 255); 22022 __ Crc32cw(w14, w0, w1); 22023 22024 __ Mov(w0, 0x00010001); 22025 __ Mov(w1, 0x10001000); 22026 __ Crc32cw(w15, w0, w1); 22027 22028 END(); 22029 RUN(); 22030 22031 ASSERT_EQUAL_64(0x0, x10); 22032 ASSERT_EQUAL_64(0xbcb79ece, x11); 22033 ASSERT_EQUAL_64(0x52a0c93f, x13); 22034 ASSERT_EQUAL_64(0x9f9b5c7a, x14); 22035 ASSERT_EQUAL_64(0xae1b882a, x15); 22036 22037 TEARDOWN(); 22038 } 22039 22040 22041 TEST(crc32cx) { 22042 SETUP(); 22043 START(); 22044 22045 __ Mov(w0, 0); 22046 __ Mov(x1, 0); 22047 __ Crc32cx(w10, w0, x1); 22048 22049 __ Mov(w0, 0x1); 22050 __ Mov(x1, UINT64_C(0x0000000800000031)); 22051 __ Crc32cx(w11, w0, x1); 22052 22053 __ Mov(w0, 0); 22054 __ Mov(x1, 128); 22055 __ Crc32cx(w13, w0, x1); 22056 22057 __ Mov(w0, UINT32_MAX); 22058 __ Mov(x1, 255); 22059 __ Crc32cx(w14, w0, x1); 22060 22061 __ Mov(w0, 0x00010001); 22062 __ Mov(x1, UINT64_C(0x1000100000000000)); 22063 __ 
Crc32cx(w15, w0, x1); 22064 22065 END(); 22066 RUN(); 22067 22068 ASSERT_EQUAL_64(0x0, x10); 22069 ASSERT_EQUAL_64(0x7f320fcb, x11); 22070 ASSERT_EQUAL_64(0x34019664, x13); 22071 ASSERT_EQUAL_64(0x6cc27dd0, x14); 22072 ASSERT_EQUAL_64(0xc6f0acdb, x15); 22073 22074 TEARDOWN(); 22075 } 22076 #endif // VIXL_INCLUDE_SIMULATOR_AARCH64 22077 22078 22079 TEST(neon_fabd_scalar) { 22080 SETUP(); 22081 22082 START(); 22083 __ Fmov(s0, 2.0); 22084 __ Fmov(s1, 0.5); 22085 __ Fmov(s2, 0.0); 22086 __ Fmov(s3, -0.0); 22087 __ Fmov(s4, kFP32PositiveInfinity); 22088 __ Fmov(s5, kFP32NegativeInfinity); 22089 __ Fabd(s16, s1, s0); 22090 __ Fabd(s17, s2, s3); 22091 __ Fabd(s18, s2, s5); 22092 __ Fabd(s19, s3, s4); 22093 __ Fabd(s20, s3, s5); 22094 22095 __ Fmov(d21, 2.0); 22096 __ Fmov(d22, 0.5); 22097 __ Fmov(d23, 0.0); 22098 __ Fmov(d24, -0.0); 22099 __ Fmov(d25, kFP64PositiveInfinity); 22100 __ Fmov(d26, kFP64NegativeInfinity); 22101 __ Fabd(d27, d21, d22); 22102 __ Fabd(d28, d23, d24); 22103 __ Fabd(d29, d23, d26); 22104 __ Fabd(d30, d24, d25); 22105 __ Fabd(d31, d24, d26); 22106 END(); 22107 22108 RUN(); 22109 22110 ASSERT_EQUAL_FP32(1.5, s16); 22111 ASSERT_EQUAL_FP32(0.0, s17); 22112 ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s18); 22113 ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s19); 22114 ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s20); 22115 ASSERT_EQUAL_FP64(1.5, d27); 22116 ASSERT_EQUAL_FP64(0.0, d28); 22117 ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d29); 22118 ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d30); 22119 ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d31); 22120 22121 TEARDOWN(); 22122 } 22123 22124 22125 TEST(neon_faddp_scalar) { 22126 SETUP(); 22127 22128 START(); 22129 __ Movi(d0, 0x3f80000040000000); 22130 __ Movi(d1, 0xff8000007f800000); 22131 __ Movi(d2, 0x0000000080000000); 22132 __ Faddp(s0, v0.V2S()); 22133 __ Faddp(s1, v1.V2S()); 22134 __ Faddp(s2, v2.V2S()); 22135 22136 __ Movi(v3.V2D(), 0xc000000000000000, 0x4000000000000000); 22137 __ Movi(v4.V2D(), 0xfff8000000000000, 0x7ff8000000000000); 22138 __ Movi(v5.V2D(), 0x0000000000000000, 0x8000000000000000); 22139 __ Faddp(d3, v3.V2D()); 22140 __ Faddp(d4, v4.V2D()); 22141 __ Faddp(d5, v5.V2D()); 22142 END(); 22143 22144 RUN(); 22145 22146 ASSERT_EQUAL_FP32(3.0, s0); 22147 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s1); 22148 ASSERT_EQUAL_FP32(0.0, s2); 22149 ASSERT_EQUAL_FP64(0.0, d3); 22150 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d4); 22151 ASSERT_EQUAL_FP64(0.0, d5); 22152 22153 TEARDOWN(); 22154 } 22155 22156 22157 TEST(neon_fmaxp_scalar) { 22158 SETUP(); 22159 22160 START(); 22161 __ Movi(d0, 0x3f80000040000000); 22162 __ Movi(d1, 0xff8000007f800000); 22163 __ Movi(d2, 0x7fc00000ff800000); 22164 __ Fmaxp(s0, v0.V2S()); 22165 __ Fmaxp(s1, v1.V2S()); 22166 __ Fmaxp(s2, v2.V2S()); 22167 22168 __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000); 22169 __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000); 22170 __ Movi(v5.V2D(), 0x7ff0000000000000, 0x7ff8000000000000); 22171 __ Fmaxp(d3, v3.V2D()); 22172 __ Fmaxp(d4, v4.V2D()); 22173 __ Fmaxp(d5, v5.V2D()); 22174 END(); 22175 22176 RUN(); 22177 22178 ASSERT_EQUAL_FP32(2.0, s0); 22179 ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s1); 22180 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s2); 22181 ASSERT_EQUAL_FP64(2.0, d3); 22182 ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d4); 22183 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d5); 22184 22185 TEARDOWN(); 22186 } 22187 22188 22189 TEST(neon_fmaxnmp_scalar) { 22190 SETUP(); 22191 22192 START(); 22193 __ Movi(d0, 0x3f80000040000000); 22194 __ Movi(d1, 0xff8000007f800000); 
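  // Fmaxnmp reduces each pair of lanes with the IEEE 754 maxNum rule: when
  // exactly one operand is a quiet NaN, the other operand is returned, so
  // the NaN lane of d2 below loses against negative infinity.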
22195 __ Movi(d2, 0x7fc00000ff800000); 22196 __ Fmaxnmp(s0, v0.V2S()); 22197 __ Fmaxnmp(s1, v1.V2S()); 22198 __ Fmaxnmp(s2, v2.V2S()); 22199 22200 __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000); 22201 __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000); 22202 __ Movi(v5.V2D(), 0x7ff8000000000000, 0xfff0000000000000); 22203 __ Fmaxnmp(d3, v3.V2D()); 22204 __ Fmaxnmp(d4, v4.V2D()); 22205 __ Fmaxnmp(d5, v5.V2D()); 22206 END(); 22207 22208 RUN(); 22209 22210 ASSERT_EQUAL_FP32(2.0, s0); 22211 ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s1); 22212 ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s2); 22213 ASSERT_EQUAL_FP64(2.0, d3); 22214 ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d4); 22215 ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d5); 22216 22217 TEARDOWN(); 22218 } 22219 22220 22221 TEST(neon_fminp_scalar) { 22222 SETUP(); 22223 22224 START(); 22225 __ Movi(d0, 0x3f80000040000000); 22226 __ Movi(d1, 0xff8000007f800000); 22227 __ Movi(d2, 0x7fc00000ff800000); 22228 __ Fminp(s0, v0.V2S()); 22229 __ Fminp(s1, v1.V2S()); 22230 __ Fminp(s2, v2.V2S()); 22231 22232 __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000); 22233 __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000); 22234 __ Movi(v5.V2D(), 0x7ff0000000000000, 0x7ff8000000000000); 22235 __ Fminp(d3, v3.V2D()); 22236 __ Fminp(d4, v4.V2D()); 22237 __ Fminp(d5, v5.V2D()); 22238 END(); 22239 22240 RUN(); 22241 22242 ASSERT_EQUAL_FP32(1.0, s0); 22243 ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s1); 22244 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s2); 22245 ASSERT_EQUAL_FP64(1.0, d3); 22246 ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d4); 22247 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d5); 22248 22249 TEARDOWN(); 22250 } 22251 22252 22253 TEST(neon_fminnmp_scalar) { 22254 SETUP(); 22255 22256 START(); 22257 __ Movi(d0, 0x3f80000040000000); 22258 __ Movi(d1, 0xff8000007f800000); 22259 __ Movi(d2, 0x7fc00000ff800000); 22260 __ Fminnmp(s0, v0.V2S()); 22261 __ Fminnmp(s1, v1.V2S()); 22262 __ Fminnmp(s2, v2.V2S()); 22263 22264 __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000); 22265 __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000); 22266 __ Movi(v5.V2D(), 0x7ff8000000000000, 0xfff0000000000000); 22267 __ Fminnmp(d3, v3.V2D()); 22268 __ Fminnmp(d4, v4.V2D()); 22269 __ Fminnmp(d5, v5.V2D()); 22270 END(); 22271 22272 RUN(); 22273 22274 ASSERT_EQUAL_FP32(1.0, s0); 22275 ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s1); 22276 ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s2); 22277 ASSERT_EQUAL_FP64(1.0, d3); 22278 ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d4); 22279 ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d5); 22280 22281 TEARDOWN(); 22282 } 22283 22284 22285 TEST(neon_tbl) { 22286 SETUP(); 22287 22288 START(); 22289 __ Movi(v30.V2D(), 0xbf561e188b1280e9, 0xbd542b8cbd24e8e8); 22290 __ Movi(v31.V2D(), 0xb5e9883d2c88a46d, 0x12276d5b614c915e); 22291 __ Movi(v0.V2D(), 0xc45b7782bc5ecd72, 0x5dd4fe5a4bc6bf5e); 22292 __ Movi(v1.V2D(), 0x1e3254094bd1746a, 0xf099ecf50e861c80); 22293 22294 __ Movi(v4.V2D(), 0xf80c030100031f16, 0x00070504031201ff); 22295 __ Movi(v5.V2D(), 0x1f01001afc14202a, 0x2a081e1b0c02020c); 22296 __ Movi(v6.V2D(), 0x353f1a13022a2360, 0x2c464a00203a0a33); 22297 __ Movi(v7.V2D(), 0x64801a1c054cf30d, 0x793a2c052e213739); 22298 22299 __ Movi(v8.V2D(), 0xb7f60ad7d7d88f13, 0x13eefc240496e842); 22300 __ Movi(v9.V2D(), 0x1be199c7c69b47ec, 0x8e4b9919f6eed443); 22301 __ Movi(v10.V2D(), 0x9bd2e1654c69e48f, 0x2143d089e426c6d2); 22302 __ Movi(v11.V2D(), 0xc31dbdc4a0393065, 0x1ecc2077caaf64d8); 22303 __ Movi(v12.V2D(), 0x29b24463967bc6eb, 
0xdaf59970df01c93b); 22304 __ Movi(v13.V2D(), 0x3e20a4a4cb6813f4, 0x20a5832713dae669); 22305 __ Movi(v14.V2D(), 0xc5ff9a94041b1fdf, 0x2f46cde38cba2682); 22306 __ Movi(v15.V2D(), 0xd8cc5b0e61f387e6, 0xe69d6d314971e8fd); 22307 22308 __ Tbl(v8.V16B(), v1.V16B(), v4.V16B()); 22309 __ Tbl(v9.V16B(), v0.V16B(), v1.V16B(), v5.V16B()); 22310 __ Tbl(v10.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V16B()); 22311 __ Tbl(v11.V16B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V16B()); 22312 __ Tbl(v12.V8B(), v1.V16B(), v4.V8B()); 22313 __ Tbl(v13.V8B(), v0.V16B(), v1.V16B(), v5.V8B()); 22314 __ Tbl(v14.V8B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V8B()); 22315 __ Tbl(v15.V8B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V8B()); 22316 22317 __ Movi(v16.V2D(), 0xb7f60ad7d7d88f13, 0x13eefc240496e842); 22318 __ Movi(v17.V2D(), 0x1be199c7c69b47ec, 0x8e4b9919f6eed443); 22319 __ Movi(v18.V2D(), 0x9bd2e1654c69e48f, 0x2143d089e426c6d2); 22320 __ Movi(v19.V2D(), 0xc31dbdc4a0393065, 0x1ecc2077caaf64d8); 22321 __ Movi(v20.V2D(), 0x29b24463967bc6eb, 0xdaf59970df01c93b); 22322 __ Movi(v21.V2D(), 0x3e20a4a4cb6813f4, 0x20a5832713dae669); 22323 __ Movi(v22.V2D(), 0xc5ff9a94041b1fdf, 0x2f46cde38cba2682); 22324 __ Movi(v23.V2D(), 0xd8cc5b0e61f387e6, 0xe69d6d314971e8fd); 22325 22326 __ Tbx(v16.V16B(), v1.V16B(), v4.V16B()); 22327 __ Tbx(v17.V16B(), v0.V16B(), v1.V16B(), v5.V16B()); 22328 __ Tbx(v18.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V16B()); 22329 __ Tbx(v19.V16B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V16B()); 22330 __ Tbx(v20.V8B(), v1.V16B(), v4.V8B()); 22331 __ Tbx(v21.V8B(), v0.V16B(), v1.V16B(), v5.V8B()); 22332 __ Tbx(v22.V8B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V8B()); 22333 __ Tbx(v23.V8B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V8B()); 22334 END(); 22335 22336 RUN(); 22337 22338 ASSERT_EQUAL_128(0x00090e1c800e0000, 0x80f0ecf50e001c00, v8); 22339 ASSERT_EQUAL_128(0x1ebf5ed100f50000, 0x0072324b82c6c682, v9); 22340 ASSERT_EQUAL_128(0x00005e4b4cd10e00, 0x0900005e80008800, v10); 22341 ASSERT_EQUAL_128(0x0000883d2b00001e, 0x00d1822b5bbff074, v11); 22342 ASSERT_EQUAL_128(0x0000000000000000, 0x80f0ecf50e001c00, v12); 22343 ASSERT_EQUAL_128(0x0000000000000000, 0x0072324b82c6c682, v13); 22344 ASSERT_EQUAL_128(0x0000000000000000, 0x0900005e80008800, v14); 22345 ASSERT_EQUAL_128(0x0000000000000000, 0x00d1822b5bbff074, v15); 22346 22347 ASSERT_EQUAL_128(0xb7090e1c800e8f13, 0x80f0ecf50e961c42, v16); 22348 ASSERT_EQUAL_128(0x1ebf5ed1c6f547ec, 0x8e72324b82c6c682, v17); 22349 ASSERT_EQUAL_128(0x9bd25e4b4cd10e8f, 0x0943d05e802688d2, v18); 22350 ASSERT_EQUAL_128(0xc31d883d2b39301e, 0x1ed1822b5bbff074, v19); 22351 ASSERT_EQUAL_128(0x0000000000000000, 0x80f0ecf50e011c3b, v20); 22352 ASSERT_EQUAL_128(0x0000000000000000, 0x2072324b82c6c682, v21); 22353 ASSERT_EQUAL_128(0x0000000000000000, 0x0946cd5e80ba8882, v22); 22354 ASSERT_EQUAL_128(0x0000000000000000, 0xe6d1822b5bbff074, v23); 22355 22356 TEARDOWN(); 22357 } 22358 22359 22360 TEST(regress_cmp_shift_imm) { 22361 SETUP(); 22362 22363 START(); 22364 22365 __ Mov(x0, 0x3d720c8d); 22366 __ Cmp(x0, Operand(0x3d720c8d)); 22367 22368 END(); 22369 RUN(); 22370 22371 ASSERT_EQUAL_NZCV(ZCFlag); 22372 22373 TEARDOWN(); 22374 } 22375 22376 22377 TEST(compute_address) { 22378 SETUP(); 22379 22380 START(); 22381 int64_t base_address = INT64_C(0x123000000abc); 22382 int64_t reg_offset = INT64_C(0x1087654321); 22383 Register base = x0; 22384 Register offset = x1; 22385 22386 __ Mov(base, base_address); 22387 __ Mov(offset, reg_offset); 22388 22389 
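  // ComputeAddress materialises the address that the MemOperand describes
  // without performing a memory access; for example, x6 below should equal
  // base + (offset << 2).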


TEST(regress_cmp_shift_imm) {
  SETUP();

  START();

  __ Mov(x0, 0x3d720c8d);
  __ Cmp(x0, Operand(0x3d720c8d));

  END();
  RUN();

  ASSERT_EQUAL_NZCV(ZCFlag);

  TEARDOWN();
}


TEST(compute_address) {
  SETUP();

  START();
  int64_t base_address = INT64_C(0x123000000abc);
  int64_t reg_offset = INT64_C(0x1087654321);
  Register base = x0;
  Register offset = x1;

  __ Mov(base, base_address);
  __ Mov(offset, reg_offset);

  __ ComputeAddress(x2, MemOperand(base, 0));
  __ ComputeAddress(x3, MemOperand(base, 8));
  __ ComputeAddress(x4, MemOperand(base, -100));

  __ ComputeAddress(x5, MemOperand(base, offset));
  __ ComputeAddress(x6, MemOperand(base, offset, LSL, 2));
  __ ComputeAddress(x7, MemOperand(base, offset, LSL, 4));
  __ ComputeAddress(x8, MemOperand(base, offset, LSL, 8));

  __ ComputeAddress(x9, MemOperand(base, offset, SXTW));
  __ ComputeAddress(x10, MemOperand(base, offset, UXTW, 1));
  __ ComputeAddress(x11, MemOperand(base, offset, SXTW, 2));
  __ ComputeAddress(x12, MemOperand(base, offset, UXTW, 3));

  END();

  RUN();

  ASSERT_EQUAL_64(base_address, base);

  ASSERT_EQUAL_64(INT64_C(0x123000000abc), x2);
  ASSERT_EQUAL_64(INT64_C(0x123000000ac4), x3);
  ASSERT_EQUAL_64(INT64_C(0x123000000a58), x4);

  ASSERT_EQUAL_64(INT64_C(0x124087654ddd), x5);
  ASSERT_EQUAL_64(INT64_C(0x12721d951740), x6);
  ASSERT_EQUAL_64(INT64_C(0x133876543ccc), x7);
  ASSERT_EQUAL_64(INT64_C(0x22b765432bbc), x8);

  ASSERT_EQUAL_64(INT64_C(0x122f87654ddd), x9);
  ASSERT_EQUAL_64(INT64_C(0x12310eca90fe), x10);
  ASSERT_EQUAL_64(INT64_C(0x122e1d951740), x11);
  ASSERT_EQUAL_64(INT64_C(0x12343b2a23c4), x12);

  TEARDOWN();
}
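
// `ComputeAddress` materialises the address that a load or store with the
// given `MemOperand` would access, i.e. base + (extended, shifted) offset.
// For example, the expected value for x6 above is
//   0x123000000abc + (0x1087654321 << 2) = 0x123000000abc + 0x421d950c84
//                                        = 0x12721d951740.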


TEST(far_branch_backward) {
  // Test that the MacroAssembler correctly resolves backward branches to
  // labels that are outside the immediate range of branch instructions. Take
  // into account that backward branches can reach one instruction further
  // than forward branches.
  const int overflow_size =
      kInstructionSize +
      std::max(Instruction::GetImmBranchForwardRange(TestBranchType),
               std::max(Instruction::GetImmBranchForwardRange(
                            CompareBranchType),
                        Instruction::GetImmBranchForwardRange(CondBranchType)));

  SETUP();
  START();

  Label done, fail;
  Label test_tbz, test_cbz, test_bcond;
  Label success_tbz, success_cbz, success_bcond;

  __ Mov(x0, 0);
  __ Mov(x1, 1);
  __ Mov(x10, 0);

  __ B(&test_tbz);
  __ Bind(&success_tbz);
  __ Orr(x0, x0, 1 << 0);
  __ B(&test_cbz);
  __ Bind(&success_cbz);
  __ Orr(x0, x0, 1 << 1);
  __ B(&test_bcond);
  __ Bind(&success_bcond);
  __ Orr(x0, x0, 1 << 2);

  __ B(&done);

  // Generate enough code to overflow the immediate range of the three types
  // of branches below.
  for (unsigned i = 0; i < overflow_size / kInstructionSize; ++i) {
    if (i % 100 == 0) {
      // If we do land in this code, we do not want to execute so many nops
      // before reaching the end of the test (especially if tracing is
      // activated).
      __ B(&fail);
    } else {
      __ Nop();
    }
  }
  __ B(&fail);

  __ Bind(&test_tbz);
  __ Tbz(x10, 7, &success_tbz);
  __ Bind(&test_cbz);
  __ Cbz(x10, &success_cbz);
  __ Bind(&test_bcond);
  __ Cmp(x10, 0);
  __ B(eq, &success_bcond);

  // For each out-of-range branch instruction, at least two instructions
  // should have been generated.
  VIXL_CHECK(masm.GetSizeOfCodeGeneratedSince(&test_tbz) >=
             7 * kInstructionSize);

  __ Bind(&fail);
  __ Mov(x1, 0);
  __ Bind(&done);

  END();
  RUN();

  ASSERT_EQUAL_64(0x7, x0);
  ASSERT_EQUAL_64(0x1, x1);

  TEARDOWN();
}


TEST(single_veneer) {
  SETUP();
  START();

  const int max_range = Instruction::GetImmBranchForwardRange(TestBranchType);

  Label success, fail, done;

  __ Mov(x0, 0);
  __ Mov(x1, 1);
  __ Mov(x10, 0);

  __ Tbz(x10, 7, &success);

  // Generate enough code to overflow the immediate range of the `tbz`.
  for (unsigned i = 0; i < max_range / kInstructionSize + 1; ++i) {
    if (i % 100 == 0) {
      // If we do land in this code, we do not want to execute so many nops
      // before reaching the end of the test (especially if tracing is
      // activated).
      __ B(&fail);
    } else {
      __ Nop();
    }
  }
  __ B(&fail);

  __ Bind(&success);
  __ Mov(x0, 1);

  __ B(&done);
  __ Bind(&fail);
  __ Mov(x1, 0);
  __ Bind(&done);

  END();
  RUN();

  ASSERT_EQUAL_64(1, x0);
  ASSERT_EQUAL_64(1, x1);

  TEARDOWN();
}


TEST(simple_veneers) {
  // Test that the MacroAssembler correctly emits veneers for forward branches
  // to labels that are outside the immediate range of branch instructions.
  const int max_range =
      std::max(Instruction::GetImmBranchForwardRange(TestBranchType),
               std::max(Instruction::GetImmBranchForwardRange(
                            CompareBranchType),
                        Instruction::GetImmBranchForwardRange(CondBranchType)));

  SETUP();
  START();

  Label done, fail;
  Label test_tbz, test_cbz, test_bcond;
  Label success_tbz, success_cbz, success_bcond;

  __ Mov(x0, 0);
  __ Mov(x1, 1);
  __ Mov(x10, 0);

  __ Bind(&test_tbz);
  __ Tbz(x10, 7, &success_tbz);
  __ Bind(&test_cbz);
  __ Cbz(x10, &success_cbz);
  __ Bind(&test_bcond);
  __ Cmp(x10, 0);
  __ B(eq, &success_bcond);

  // Generate enough code to overflow the immediate range of the three types
  // of branches below.
  for (unsigned i = 0; i < max_range / kInstructionSize + 1; ++i) {
    if (i % 100 == 0) {
      // If we do land in this code, we do not want to execute so many nops
      // before reaching the end of the test (especially if tracing is
      // activated).
      __ B(&fail);
    } else {
      __ Nop();
    }
  }
  __ B(&fail);

  __ Bind(&success_tbz);
  __ Orr(x0, x0, 1 << 0);
  __ B(&test_cbz);
  __ Bind(&success_cbz);
  __ Orr(x0, x0, 1 << 1);
  __ B(&test_bcond);
  __ Bind(&success_bcond);
  __ Orr(x0, x0, 1 << 2);

  __ B(&done);
  __ Bind(&fail);
  __ Mov(x1, 0);
  __ Bind(&done);

  END();
  RUN();

  ASSERT_EQUAL_64(0x7, x0);
  ASSERT_EQUAL_64(0x1, x1);

  TEARDOWN();
}
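
// When a branch with a limited range risks going out of range, the
// MacroAssembler emits a veneer: the short-range branch is retargeted at a
// nearby unconditional `b` (whose range is about +/-128MB), which in turn
// jumps to the real destination. Schematically:
//
//     tbz x10, #7, veneer   // Short-range branch, now in range.
//     ...
//   veneer:
//     b success_tbz         // The unconditional branch covers the distance.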


TEST(veneers_stress) {
  SETUP();
  START();

  // This is a code generation test stressing the emission of veneers. The
  // code generated is not executed.

  Label target;
  const unsigned max_range =
      Instruction::GetImmBranchForwardRange(CondBranchType);
  const unsigned iterations =
      (max_range + max_range / 4) / (4 * kInstructionSize);
  for (unsigned i = 0; i < iterations; i++) {
    __ B(&target);
    __ B(eq, &target);
    __ Cbz(x0, &target);
    __ Tbz(x0, 0, &target);
  }
  __ Bind(&target);

  END();
  TEARDOWN();
}


TEST(veneers_two_out_of_range) {
  SETUP();
  START();

  // This is a code generation test. The code generated is not executed.
  // Ensure that the MacroAssembler takes unresolved branches into account
  // when choosing when a veneer pool should be emitted. We generate two
  // branches that go out of range at the same offset. When the MacroAssembler
  // decides to emit the veneer pool, the emission of a first veneer should
  // not cause the other branch to go out of range.

  int range_cbz = Instruction::GetImmBranchForwardRange(CompareBranchType);
  int range_tbz = Instruction::GetImmBranchForwardRange(TestBranchType);
  int max_target = static_cast<int>(masm.GetCursorOffset()) + range_cbz;

  Label done;

  // We use different labels to prevent the MacroAssembler from sharing
  // veneers.
  Label target_cbz, target_tbz;

  __ Cbz(x0, &target_cbz);
  while (masm.GetCursorOffset() < max_target - range_tbz) {
    __ Nop();
  }
  __ Tbz(x0, 0, &target_tbz);
  while (masm.GetCursorOffset() < max_target) {
    __ Nop();
  }

  // This additional nop makes the branches go out of range.
  __ Nop();

  __ Bind(&target_cbz);
  __ Bind(&target_tbz);

  END();
  TEARDOWN();
}


TEST(veneers_hanging) {
  SETUP();
  START();

  // This is a code generation test. The code generated is not executed.
  // Ensure that the MacroAssembler takes unresolved branches into account
  // when choosing when a veneer pool should be emitted. This is similar to
  // the 'veneers_two_out_of_range' test. We try to trigger the following
  // situation:
  //   b.eq label
  //   b.eq label
  //   ...
  //   nop
  //   ...
  //   cbz x0, label
  //   cbz x0, label
  //   ...
  //   tbz x0, 0, label
  //   nop
  //   ...
  //   nop    <- From here the `b.eq` and `cbz` instructions run out of range,
  //             so a veneer pool is required.
  //   veneer
  //   veneer
  //   veneer <- The `tbz` runs out of range somewhere in the middle of the
  //   veneer    veneer pool.
  //   veneer

  const int range_bcond = Instruction::GetImmBranchForwardRange(CondBranchType);
  const int range_cbz =
      Instruction::GetImmBranchForwardRange(CompareBranchType);
  const int range_tbz = Instruction::GetImmBranchForwardRange(TestBranchType);
  const int max_target =
      static_cast<int>(masm.GetCursorOffset()) + range_bcond;

  Label done;
  const int n_bcond = 100;
  const int n_cbz = 100;
  const int n_tbz = 1;
  const int kNTotalBranches = n_bcond + n_cbz + n_tbz;

  // We use different labels to prevent the MacroAssembler from sharing
  // veneers.
  Label labels[kNTotalBranches];
  for (int i = 0; i < kNTotalBranches; i++) {
    new (&labels[i]) Label();
  }

  for (int i = 0; i < n_bcond; i++) {
    __ B(eq, &labels[i]);
  }

  while (masm.GetCursorOffset() < max_target - range_cbz) {
    __ Nop();
  }

  for (int i = 0; i < n_cbz; i++) {
    __ Cbz(x0, &labels[n_bcond + i]);
  }

  // Ensure the 'tbz' will go out of range after some of the previously
  // generated branches.
  int margin = (n_bcond / 2) * kInstructionSize;
  while (masm.GetCursorOffset() < max_target - range_tbz + margin) {
    __ Nop();
  }

  __ Tbz(x0, 0, &labels[n_bcond + n_cbz]);

  while (masm.GetCursorOffset() < max_target) {
    __ Nop();
  }

  // This additional nop makes the 'b.eq' and 'cbz' instructions go out of
  // range and forces the emission of a veneer pool. The 'tbz' is not yet out
  // of range, but will go out of range while veneers are emitted for the
  // other branches.
  // The MacroAssembler should ensure that veneers are correctly emitted for
  // all the branches, including the 'tbz'. Checks will fail if the target of
  // a branch is out of range.
  __ Nop();

  for (int i = 0; i < kNTotalBranches; i++) {
    __ Bind(&labels[i]);
  }

  END();
  TEARDOWN();
}
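
// Veneer pools interact with literal pools: literals are loaded with
// pc-relative `ldr` instructions whose range (about +/-1MB) differs from the
// branch ranges (about +/-32KB for `tbz`, +/-1MB for `cbz` and `b.cond`), so
// emitting one kind of pool can push instructions of the other kind out of
// range. The next test checks that emitting a large literal pool does not
// break a pending `tbz`.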


TEST(collision_literal_veneer_pools) {
  SETUP();
  START();

  // This is a code generation test. The code generated is not executed.

  // Make sure the literal pool is empty.
  masm.EmitLiteralPool(LiteralPool::kBranchRequired);
  ASSERT_LITERAL_POOL_SIZE(0);

  // We chose the offsets below to (try to) trigger the following situation:
  // buffer offset
  //          0:  tbz x0, 0, target_tbz -------------------------------.
  //          4:  nop                                                  |
  //              ...                                                  |
  //              nop                                                  |
  // literal gen: ldr s0, [pc + ...]  ; load from `pool start + 0`     |
  //              ldr s0, [pc + ...]  ; load from `pool start + 4`     |
  //              ...                                                  |
  //              ldr s0, [pc + ...]                                   |
  //  pool start: floating-point literal (0.1)                         |
  //              floating-point literal (1.1)                         |
  //              ...                                                  |
  //              floating-point literal (<n>.1)  <--tbz-max-range-----'
  //              floating-point literal (<n+1>.1)
  //              ...

  const int range_tbz = Instruction::GetImmBranchForwardRange(TestBranchType);
  const int max_target = static_cast<int>(masm.GetCursorOffset()) + range_tbz;

  const size_t target_literal_pool_size = 100 * kInstructionSize;
  const int offset_start_literal_gen =
      target_literal_pool_size + target_literal_pool_size / 2;


  Label target_tbz;

  __ Tbz(x0, 0, &target_tbz);
  VIXL_CHECK(masm.GetNumberOfPotentialVeneers() == 1);
  while (masm.GetCursorOffset() < max_target - offset_start_literal_gen) {
    __ Nop();
  }
  VIXL_CHECK(masm.GetNumberOfPotentialVeneers() == 1);

  for (int i = 0; i < 100; i++) {
    // Use a different value to force one literal pool entry per iteration.
    __ Ldr(s0, i + 0.1);
  }
  VIXL_CHECK(masm.GetLiteralPoolSize() >= target_literal_pool_size);

  // Force emission of a literal pool.
  masm.EmitLiteralPool(LiteralPool::kBranchRequired);
  ASSERT_LITERAL_POOL_SIZE(0);

  // The branch should not have gone out of range during the emission of the
  // literal pool.
  __ Bind(&target_tbz);

  VIXL_CHECK(masm.GetNumberOfPotentialVeneers() == 0);

  END();
  TEARDOWN();
}


TEST(ldr_literal_explicit) {
  SETUP();

  START();
  Literal<int64_t> automatically_placed_literal(1, masm.GetLiteralPool());
  Literal<int64_t> manually_placed_literal(2);
  {
    ExactAssemblyScope scope(&masm, kInstructionSize + sizeof(int64_t));
    Label over_literal;
    __ b(&over_literal);
    __ place(&manually_placed_literal);
    __ bind(&over_literal);
  }
  __ Ldr(x1, &manually_placed_literal);
  __ Ldr(x2, &automatically_placed_literal);
  __ Add(x0, x1, x2);
  END();

  RUN();

  ASSERT_EQUAL_64(3, x0);

  TEARDOWN();
}
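
// The test above shows the two placement models: a pool-managed literal is
// emitted automatically the next time the pool is dumped, while a manually
// placed literal is written at the current cursor position with `place()`.
// The `ExactAssemblyScope` reserves exactly one instruction (the `b` over the
// data) plus the eight bytes of the int64_t literal, so the manual placement
// cannot be interrupted by automatic pool emission.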


TEST(ldr_literal_automatically_placed) {
  SETUP();

  START();

  // We start with an empty literal pool.
  ASSERT_LITERAL_POOL_SIZE(0);

  // Create a literal that should be placed by the literal pool.
  Literal<int64_t> explicit_literal(2, masm.GetLiteralPool());
  // It should not appear in the literal pool until its first use.
  ASSERT_LITERAL_POOL_SIZE(0);

  // Check that using standard literals does not break the use of explicitly
  // created literals.
  __ Ldr(d1, 1.1);
  ASSERT_LITERAL_POOL_SIZE(8);
  masm.EmitLiteralPool(LiteralPool::kBranchRequired);
  ASSERT_LITERAL_POOL_SIZE(0);

  __ Ldr(x2, &explicit_literal);
  ASSERT_LITERAL_POOL_SIZE(8);
  masm.EmitLiteralPool(LiteralPool::kBranchRequired);
  ASSERT_LITERAL_POOL_SIZE(0);

  __ Ldr(d3, 3.3);
  ASSERT_LITERAL_POOL_SIZE(8);
  masm.EmitLiteralPool(LiteralPool::kBranchRequired);
  ASSERT_LITERAL_POOL_SIZE(0);

  // Re-use our explicitly created literal. It has already been placed, so it
  // should not impact the literal pool.
  __ Ldr(x4, &explicit_literal);
  ASSERT_LITERAL_POOL_SIZE(0);

  END();

  RUN();

  ASSERT_EQUAL_FP64(1.1, d1);
  ASSERT_EQUAL_64(2, x2);
  ASSERT_EQUAL_FP64(3.3, d3);
  ASSERT_EQUAL_64(2, x4);

  TEARDOWN();
}


TEST(literal_update_overwrite) {
  SETUP();

  START();

  ASSERT_LITERAL_POOL_SIZE(0);
  LiteralPool* literal_pool = masm.GetLiteralPool();

  Literal<int32_t> lit_32_update_before_pool(0xbad, literal_pool);
  Literal<int32_t> lit_32_update_after_pool(0xbad, literal_pool);
  Literal<int64_t> lit_64_update_before_pool(0xbad, literal_pool);
  Literal<int64_t> lit_64_update_after_pool(0xbad, literal_pool);

  ASSERT_LITERAL_POOL_SIZE(0);

  lit_32_update_before_pool.UpdateValue(32);
  lit_64_update_before_pool.UpdateValue(64);

  __ Ldr(w1, &lit_32_update_before_pool);
  __ Ldr(x2, &lit_64_update_before_pool);
  __ Ldr(w3, &lit_32_update_after_pool);
  __ Ldr(x4, &lit_64_update_after_pool);

  masm.EmitLiteralPool(LiteralPool::kBranchRequired);

  VIXL_ASSERT(lit_32_update_after_pool.IsPlaced());
  VIXL_ASSERT(lit_64_update_after_pool.IsPlaced());
  lit_32_update_after_pool.UpdateValue(128, &masm);
  lit_64_update_after_pool.UpdateValue(256, &masm);

  END();

  RUN();

  ASSERT_EQUAL_64(32, x1);
  ASSERT_EQUAL_64(64, x2);
  ASSERT_EQUAL_64(128, x3);
  ASSERT_EQUAL_64(256, x4);

  TEARDOWN();
}
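
// As exercised above, `UpdateValue` comes in two forms: before a literal is
// placed, the single-argument overload simply overwrites the pending value;
// once the literal has been emitted, the overload taking the masm is needed
// so that the pool entry can be rewritten in place in the code buffer.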


TEST(literal_deletion_policies) {
  SETUP();

  START();

  // We cannot check exactly when the deletion of the literals occurs, but we
  // check that usage of the deletion policies is not broken.

  ASSERT_LITERAL_POOL_SIZE(0);
  LiteralPool* literal_pool = masm.GetLiteralPool();

  Literal<int32_t> lit_manual(0xbad, literal_pool);
  Literal<int32_t>* lit_deleted_on_placement =
      new Literal<int32_t>(0xbad,
                           literal_pool,
                           RawLiteral::kDeletedOnPlacementByPool);
  Literal<int32_t>* lit_deleted_on_pool_destruction =
      new Literal<int32_t>(0xbad,
                           literal_pool,
                           RawLiteral::kDeletedOnPoolDestruction);

  ASSERT_LITERAL_POOL_SIZE(0);

  lit_manual.UpdateValue(32);
  lit_deleted_on_placement->UpdateValue(64);

  __ Ldr(w1, &lit_manual);
  __ Ldr(w2, lit_deleted_on_placement);
  __ Ldr(w3, lit_deleted_on_pool_destruction);

  masm.EmitLiteralPool(LiteralPool::kBranchRequired);

  VIXL_ASSERT(lit_manual.IsPlaced());
  VIXL_ASSERT(lit_deleted_on_pool_destruction->IsPlaced());
  lit_deleted_on_pool_destruction->UpdateValue(128, &masm);

  END();

  RUN();

  ASSERT_EQUAL_64(32, x1);
  ASSERT_EQUAL_64(64, x2);
  ASSERT_EQUAL_64(128, x3);

  TEARDOWN();
}


TEST(generic_operand) {
  SETUP();

  int32_t data_32_array[5] = {0xbadbeef,
                              0x11111111,
                              0xbadbeef,
                              0x33333333,
                              0xbadbeef};
  int64_t data_64_array[5] = {INT64_C(0xbadbadbadbeef),
                              INT64_C(0x1111111111111111),
                              INT64_C(0xbadbadbadbeef),
                              INT64_C(0x3333333333333333),
                              INT64_C(0xbadbadbadbeef)};
  size_t size_32 = sizeof(data_32_array[0]);
  size_t size_64 = sizeof(data_64_array[0]);

  START();

  intptr_t data_32_address = reinterpret_cast<intptr_t>(&data_32_array[0]);
  intptr_t data_64_address = reinterpret_cast<intptr_t>(&data_64_array[0]);
  Register data_32 = x27;
  Register data_64 = x28;
  __ Mov(data_32, data_32_address);
  __ Mov(data_64, data_64_address);

  __ Move(GenericOperand(w0),
          GenericOperand(MemOperand(data_32, 1 * size_32), size_32));
  __ Move(GenericOperand(s0),
          GenericOperand(MemOperand(data_32, 3 * size_32), size_32));
  __ Move(GenericOperand(x10),
          GenericOperand(MemOperand(data_64, 1 * size_64), size_64));
  __ Move(GenericOperand(d10),
          GenericOperand(MemOperand(data_64, 3 * size_64), size_64));

  __ Move(GenericOperand(w1), GenericOperand(w0));
  __ Move(GenericOperand(s1), GenericOperand(s0));
  __ Move(GenericOperand(x11), GenericOperand(x10));
  __ Move(GenericOperand(d11), GenericOperand(d10));

  __ Move(GenericOperand(MemOperand(data_32, 0 * size_32), size_32),
          GenericOperand(w1));
  __ Move(GenericOperand(MemOperand(data_32, 2 * size_32), size_32),
          GenericOperand(s1));
  __ Move(GenericOperand(MemOperand(data_64, 0 * size_64), size_64),
          GenericOperand(x11));
  __ Move(GenericOperand(MemOperand(data_64, 2 * size_64), size_64),
          GenericOperand(d11));

  __ Move(GenericOperand(MemOperand(data_32, 4 * size_32), size_32),
          GenericOperand(MemOperand(data_32, 0 * size_32), size_32));
  __ Move(GenericOperand(MemOperand(data_64, 4 * size_64), size_64),
          GenericOperand(MemOperand(data_64, 0 * size_64), size_64));
  END();

  RUN();

  ASSERT_EQUAL_64(data_32_address, data_32);
  ASSERT_EQUAL_64(data_64_address, data_64);

  ASSERT_EQUAL_32(0x11111111, w0);
  ASSERT_EQUAL_32(0x33333333, core.sreg_bits(0));
  ASSERT_EQUAL_64(INT64_C(0x1111111111111111), x10);
  ASSERT_EQUAL_64(INT64_C(0x3333333333333333), core.dreg_bits(10));

  ASSERT_EQUAL_32(0x11111111, w1);
  ASSERT_EQUAL_32(0x33333333, core.sreg_bits(1));
  ASSERT_EQUAL_64(INT64_C(0x1111111111111111), x11);
  ASSERT_EQUAL_64(INT64_C(0x3333333333333333), core.dreg_bits(11));

  VIXL_CHECK(data_32_array[0] == 0x11111111);
  VIXL_CHECK(data_32_array[1] == 0x11111111);
  VIXL_CHECK(data_32_array[2] == 0x33333333);
  VIXL_CHECK(data_32_array[3] == 0x33333333);
  VIXL_CHECK(data_32_array[4] == 0x11111111);

  VIXL_CHECK(data_64_array[0] == INT64_C(0x1111111111111111));
  VIXL_CHECK(data_64_array[1] == INT64_C(0x1111111111111111));
  VIXL_CHECK(data_64_array[2] == INT64_C(0x3333333333333333));
  VIXL_CHECK(data_64_array[3] == INT64_C(0x3333333333333333));
  VIXL_CHECK(data_64_array[4] == INT64_C(0x1111111111111111));

  TEARDOWN();
}
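
// `GenericOperand` wraps either a CPU register (core or FP) or a memory
// location of a given size, so generated code can move a value without
// caring where it currently lives. `MacroAssembler::Move` then selects a
// suitable instruction for each combination: register moves between
// registers, loads and stores for memory operands, and, for memory-to-memory
// moves, a load followed by a store through a scratch register.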


// Test feature detection of calls to runtime functions.

// C++11 should be sufficient to provide simulated runtime calls, except for a
// GCC bug before 4.9.1.
#if defined(VIXL_INCLUDE_SIMULATOR_AARCH64) && (__cplusplus >= 201103L) && \
    (defined(__clang__) || GCC_VERSION_OR_NEWER(4, 9, 1)) &&               \
    !defined(VIXL_HAS_SIMULATED_RUNTIME_CALL_SUPPORT)
#error \
    "C++11 should be sufficient to provide support for simulated runtime calls."
#endif  // #if defined(VIXL_INCLUDE_SIMULATOR_AARCH64) && ...

#if (__cplusplus >= 201103L) && \
    !defined(VIXL_HAS_MACROASSEMBLER_RUNTIME_CALL_SUPPORT)
#error \
    "C++11 should be sufficient to provide support for `MacroAssembler::CallRuntime()`."
#endif  // #if (__cplusplus >= 201103L) && ...

#ifdef VIXL_HAS_MACROASSEMBLER_RUNTIME_CALL_SUPPORT
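
// The helpers below exercise argument marshalling for `CallRuntime`. Under
// AAPCS64, the first eight integer arguments are passed in x0-x7 and the
// first eight floating-point arguments in d0-d7; the "on stack" helpers take
// enough parameters that their final arguments are passed on the stack,
// which is why the test pushes those values before the call and pops them
// afterwards.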
int32_t runtime_call_add_one(int32_t a) { return a + 1; }

double runtime_call_add_doubles(double a, double b, double c) {
  return a + b + c;
}

int64_t runtime_call_one_argument_on_stack(int64_t arg1 __attribute__((unused)),
                                           int64_t arg2 __attribute__((unused)),
                                           int64_t arg3 __attribute__((unused)),
                                           int64_t arg4 __attribute__((unused)),
                                           int64_t arg5 __attribute__((unused)),
                                           int64_t arg6 __attribute__((unused)),
                                           int64_t arg7 __attribute__((unused)),
                                           int64_t arg8 __attribute__((unused)),
                                           int64_t arg9) {
  return arg9;
}

double runtime_call_two_arguments_on_stack(int64_t arg1 __attribute__((unused)),
                                           int64_t arg2 __attribute__((unused)),
                                           int64_t arg3 __attribute__((unused)),
                                           int64_t arg4 __attribute__((unused)),
                                           int64_t arg5 __attribute__((unused)),
                                           int64_t arg6 __attribute__((unused)),
                                           int64_t arg7 __attribute__((unused)),
                                           int64_t arg8 __attribute__((unused)),
                                           double arg9,
                                           double arg10) {
  return arg9 - arg10;
}

void runtime_call_store_at_address(int64_t* address) { *address = 0xf00d; }

enum RuntimeCallTestEnum { Enum0 };

RuntimeCallTestEnum runtime_call_enum(RuntimeCallTestEnum e) { return e; }

enum class RuntimeCallTestEnumClass { Enum0 };

RuntimeCallTestEnumClass runtime_call_enum_class(RuntimeCallTestEnumClass e) {
  return e;
}

int8_t test_int8_t(int8_t x) { return x; }
uint8_t test_uint8_t(uint8_t x) { return x; }
int16_t test_int16_t(int16_t x) { return x; }
uint16_t test_uint16_t(uint16_t x) { return x; }

TEST(runtime_calls) {
  SETUP();

#ifndef VIXL_HAS_SIMULATED_RUNTIME_CALL_SUPPORT
  if (masm.GenerateSimulatorCode()) {
    // This configuration is unsupported and a `VIXL_UNREACHABLE()` would fire
    // while trying to generate `CallRuntime`. This configuration should only
    // be reachable with C++11 and a (buggy) version of GCC pre-4.9.1.
    TEARDOWN();
    return;
  }
#endif

  START();

  // Test `CallRuntime`.

  __ Mov(w0, 0);
  __ CallRuntime(runtime_call_add_one);
  __ Mov(w20, w0);

  __ Fmov(d0, 0.0);
  __ Fmov(d1, 1.5);
  __ Fmov(d2, 2.5);
  __ CallRuntime(runtime_call_add_doubles);
  __ Fmov(d20, d0);

  __ Mov(x0, 0x123);
  __ Push(x0, x0);
  __ CallRuntime(runtime_call_one_argument_on_stack);
  __ Mov(x21, x0);
  __ Pop(x0, x1);

  __ Fmov(d0, 314.0);
  __ Fmov(d1, 4.0);
  __ Push(d1, d0);
  __ CallRuntime(runtime_call_two_arguments_on_stack);
  __ Fmov(d21, d0);
  __ Pop(d1, d0);

  // Test that the template mechanisms don't break with enums.
  __ Mov(w0, 0);
  __ CallRuntime(runtime_call_enum);
  __ Mov(w0, 0);
  __ CallRuntime(runtime_call_enum_class);

  // Test `TailCallRuntime`.

  Label function, after_function;
  __ B(&after_function);
  __ Bind(&function);
  __ Mov(x22, 0);
  __ Mov(w0, 123);
  __ TailCallRuntime(runtime_call_add_one);
  // Control should not fall through.
  __ Mov(x22, 0xbad);
  __ Ret();
  __ Bind(&after_function);

  // Call our dummy function, taking care to preserve the link register.
  __ Push(ip0, lr);
  __ Bl(&function);
  __ Pop(lr, ip0);
  // Save the result.
  __ Mov(w23, w0);
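
  // The loop below checks that `CallRuntime` correctly marshals integer
  // arguments and return values narrower than a register. Narrow values
  // travel through W registers, so each call must observe its argument and
  // produce its result with the expected sign or zero extension; x24 counts
  // mismatches and is expected to still be zero at the end of the test.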
  __ Mov(x24, 0);
  int test_values[] = {static_cast<int8_t>(-1),
                       static_cast<uint8_t>(-1),
                       static_cast<int16_t>(-1),
                       static_cast<uint16_t>(-1),
                       -256,
                       -1,
                       0,
                       1,
                       256};
  for (size_t i = 0; i < sizeof(test_values) / sizeof(test_values[0]); ++i) {
    Label pass_int8, pass_uint8, pass_int16, pass_uint16;
    int x = test_values[i];
    __ Mov(w0, static_cast<int8_t>(x));
    __ CallRuntime(test_int8_t);
    __ Cmp(w0, static_cast<int8_t>(x));
    __ Cinc(x24, x24, ne);
    __ Mov(w0, static_cast<uint8_t>(x));
    __ CallRuntime(test_uint8_t);
    __ Cmp(w0, static_cast<uint8_t>(x));
    __ Cinc(x24, x24, ne);
    __ Mov(w0, static_cast<int16_t>(x));
    __ CallRuntime(test_int16_t);
    __ Cmp(w0, static_cast<int16_t>(x));
    __ Cinc(x24, x24, ne);
    __ Mov(w0, static_cast<uint16_t>(x));
    __ CallRuntime(test_uint16_t);
    __ Cmp(w0, static_cast<uint16_t>(x));
    __ Cinc(x24, x24, ne);
  }


  int64_t value = 0xbadbeef;
  __ Mov(x0, reinterpret_cast<uint64_t>(&value));
  __ CallRuntime(runtime_call_store_at_address);

  END();

#if defined(VIXL_HAS_SIMULATED_RUNTIME_CALL_SUPPORT) || \
    !defined(VIXL_INCLUDE_SIMULATOR_AARCH64)
  RUN();

  ASSERT_EQUAL_32(1, w20);
  ASSERT_EQUAL_FP64(4.0, d20);
  ASSERT_EQUAL_64(0x123, x21);
  ASSERT_EQUAL_FP64(310.0, d21);
  VIXL_CHECK(value == 0xf00d);
  ASSERT_EQUAL_64(0, x22);
  ASSERT_EQUAL_32(124, w23);
  ASSERT_EQUAL_64(0, x24);
#endif  // #if defined(VIXL_HAS_SIMULATED_RUNTIME_CALL_SUPPORT) || ...

  TEARDOWN();
}
#endif  // #ifdef VIXL_HAS_MACROASSEMBLER_RUNTIME_CALL_SUPPORT


TEST(optimised_mov_register) {
  SETUP();

  START();
  Label start;
  __ Bind(&start);
  __ Mov(x0, x0);
  VIXL_CHECK(masm.GetSizeOfCodeGeneratedSince(&start) == 0);
  __ Mov(w0, w0, kDiscardForSameWReg);
  VIXL_CHECK(masm.GetSizeOfCodeGeneratedSince(&start) == 0);
  __ Mov(w0, w0);
  VIXL_CHECK(masm.GetSizeOfCodeGeneratedSince(&start) == kInstructionSize);

  END();

  RUN();

  TEARDOWN();
}


TEST(nop) {
  MacroAssembler masm;

  Label start;
  __ Bind(&start);
  __ Nop();
  // `MacroAssembler::Nop` must generate at least one nop.
  VIXL_CHECK(masm.GetSizeOfCodeGeneratedSince(&start) >= kInstructionSize);

  masm.FinalizeCode();
}

TEST(scratch_scope_basic_v) {
  MacroAssembler masm;

  {
    UseScratchRegisterScope temps(&masm);
    VRegister temp = temps.AcquireVRegisterOfSize(kQRegSize);
    VIXL_CHECK(temp.Aliases(v31));
  }
  {
    UseScratchRegisterScope temps(&masm);
    VRegister temp = temps.AcquireVRegisterOfSize(kDRegSize);
    VIXL_CHECK(temp.Aliases(v31));
  }
  {
    UseScratchRegisterScope temps(&masm);
    VRegister temp = temps.AcquireVRegisterOfSize(kSRegSize);
    VIXL_CHECK(temp.Aliases(v31));
  }
}


}  // namespace aarch64
}  // namespace vixl