// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright
//     notice, this list of conditions and the following disclaimer in the
//     documentation and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may
//     be used to endorse or promote products derived from this software
//     without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

#include <sys/mman.h>

#include <cfloat>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>

#include "test-runner.h"
#include "test-utils.h"
#include "aarch64/test-utils-aarch64.h"

#include "aarch64/cpu-aarch64.h"
#include "aarch64/debugger-aarch64.h"
#include "aarch64/disasm-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"
#include "aarch64/simulator-aarch64.h"

namespace vixl {
namespace aarch64 {

// Test infrastructure.
//
// Tests are functions which accept no parameters and have no return values.
// The testing code should not perform an explicit return once completed. For
// example, to test the mov immediate instruction a very simple test would be:
//
//   TEST(mov_x0_one) {
//     SETUP();
//
//     START();
//     __ mov(x0, Operand(1));
//     END();
//
//     RUN();
//
//     ASSERT_EQUAL_64(1, x0);
//
//     TEARDOWN();
//   }
//
// Within a START ... END block all registers but sp can be modified. sp has to
// be explicitly saved/restored. The END() macro replaces the function return
// so it may appear multiple times in a test if the test has multiple exit
// points.
//
// Once the test has been run all integer and floating point registers as well
// as flags are accessible through a RegisterDump instance, see
// utils-aarch64.cc for more info on RegisterDump.
//
// We provide some helper asserts to handle common cases:
//
//   ASSERT_EQUAL_32(int32_t, int32_t)
//   ASSERT_EQUAL_FP32(float, float)
//   ASSERT_EQUAL_32(int32_t, W register)
//   ASSERT_EQUAL_FP32(float, S register)
//   ASSERT_EQUAL_64(int64_t, int64_t)
//   ASSERT_EQUAL_FP64(double, double)
//   ASSERT_EQUAL_64(int64_t, X register)
//   ASSERT_EQUAL_64(X register, X register)
//   ASSERT_EQUAL_FP64(double, D register)
//
// e.g. ASSERT_EQUAL_FP64(0.5, d30);
//
// If more advanced computation is required before the assert then access the
// RegisterDump named core directly:
//
//   ASSERT_EQUAL_64(0x1234, core.xreg(0) & 0xffff);
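//
// As a concrete illustration of the full pattern, a hypothetical minimal test
// (a sketch only; the test name and values are invented and not part of the
// suite):
//
//   TEST(add_forty_two) {
//     SETUP();
//
//     START();
//     __ Mov(x0, 40);
//     __ Add(x0, x0, 2);
//     END();
//
//     RUN();
//
//     ASSERT_EQUAL_64(42, x0);
//
//     TEARDOWN();
//   }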


#define __ masm.
#define TEST(name) TEST_(AARCH64_ASM_##name)

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
// Run tests with the simulator.

#define SETUP()        \
  MacroAssembler masm; \
  SETUP_COMMON()

#define SETUP_CUSTOM(size, pic)                                       \
  byte* buf = new byte[size + CodeBuffer::kDefaultCapacity];          \
  MacroAssembler masm(buf, size + CodeBuffer::kDefaultCapacity, pic); \
  SETUP_COMMON()

#define SETUP_COMMON()                                            \
  masm.SetGenerateSimulatorCode(true);                            \
  Decoder simulator_decoder;                                      \
  Simulator* simulator = Test::run_debugger()                     \
                             ? new Debugger(&simulator_decoder)   \
                             : new Simulator(&simulator_decoder); \
  simulator->SetColouredTrace(Test::coloured_trace());            \
  simulator->SetInstructionStats(Test::instruction_stats());      \
  Disassembler disasm;                                            \
  Decoder disassembler_decoder;                                   \
  disassembler_decoder.AppendVisitor(&disasm);                    \
  RegisterDump core

#define START()                                                               \
  masm.Reset();                                                               \
  simulator->ResetState();                                                    \
  __ PushCalleeSavedRegisters();                                              \
  {                                                                           \
    int trace_parameters = 0;                                                 \
    if (Test::trace_reg()) trace_parameters |= LOG_STATE;                     \
    if (Test::trace_write()) trace_parameters |= LOG_WRITE;                   \
    if (Test::trace_sim()) trace_parameters |= LOG_DISASM;                    \
    if (Test::trace_branch()) trace_parameters |= LOG_BRANCH;                 \
    if (trace_parameters != 0) {                                              \
      __ Trace(static_cast<TraceParameters>(trace_parameters), TRACE_ENABLE); \
    }                                                                         \
  }                                                                           \
  if (Test::instruction_stats()) {                                            \
    __ EnableInstrumentation();                                               \
  }

#define END()                       \
  if (Test::instruction_stats()) {  \
    __ DisableInstrumentation();    \
  }                                 \
  __ Trace(LOG_ALL, TRACE_DISABLE); \
  core.Dump(&masm);                 \
  __ PopCalleeSavedRegisters();     \
  __ Ret();                         \
  masm.FinalizeCode()

#define RUN()    \
  DISASSEMBLE(); \
  simulator->RunFrom(masm.GetBuffer()->GetStartAddress<Instruction*>())

#define RUN_CUSTOM() RUN()

#define TEARDOWN() TEARDOWN_COMMON()

#define TEARDOWN_CUSTOM() \
  delete[] buf;           \
  TEARDOWN_COMMON()

#define TEARDOWN_COMMON() delete simulator;

#else  // ifdef VIXL_INCLUDE_SIMULATOR_AARCH64.
// Run the test on real hardware or models.
#define SETUP()        \
  MacroAssembler masm; \
  SETUP_COMMON()

#define SETUP_CUSTOM(size, pic)                                         \
  byte* buffer =                                                        \
      reinterpret_cast<byte*>(mmap(NULL,                                \
                                   size + CodeBuffer::kDefaultCapacity, \
                                   PROT_READ | PROT_WRITE,              \
                                   MAP_PRIVATE | MAP_ANONYMOUS,         \
                                   -1,                                  \
                                   0));                                 \
  size_t buffer_size = size + CodeBuffer::kDefaultCapacity;             \
  MacroAssembler masm(buffer, buffer_size, pic);                        \
  SETUP_COMMON()

#define SETUP_COMMON()                         \
  Disassembler disasm;                         \
  Decoder disassembler_decoder;                \
  disassembler_decoder.AppendVisitor(&disasm); \
  masm.SetGenerateSimulatorCode(false);        \
  RegisterDump core;                           \
  CPU::SetUp()

#define START() \
  masm.Reset(); \
  __ PushCalleeSavedRegisters()

#define END()                   \
  core.Dump(&masm);             \
  __ PopCalleeSavedRegisters(); \
  __ Ret();                     \
  masm.FinalizeCode()

// Execute the generated code from the memory area.
#define RUN()                                               \
  DISASSEMBLE();                                            \
  masm.GetBuffer()->SetExecutable();                        \
  ExecuteMemory(masm.GetBuffer()->GetStartAddress<byte*>(), \
                masm.GetSizeOfCodeGenerated());             \
  masm.GetBuffer()->SetWritable()

// The generated code was written directly into `buffer`, execute it directly.
#define RUN_CUSTOM()                                    \
  DISASSEMBLE();                                        \
  mprotect(buffer, buffer_size, PROT_READ | PROT_EXEC); \
  ExecuteMemory(buffer, buffer_size);                   \
  mprotect(buffer, buffer_size, PROT_READ | PROT_WRITE)

#define TEARDOWN()

#define TEARDOWN_CUSTOM()

#endif  // ifdef VIXL_INCLUDE_SIMULATOR_AARCH64.

#define DISASSEMBLE()                                                    \
  if (Test::disassemble()) {                                             \
    Instruction* instruction =                                           \
        masm.GetBuffer()->GetStartAddress<Instruction*>();               \
    Instruction* end = masm.GetBuffer()->GetOffsetAddress<Instruction*>( \
        masm.GetSizeOfCodeGenerated());                                  \
    while (instruction != end) {                                         \
      disassembler_decoder.Decode(instruction);                          \
      uint32_t encoding = *reinterpret_cast<uint32_t*>(instruction);     \
      printf("%08" PRIx32 "\t%s\n", encoding, disasm.GetOutput());       \
      instruction += kInstructionSize;                                   \
    }                                                                    \
  }

#define ASSERT_EQUAL_NZCV(expected) \
  VIXL_CHECK(EqualNzcv(expected, core.flags_nzcv()))

#define ASSERT_EQUAL_REGISTERS(expected) \
  VIXL_CHECK(EqualRegisters(&expected, &core))

#define ASSERT_EQUAL_32(expected, result) \
  VIXL_CHECK(Equal32(static_cast<uint32_t>(expected), &core, result))

#define ASSERT_EQUAL_FP32(expected, result) \
  VIXL_CHECK(EqualFP32(expected, &core, result))

#define ASSERT_EQUAL_64(expected, result) \
  VIXL_CHECK(Equal64(expected, &core, result))

#define ASSERT_EQUAL_FP64(expected, result) \
  VIXL_CHECK(EqualFP64(expected, &core, result))

#define ASSERT_EQUAL_128(expected_h, expected_l, result) \
  VIXL_CHECK(Equal128(expected_h, expected_l, &core, result))

#define ASSERT_LITERAL_POOL_SIZE(expected) \
  VIXL_CHECK((expected + kInstructionSize) == (masm.GetLiteralPoolSize()))
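
// For reference, a minimal sketch of decoding one instruction with the
// Decoder/Disassembler pair that DISASSEMBLE() sets up (illustration only;
// assumes `masm` already holds finalized code):
//
//   Disassembler disasm;
//   Decoder decoder;
//   decoder.AppendVisitor(&disasm);
//   Instruction* instr = masm.GetBuffer()->GetStartAddress<Instruction*>();
//   decoder.Decode(instr);
//   printf("%s\n", disasm.GetOutput());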


TEST(stack_ops) {
  SETUP();

  START();
  // Save sp.
  __ Mov(x29, sp);

  // Set the sp to a known value.
  __ Mov(sp, 0x1004);
  __ Mov(x0, sp);

  // Add immediate to the sp, and move the result to a normal register.
  __ Add(sp, sp, 0x50);
  __ Mov(x1, sp);

  // Add extended to the sp, and move the result to a normal register.
  __ Mov(x17, 0xfff);
  __ Add(sp, sp, Operand(x17, SXTB));
  __ Mov(x2, sp);

  // Create an sp using a logical instruction, and move to normal register.
  __ Orr(sp, xzr, 0x1fff);
  __ Mov(x3, sp);

  // Write wsp using a logical instruction.
  __ Orr(wsp, wzr, 0xfffffff8);
  __ Mov(x4, sp);

  // Write sp, and read back wsp.
  __ Orr(sp, xzr, 0xfffffff8);
  __ Mov(w5, wsp);

  // Restore sp.
  __ Mov(sp, x29);
  END();

  RUN();

  ASSERT_EQUAL_64(0x1004, x0);
  ASSERT_EQUAL_64(0x1054, x1);
  ASSERT_EQUAL_64(0x1053, x2);
  ASSERT_EQUAL_64(0x1fff, x3);
  ASSERT_EQUAL_64(0xfffffff8, x4);
  ASSERT_EQUAL_64(0xfffffff8, x5);

  TEARDOWN();
}


TEST(mvn) {
  SETUP();

  START();
  __ Mvn(w0, 0xfff);
  __ Mvn(x1, 0xfff);
  __ Mvn(w2, Operand(w0, LSL, 1));
  __ Mvn(x3, Operand(x1, LSL, 2));
  __ Mvn(w4, Operand(w0, LSR, 3));
  __ Mvn(x5, Operand(x1, LSR, 4));
  __ Mvn(w6, Operand(w0, ASR, 11));
  __ Mvn(x7, Operand(x1, ASR, 12));
  __ Mvn(w8, Operand(w0, ROR, 13));
  __ Mvn(x9, Operand(x1, ROR, 14));
  __ Mvn(w10, Operand(w2, UXTB));
  __ Mvn(x11, Operand(x2, SXTB, 1));
  __ Mvn(w12, Operand(w2, UXTH, 2));
  __ Mvn(x13, Operand(x2, SXTH, 3));
  __ Mvn(x14, Operand(w2, UXTW, 4));
  __ Mvn(x15, Operand(w2, SXTW, 4));
  END();

  RUN();

  ASSERT_EQUAL_64(0xfffff000, x0);
  ASSERT_EQUAL_64(0xfffffffffffff000, x1);
  ASSERT_EQUAL_64(0x00001fff, x2);
  ASSERT_EQUAL_64(0x0000000000003fff, x3);
  ASSERT_EQUAL_64(0xe00001ff, x4);
  ASSERT_EQUAL_64(0xf0000000000000ff, x5);
  ASSERT_EQUAL_64(0x00000001, x6);
  ASSERT_EQUAL_64(0x0000000000000000, x7);
  ASSERT_EQUAL_64(0x7ff80000, x8);
  ASSERT_EQUAL_64(0x3ffc000000000000, x9);
  ASSERT_EQUAL_64(0xffffff00, x10);
  ASSERT_EQUAL_64(0x0000000000000001, x11);
  ASSERT_EQUAL_64(0xffff8003, x12);
  ASSERT_EQUAL_64(0xffffffffffff0007, x13);
  ASSERT_EQUAL_64(0xfffffffffffe000f, x14);
  ASSERT_EQUAL_64(0xfffffffffffe000f, x15);

  TEARDOWN();
}


TEST(mov_imm_w) {
  SETUP();

  START();
  __ Mov(w0, 0xffffffff);
  __ Mov(w1, 0xffff1234);
  __ Mov(w2, 0x1234ffff);
  __ Mov(w3, 0x00000000);
  __ Mov(w4, 0x00001234);
  __ Mov(w5, 0x12340000);
  __ Mov(w6, 0x12345678);
  __ Mov(w7, (int32_t)0x80000000);
  __ Mov(w8, (int32_t)0xffff0000);
  __ Mov(w9, kWMinInt);
  END();

  RUN();

  ASSERT_EQUAL_64(0xffffffff, x0);
  ASSERT_EQUAL_64(0xffff1234, x1);
  ASSERT_EQUAL_64(0x1234ffff, x2);
  ASSERT_EQUAL_64(0x00000000, x3);
  ASSERT_EQUAL_64(0x00001234, x4);
  ASSERT_EQUAL_64(0x12340000, x5);
  ASSERT_EQUAL_64(0x12345678, x6);
  ASSERT_EQUAL_64(0x80000000, x7);
  ASSERT_EQUAL_64(0xffff0000, x8);
  ASSERT_EQUAL_32(kWMinInt, w9);

  TEARDOWN();
}
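
// The MacroAssembler synthesizes each Mov immediate above from movz/movk/movn
// as needed. For instance, 0x12345678 needs two halfwords and might plausibly
// expand to (a sketch, not a guaranteed encoding choice):
//
//   movz w6, #0x5678
//   movk w6, #0x1234, lsl #16  // w6 = 0x12345678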


TEST(mov_imm_x) {
  SETUP();

  START();
  __ Mov(x0, 0xffffffffffffffff);
  __ Mov(x1, 0xffffffffffff1234);
  __ Mov(x2, 0xffffffff12345678);
  __ Mov(x3, 0xffff1234ffff5678);
  __ Mov(x4, 0x1234ffffffff5678);
  __ Mov(x5, 0x1234ffff5678ffff);
  __ Mov(x6, 0x12345678ffffffff);
  __ Mov(x7, 0x1234ffffffffffff);
  __ Mov(x8, 0x123456789abcffff);
  __ Mov(x9, 0x12345678ffff9abc);
  __ Mov(x10, 0x1234ffff56789abc);
  __ Mov(x11, 0xffff123456789abc);
  __ Mov(x12, 0x0000000000000000);
  __ Mov(x13, 0x0000000000001234);
  __ Mov(x14, 0x0000000012345678);
  __ Mov(x15, 0x0000123400005678);
  __ Mov(x18, 0x1234000000005678);
  __ Mov(x19, 0x1234000056780000);
  __ Mov(x20, 0x1234567800000000);
  __ Mov(x21, 0x1234000000000000);
  __ Mov(x22, 0x123456789abc0000);
  __ Mov(x23, 0x1234567800009abc);
  __ Mov(x24, 0x1234000056789abc);
  __ Mov(x25, 0x0000123456789abc);
  __ Mov(x26, 0x123456789abcdef0);
  __ Mov(x27, 0xffff000000000001);
  __ Mov(x28, 0x8000ffff00000000);
  END();

  RUN();

  ASSERT_EQUAL_64(0xffffffffffff1234, x1);
  ASSERT_EQUAL_64(0xffffffff12345678, x2);
  ASSERT_EQUAL_64(0xffff1234ffff5678, x3);
  ASSERT_EQUAL_64(0x1234ffffffff5678, x4);
  ASSERT_EQUAL_64(0x1234ffff5678ffff, x5);
  ASSERT_EQUAL_64(0x12345678ffffffff, x6);
  ASSERT_EQUAL_64(0x1234ffffffffffff, x7);
  ASSERT_EQUAL_64(0x123456789abcffff, x8);
  ASSERT_EQUAL_64(0x12345678ffff9abc, x9);
  ASSERT_EQUAL_64(0x1234ffff56789abc, x10);
  ASSERT_EQUAL_64(0xffff123456789abc, x11);
  ASSERT_EQUAL_64(0x0000000000000000, x12);
  ASSERT_EQUAL_64(0x0000000000001234, x13);
  ASSERT_EQUAL_64(0x0000000012345678, x14);
  ASSERT_EQUAL_64(0x0000123400005678, x15);
  ASSERT_EQUAL_64(0x1234000000005678, x18);
  ASSERT_EQUAL_64(0x1234000056780000, x19);
  ASSERT_EQUAL_64(0x1234567800000000, x20);
  ASSERT_EQUAL_64(0x1234000000000000, x21);
  ASSERT_EQUAL_64(0x123456789abc0000, x22);
  ASSERT_EQUAL_64(0x1234567800009abc, x23);
  ASSERT_EQUAL_64(0x1234000056789abc, x24);
  ASSERT_EQUAL_64(0x0000123456789abc, x25);
  ASSERT_EQUAL_64(0x123456789abcdef0, x26);
  ASSERT_EQUAL_64(0xffff000000000001, x27);
  ASSERT_EQUAL_64(0x8000ffff00000000, x28);

  TEARDOWN();
}
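
// TEST(mov) below drops to the raw movz/movk/movn mnemonics. Raw assembler
// instructions must be wrapped in an ExactAssemblyScope sized to exactly the
// number of instructions emitted; a minimal sketch (assuming a live masm):
//
//   {
//     ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
//     __ movz(x0, 0x1234);                  // x0 = 0x0000000000001234
//     __ movk(x0, UINT64_C(0x5678) << 16);  // x0 = 0x0000000056781234
//   }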


TEST(mov) {
  SETUP();

  START();
  __ Mov(x0, 0xffffffffffffffff);
  __ Mov(x1, 0xffffffffffffffff);
  __ Mov(x2, 0xffffffffffffffff);
  __ Mov(x3, 0xffffffffffffffff);

  __ Mov(x0, 0x0123456789abcdef);

  {
    ExactAssemblyScope scope(&masm, 3 * kInstructionSize);
    __ movz(x1, UINT64_C(0xabcd) << 16);
    __ movk(x2, UINT64_C(0xabcd) << 32);
    __ movn(x3, UINT64_C(0xabcd) << 48);
  }

  __ Mov(x4, 0x0123456789abcdef);
  __ Mov(x5, x4);

  __ Mov(w6, -1);

  // Test that moves back to the same register have the desired effect. This
  // is a no-op for X registers, and a truncation for W registers.
  __ Mov(x7, 0x0123456789abcdef);
  __ Mov(x7, x7);
  __ Mov(x8, 0x0123456789abcdef);
  __ Mov(w8, w8);
  __ Mov(x9, 0x0123456789abcdef);
  __ Mov(x9, Operand(x9));
  __ Mov(x10, 0x0123456789abcdef);
  __ Mov(w10, Operand(w10));

  __ Mov(w11, 0xfff);
  __ Mov(x12, 0xfff);
  __ Mov(w13, Operand(w11, LSL, 1));
  __ Mov(x14, Operand(x12, LSL, 2));
  __ Mov(w15, Operand(w11, LSR, 3));
  __ Mov(x18, Operand(x12, LSR, 4));
  __ Mov(w19, Operand(w11, ASR, 11));
  __ Mov(x20, Operand(x12, ASR, 12));
  __ Mov(w21, Operand(w11, ROR, 13));
  __ Mov(x22, Operand(x12, ROR, 14));
  __ Mov(w23, Operand(w13, UXTB));
  __ Mov(x24, Operand(x13, SXTB, 1));
  __ Mov(w25, Operand(w13, UXTH, 2));
  __ Mov(x26, Operand(x13, SXTH, 3));
  __ Mov(x27, Operand(w13, UXTW, 4));

  __ Mov(x28, 0x0123456789abcdef);
  __ Mov(w28, w28, kDiscardForSameWReg);
  END();

  RUN();

  ASSERT_EQUAL_64(0x0123456789abcdef, x0);
  ASSERT_EQUAL_64(0x00000000abcd0000, x1);
  ASSERT_EQUAL_64(0xffffabcdffffffff, x2);
  ASSERT_EQUAL_64(0x5432ffffffffffff, x3);
  ASSERT_EQUAL_64(x4, x5);
  ASSERT_EQUAL_32(-1, w6);
  ASSERT_EQUAL_64(0x0123456789abcdef, x7);
  ASSERT_EQUAL_32(0x89abcdef, w8);
  ASSERT_EQUAL_64(0x0123456789abcdef, x9);
  ASSERT_EQUAL_32(0x89abcdef, w10);
  ASSERT_EQUAL_64(0x00000fff, x11);
  ASSERT_EQUAL_64(0x0000000000000fff, x12);
  ASSERT_EQUAL_64(0x00001ffe, x13);
  ASSERT_EQUAL_64(0x0000000000003ffc, x14);
  ASSERT_EQUAL_64(0x000001ff, x15);
  ASSERT_EQUAL_64(0x00000000000000ff, x18);
  ASSERT_EQUAL_64(0x00000001, x19);
  ASSERT_EQUAL_64(0x0000000000000000, x20);
  ASSERT_EQUAL_64(0x7ff80000, x21);
  ASSERT_EQUAL_64(0x3ffc000000000000, x22);
  ASSERT_EQUAL_64(0x000000fe, x23);
  ASSERT_EQUAL_64(0xfffffffffffffffc, x24);
  ASSERT_EQUAL_64(0x00007ff8, x25);
  ASSERT_EQUAL_64(0x000000000000fff0, x26);
  ASSERT_EQUAL_64(0x000000000001ffe0, x27);
  ASSERT_EQUAL_64(0x0123456789abcdef, x28);

  TEARDOWN();
}


TEST(mov_negative) {
  SETUP();

  START();
  __ Mov(w11, 0xffffffff);
  __ Mov(x12, 0xffffffffffffffff);

  __ Mov(w13, Operand(w11, LSL, 1));
  __ Mov(w14, Operand(w11, LSR, 1));
  __ Mov(w15, Operand(w11, ASR, 1));
  __ Mov(w18, Operand(w11, ROR, 1));
  __ Mov(w19, Operand(w11, UXTB, 1));
  __ Mov(w20, Operand(w11, SXTB, 1));
  __ Mov(w21, Operand(w11, UXTH, 1));
  __ Mov(w22, Operand(w11, SXTH, 1));

  __ Mov(x23, Operand(x12, LSL, 1));
  __ Mov(x24, Operand(x12, LSR, 1));
  __ Mov(x25, Operand(x12, ASR, 1));
  __ Mov(x26, Operand(x12, ROR, 1));
  __ Mov(x27, Operand(x12, UXTH, 1));
  __ Mov(x28, Operand(x12, SXTH, 1));
  __ Mov(x29, Operand(x12, UXTW, 1));
  __ Mov(x30, Operand(x12, SXTW, 1));
  END();

  RUN();

  ASSERT_EQUAL_64(0xfffffffe, x13);
  ASSERT_EQUAL_64(0x7fffffff, x14);
  ASSERT_EQUAL_64(0xffffffff, x15);
  ASSERT_EQUAL_64(0xffffffff, x18);
  ASSERT_EQUAL_64(0x000001fe, x19);
  ASSERT_EQUAL_64(0xfffffffe, x20);
  ASSERT_EQUAL_64(0x0001fffe, x21);
  ASSERT_EQUAL_64(0xfffffffe, x22);

  ASSERT_EQUAL_64(0xfffffffffffffffe, x23);
  ASSERT_EQUAL_64(0x7fffffffffffffff, x24);
  ASSERT_EQUAL_64(0xffffffffffffffff, x25);
  ASSERT_EQUAL_64(0xffffffffffffffff, x26);
  ASSERT_EQUAL_64(0x000000000001fffe, x27);
  ASSERT_EQUAL_64(0xfffffffffffffffe, x28);
  ASSERT_EQUAL_64(0x00000001fffffffe, x29);
  ASSERT_EQUAL_64(0xfffffffffffffffe, x30);

  TEARDOWN();
}
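
// A worked example of the extended-register Operand form used above: in
// TEST(mov), x13 holds 0x1ffe, so Operand(x13, SXTB, 1) takes the low byte
// (0xfe), sign-extends it to 0xfffffffffffffffe, then shifts left by one,
// giving the 0xfffffffffffffffc asserted for x24.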


TEST(orr) {
  SETUP();

  START();
  __ Mov(x0, 0xf0f0);
  __ Mov(x1, 0xf00000ff);

  __ Orr(x2, x0, Operand(x1));
  __ Orr(w3, w0, Operand(w1, LSL, 28));
  __ Orr(x4, x0, Operand(x1, LSL, 32));
  __ Orr(x5, x0, Operand(x1, LSR, 4));
  __ Orr(w6, w0, Operand(w1, ASR, 4));
  __ Orr(x7, x0, Operand(x1, ASR, 4));
  __ Orr(w8, w0, Operand(w1, ROR, 12));
  __ Orr(x9, x0, Operand(x1, ROR, 12));
  __ Orr(w10, w0, 0xf);
  __ Orr(x11, x0, 0xf0000000f0000000);
  END();

  RUN();

  ASSERT_EQUAL_64(0x00000000f000f0ff, x2);
  ASSERT_EQUAL_64(0xf000f0f0, x3);
  ASSERT_EQUAL_64(0xf00000ff0000f0f0, x4);
  ASSERT_EQUAL_64(0x000000000f00f0ff, x5);
  ASSERT_EQUAL_64(0xff00f0ff, x6);
  ASSERT_EQUAL_64(0x000000000f00f0ff, x7);
  ASSERT_EQUAL_64(0x0ffff0f0, x8);
  ASSERT_EQUAL_64(0x0ff00000000ff0f0, x9);
  ASSERT_EQUAL_64(0x0000f0ff, x10);
  ASSERT_EQUAL_64(0xf0000000f000f0f0, x11);

  TEARDOWN();
}


TEST(orr_extend) {
  SETUP();

  START();
  __ Mov(x0, 1);
  __ Mov(x1, 0x8000000080008080);
  __ Orr(w6, w0, Operand(w1, UXTB));
  __ Orr(x7, x0, Operand(x1, UXTH, 1));
  __ Orr(w8, w0, Operand(w1, UXTW, 2));
  __ Orr(x9, x0, Operand(x1, UXTX, 3));
  __ Orr(w10, w0, Operand(w1, SXTB));
  __ Orr(x11, x0, Operand(x1, SXTH, 1));
  __ Orr(x12, x0, Operand(x1, SXTW, 2));
  __ Orr(x13, x0, Operand(x1, SXTX, 3));
  END();

  RUN();

  ASSERT_EQUAL_64(0x00000081, x6);
  ASSERT_EQUAL_64(0x0000000000010101, x7);
  ASSERT_EQUAL_64(0x00020201, x8);
  ASSERT_EQUAL_64(0x0000000400040401, x9);
  ASSERT_EQUAL_64(0xffffff81, x10);
  ASSERT_EQUAL_64(0xffffffffffff0101, x11);
  ASSERT_EQUAL_64(0xfffffffe00020201, x12);
  ASSERT_EQUAL_64(0x0000000400040401, x13);

  TEARDOWN();
}


TEST(bitwise_wide_imm) {
  SETUP();

  START();
  __ Mov(x0, 0);
  __ Mov(x1, 0xf0f0f0f0f0f0f0f0);

  __ Orr(x10, x0, 0x1234567890abcdef);
  __ Orr(w11, w1, 0x90abcdef);

  __ Orr(w12, w0, kWMinInt);
  __ Eor(w13, w0, kWMinInt);
  END();

  RUN();

  ASSERT_EQUAL_64(0, x0);
  ASSERT_EQUAL_64(0xf0f0f0f0f0f0f0f0, x1);
  ASSERT_EQUAL_64(0x1234567890abcdef, x10);
  ASSERT_EQUAL_64(0x00000000f0fbfdff, x11);
  ASSERT_EQUAL_32(kWMinInt, w12);
  ASSERT_EQUAL_32(kWMinInt, w13);

  TEARDOWN();
}


TEST(orn) {
  SETUP();

  START();
  __ Mov(x0, 0xf0f0);
  __ Mov(x1, 0xf00000ff);

  __ Orn(x2, x0, Operand(x1));
  __ Orn(w3, w0, Operand(w1, LSL, 4));
  __ Orn(x4, x0, Operand(x1, LSL, 4));
  __ Orn(x5, x0, Operand(x1, LSR, 1));
  __ Orn(w6, w0, Operand(w1, ASR, 1));
  __ Orn(x7, x0, Operand(x1, ASR, 1));
  __ Orn(w8, w0, Operand(w1, ROR, 16));
  __ Orn(x9, x0, Operand(x1, ROR, 16));
  __ Orn(w10, w0, 0x0000ffff);
  __ Orn(x11, x0, 0x0000ffff0000ffff);
  END();

  RUN();

  ASSERT_EQUAL_64(0xffffffff0ffffff0, x2);
  ASSERT_EQUAL_64(0xfffff0ff, x3);
  ASSERT_EQUAL_64(0xfffffff0fffff0ff, x4);
  ASSERT_EQUAL_64(0xffffffff87fffff0, x5);
  ASSERT_EQUAL_64(0x07fffff0, x6);
  ASSERT_EQUAL_64(0xffffffff87fffff0, x7);
  ASSERT_EQUAL_64(0xff00ffff, x8);
  ASSERT_EQUAL_64(0xff00ffffffffffff, x9);
  ASSERT_EQUAL_64(0xfffff0f0, x10);
  ASSERT_EQUAL_64(0xffff0000fffff0f0, x11);

  TEARDOWN();
}
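
// Orn computes rd = rn | ~operand. Checking the first case above by hand:
// 0x000000000000f0f0 | ~0x00000000f00000ff
//     = 0x000000000000f0f0 | 0xffffffff0fffff00
//     = 0xffffffff0ffffff0, the value asserted for x2.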


TEST(orn_extend) {
  SETUP();

  START();
  __ Mov(x0, 1);
  __ Mov(x1, 0x8000000080008081);
  __ Orn(w6, w0, Operand(w1, UXTB));
  __ Orn(x7, x0, Operand(x1, UXTH, 1));
  __ Orn(w8, w0, Operand(w1, UXTW, 2));
  __ Orn(x9, x0, Operand(x1, UXTX, 3));
  __ Orn(w10, w0, Operand(w1, SXTB));
  __ Orn(x11, x0, Operand(x1, SXTH, 1));
  __ Orn(x12, x0, Operand(x1, SXTW, 2));
  __ Orn(x13, x0, Operand(x1, SXTX, 3));
  END();

  RUN();

  ASSERT_EQUAL_64(0xffffff7f, x6);
  ASSERT_EQUAL_64(0xfffffffffffefefd, x7);
  ASSERT_EQUAL_64(0xfffdfdfb, x8);
  ASSERT_EQUAL_64(0xfffffffbfffbfbf7, x9);
  ASSERT_EQUAL_64(0x0000007f, x10);
  ASSERT_EQUAL_64(0x000000000000fefd, x11);
  ASSERT_EQUAL_64(0x00000001fffdfdfb, x12);
  ASSERT_EQUAL_64(0xfffffffbfffbfbf7, x13);

  TEARDOWN();
}


TEST(and_) {
  SETUP();

  START();
  __ Mov(x0, 0xfff0);
  __ Mov(x1, 0xf00000ff);

  __ And(x2, x0, Operand(x1));
  __ And(w3, w0, Operand(w1, LSL, 4));
  __ And(x4, x0, Operand(x1, LSL, 4));
  __ And(x5, x0, Operand(x1, LSR, 1));
  __ And(w6, w0, Operand(w1, ASR, 20));
  __ And(x7, x0, Operand(x1, ASR, 20));
  __ And(w8, w0, Operand(w1, ROR, 28));
  __ And(x9, x0, Operand(x1, ROR, 28));
  __ And(w10, w0, Operand(0xff00));
  __ And(x11, x0, Operand(0xff));
  END();

  RUN();

  ASSERT_EQUAL_64(0x000000f0, x2);
  ASSERT_EQUAL_64(0x00000ff0, x3);
  ASSERT_EQUAL_64(0x00000ff0, x4);
  ASSERT_EQUAL_64(0x00000070, x5);
  ASSERT_EQUAL_64(0x0000ff00, x6);
  ASSERT_EQUAL_64(0x00000f00, x7);
  ASSERT_EQUAL_64(0x00000ff0, x8);
  ASSERT_EQUAL_64(0x00000000, x9);
  ASSERT_EQUAL_64(0x0000ff00, x10);
  ASSERT_EQUAL_64(0x000000f0, x11);

  TEARDOWN();
}


TEST(and_extend) {
  SETUP();

  START();
  __ Mov(x0, 0xffffffffffffffff);
  __ Mov(x1, 0x8000000080008081);
  __ And(w6, w0, Operand(w1, UXTB));
  __ And(x7, x0, Operand(x1, UXTH, 1));
  __ And(w8, w0, Operand(w1, UXTW, 2));
  __ And(x9, x0, Operand(x1, UXTX, 3));
  __ And(w10, w0, Operand(w1, SXTB));
  __ And(x11, x0, Operand(x1, SXTH, 1));
  __ And(x12, x0, Operand(x1, SXTW, 2));
  __ And(x13, x0, Operand(x1, SXTX, 3));
  END();

  RUN();

  ASSERT_EQUAL_64(0x00000081, x6);
  ASSERT_EQUAL_64(0x0000000000010102, x7);
  ASSERT_EQUAL_64(0x00020204, x8);
  ASSERT_EQUAL_64(0x0000000400040408, x9);
  ASSERT_EQUAL_64(0xffffff81, x10);
  ASSERT_EQUAL_64(0xffffffffffff0102, x11);
  ASSERT_EQUAL_64(0xfffffffe00020204, x12);
  ASSERT_EQUAL_64(0x0000000400040408, x13);

  TEARDOWN();
}


TEST(ands) {
  SETUP();

  START();
  __ Mov(x1, 0xf00000ff);
  __ Ands(w0, w1, Operand(w1));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(NFlag);
  ASSERT_EQUAL_64(0xf00000ff, x0);

  START();
  __ Mov(x0, 0xfff0);
  __ Mov(x1, 0xf00000ff);
  __ Ands(w0, w0, Operand(w1, LSR, 4));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(ZFlag);
  ASSERT_EQUAL_64(0x00000000, x0);

  START();
  __ Mov(x0, 0x8000000000000000);
  __ Mov(x1, 0x00000001);
  __ Ands(x0, x0, Operand(x1, ROR, 1));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(NFlag);
  ASSERT_EQUAL_64(0x8000000000000000, x0);

  START();
  __ Mov(x0, 0xfff0);
  __ Ands(w0, w0, Operand(0xf));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(ZFlag);
  ASSERT_EQUAL_64(0x00000000, x0);

  START();
  __ Mov(x0, 0xff000000);
  __ Ands(w0, w0, Operand(0x80000000));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(NFlag);
  ASSERT_EQUAL_64(0x80000000, x0);

  TEARDOWN();
}
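
// When a helper assert is not enough, flags can be checked against the dump
// directly; for reference, ASSERT_EQUAL_NZCV(NFlag) above expands to:
//
//   VIXL_CHECK(EqualNzcv(NFlag, core.flags_nzcv()));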


TEST(bic) {
  SETUP();

  START();
  __ Mov(x0, 0xfff0);
  __ Mov(x1, 0xf00000ff);

  __ Bic(x2, x0, Operand(x1));
  __ Bic(w3, w0, Operand(w1, LSL, 4));
  __ Bic(x4, x0, Operand(x1, LSL, 4));
  __ Bic(x5, x0, Operand(x1, LSR, 1));
  __ Bic(w6, w0, Operand(w1, ASR, 20));
  __ Bic(x7, x0, Operand(x1, ASR, 20));
  __ Bic(w8, w0, Operand(w1, ROR, 28));
  __ Bic(x9, x0, Operand(x1, ROR, 24));
  __ Bic(x10, x0, Operand(0x1f));
  __ Bic(x11, x0, Operand(0x100));

  // Test bic into sp when the constant cannot be encoded in the immediate
  // field.
  // Use x20 to preserve sp. We check for the result via x21 because the
  // test infrastructure requires that sp be restored to its original value.
  __ Mov(x20, sp);
  __ Mov(x0, 0xffffff);
  __ Bic(sp, x0, Operand(0xabcdef));
  __ Mov(x21, sp);
  __ Mov(sp, x20);
  END();

  RUN();

  ASSERT_EQUAL_64(0x0000ff00, x2);
  ASSERT_EQUAL_64(0x0000f000, x3);
  ASSERT_EQUAL_64(0x0000f000, x4);
  ASSERT_EQUAL_64(0x0000ff80, x5);
  ASSERT_EQUAL_64(0x000000f0, x6);
  ASSERT_EQUAL_64(0x0000f0f0, x7);
  ASSERT_EQUAL_64(0x0000f000, x8);
  ASSERT_EQUAL_64(0x0000ff00, x9);
  ASSERT_EQUAL_64(0x0000ffe0, x10);
  ASSERT_EQUAL_64(0x0000fef0, x11);

  ASSERT_EQUAL_64(0x543210, x21);

  TEARDOWN();
}


TEST(bic_extend) {
  SETUP();

  START();
  __ Mov(x0, 0xffffffffffffffff);
  __ Mov(x1, 0x8000000080008081);
  __ Bic(w6, w0, Operand(w1, UXTB));
  __ Bic(x7, x0, Operand(x1, UXTH, 1));
  __ Bic(w8, w0, Operand(w1, UXTW, 2));
  __ Bic(x9, x0, Operand(x1, UXTX, 3));
  __ Bic(w10, w0, Operand(w1, SXTB));
  __ Bic(x11, x0, Operand(x1, SXTH, 1));
  __ Bic(x12, x0, Operand(x1, SXTW, 2));
  __ Bic(x13, x0, Operand(x1, SXTX, 3));
  END();

  RUN();

  ASSERT_EQUAL_64(0xffffff7e, x6);
  ASSERT_EQUAL_64(0xfffffffffffefefd, x7);
  ASSERT_EQUAL_64(0xfffdfdfb, x8);
  ASSERT_EQUAL_64(0xfffffffbfffbfbf7, x9);
  ASSERT_EQUAL_64(0x0000007e, x10);
  ASSERT_EQUAL_64(0x000000000000fefd, x11);
  ASSERT_EQUAL_64(0x00000001fffdfdfb, x12);
  ASSERT_EQUAL_64(0xfffffffbfffbfbf7, x13);

  TEARDOWN();
}


TEST(bics) {
  SETUP();

  START();
  __ Mov(x1, 0xffff);
  __ Bics(w0, w1, Operand(w1));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(ZFlag);
  ASSERT_EQUAL_64(0x00000000, x0);

  START();
  __ Mov(x0, 0xffffffff);
  __ Bics(w0, w0, Operand(w0, LSR, 1));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(NFlag);
  ASSERT_EQUAL_64(0x80000000, x0);

  START();
  __ Mov(x0, 0x8000000000000000);
  __ Mov(x1, 0x00000001);
  __ Bics(x0, x0, Operand(x1, ROR, 1));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(ZFlag);
  ASSERT_EQUAL_64(0x00000000, x0);

  START();
  __ Mov(x0, 0xffffffffffffffff);
  __ Bics(x0, x0, 0x7fffffffffffffff);
  END();

  RUN();

  ASSERT_EQUAL_NZCV(NFlag);
  ASSERT_EQUAL_64(0x8000000000000000, x0);

  START();
  __ Mov(w0, 0xffff0000);
  __ Bics(w0, w0, 0xfffffff0);
  END();

  RUN();

  ASSERT_EQUAL_NZCV(ZFlag);
  ASSERT_EQUAL_64(0x00000000, x0);

  TEARDOWN();
}


TEST(eor) {
  SETUP();

  START();
  __ Mov(x0, 0xfff0);
  __ Mov(x1, 0xf00000ff);

  __ Eor(x2, x0, Operand(x1));
  __ Eor(w3, w0, Operand(w1, LSL, 4));
  __ Eor(x4, x0, Operand(x1, LSL, 4));
  __ Eor(x5, x0, Operand(x1, LSR, 1));
  __ Eor(w6, w0, Operand(w1, ASR, 20));
  __ Eor(x7, x0, Operand(x1, ASR, 20));
  __ Eor(w8, w0, Operand(w1, ROR, 28));
  __ Eor(x9, x0, Operand(x1, ROR, 28));
  __ Eor(w10, w0, 0xff00ff00);
  __ Eor(x11, x0, 0xff00ff00ff00ff00);
  END();

  RUN();

  ASSERT_EQUAL_64(0x00000000f000ff0f, x2);
  ASSERT_EQUAL_64(0x0000f000, x3);
  ASSERT_EQUAL_64(0x0000000f0000f000, x4);
  ASSERT_EQUAL_64(0x000000007800ff8f, x5);
  ASSERT_EQUAL_64(0xffff00f0, x6);
  ASSERT_EQUAL_64(0x000000000000f0f0, x7);
  ASSERT_EQUAL_64(0x0000f00f, x8);
  ASSERT_EQUAL_64(0x00000ff00000ffff, x9);
  ASSERT_EQUAL_64(0xff0000f0, x10);
  ASSERT_EQUAL_64(0xff00ff00ff0000f0, x11);

  TEARDOWN();
}


TEST(eor_extend) {
  SETUP();

  START();
  __ Mov(x0, 0x1111111111111111);
  __ Mov(x1, 0x8000000080008081);
  __ Eor(w6, w0, Operand(w1, UXTB));
  __ Eor(x7, x0, Operand(x1, UXTH, 1));
  __ Eor(w8, w0, Operand(w1, UXTW, 2));
  __ Eor(x9, x0, Operand(x1, UXTX, 3));
  __ Eor(w10, w0, Operand(w1, SXTB));
  __ Eor(x11, x0, Operand(x1, SXTH, 1));
  __ Eor(x12, x0, Operand(x1, SXTW, 2));
  __ Eor(x13, x0, Operand(x1, SXTX, 3));
  END();

  RUN();

  ASSERT_EQUAL_64(0x11111190, x6);
  ASSERT_EQUAL_64(0x1111111111101013, x7);
  ASSERT_EQUAL_64(0x11131315, x8);
  ASSERT_EQUAL_64(0x1111111511151519, x9);
  ASSERT_EQUAL_64(0xeeeeee90, x10);
  ASSERT_EQUAL_64(0xeeeeeeeeeeee1013, x11);
  ASSERT_EQUAL_64(0xeeeeeeef11131315, x12);
  ASSERT_EQUAL_64(0x1111111511151519, x13);

  TEARDOWN();
}


TEST(eon) {
  SETUP();

  START();
  __ Mov(x0, 0xfff0);
  __ Mov(x1, 0xf00000ff);

  __ Eon(x2, x0, Operand(x1));
  __ Eon(w3, w0, Operand(w1, LSL, 4));
  __ Eon(x4, x0, Operand(x1, LSL, 4));
  __ Eon(x5, x0, Operand(x1, LSR, 1));
  __ Eon(w6, w0, Operand(w1, ASR, 20));
  __ Eon(x7, x0, Operand(x1, ASR, 20));
  __ Eon(w8, w0, Operand(w1, ROR, 28));
  __ Eon(x9, x0, Operand(x1, ROR, 28));
  __ Eon(w10, w0, 0x03c003c0);
  __ Eon(x11, x0, 0x0000100000001000);
  END();

  RUN();

  ASSERT_EQUAL_64(0xffffffff0fff00f0, x2);
  ASSERT_EQUAL_64(0xffff0fff, x3);
  ASSERT_EQUAL_64(0xfffffff0ffff0fff, x4);
  ASSERT_EQUAL_64(0xffffffff87ff0070, x5);
  ASSERT_EQUAL_64(0x0000ff0f, x6);
  ASSERT_EQUAL_64(0xffffffffffff0f0f, x7);
  ASSERT_EQUAL_64(0xffff0ff0, x8);
  ASSERT_EQUAL_64(0xfffff00fffff0000, x9);
  ASSERT_EQUAL_64(0xfc3f03cf, x10);
  ASSERT_EQUAL_64(0xffffefffffff100f, x11);

  TEARDOWN();
}


TEST(eon_extend) {
  SETUP();

  START();
  __ Mov(x0, 0x1111111111111111);
  __ Mov(x1, 0x8000000080008081);
  __ Eon(w6, w0, Operand(w1, UXTB));
  __ Eon(x7, x0, Operand(x1, UXTH, 1));
  __ Eon(w8, w0, Operand(w1, UXTW, 2));
  __ Eon(x9, x0, Operand(x1, UXTX, 3));
  __ Eon(w10, w0, Operand(w1, SXTB));
  __ Eon(x11, x0, Operand(x1, SXTH, 1));
  __ Eon(x12, x0, Operand(x1, SXTW, 2));
  __ Eon(x13, x0, Operand(x1, SXTX, 3));
  END();

  RUN();

  ASSERT_EQUAL_64(0xeeeeee6f, x6);
  ASSERT_EQUAL_64(0xeeeeeeeeeeefefec, x7);
  ASSERT_EQUAL_64(0xeeececea, x8);
  ASSERT_EQUAL_64(0xeeeeeeeaeeeaeae6, x9);
  ASSERT_EQUAL_64(0x1111116f, x10);
  ASSERT_EQUAL_64(0x111111111111efec, x11);
  ASSERT_EQUAL_64(0x11111110eeececea, x12);
  ASSERT_EQUAL_64(0xeeeeeeeaeeeaeae6, x13);

  TEARDOWN();
}
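
// Eon computes rd = rn ^ ~operand. Checking the first case of TEST(eon) by
// hand: 0x000000000000fff0 ^ ~0x00000000f00000ff
//     = 0x000000000000fff0 ^ 0xffffffff0fffff00
//     = 0xffffffff0fff00f0, the value asserted for x2.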


TEST(mul) {
  SETUP();

  START();
  __ Mov(x25, 0);
  __ Mov(x26, 1);
  __ Mov(x18, 0xffffffff);
  __ Mov(x19, 0xffffffffffffffff);

  __ Mul(w0, w25, w25);
  __ Mul(w1, w25, w26);
  __ Mul(w2, w26, w18);
  __ Mul(w3, w18, w19);
  __ Mul(x4, x25, x25);
  __ Mul(x5, x26, x18);
  __ Mul(x6, x18, x19);
  __ Mul(x7, x19, x19);
  __ Smull(x8, w26, w18);
  __ Smull(x9, w18, w18);
  __ Smull(x10, w19, w19);
  __ Mneg(w11, w25, w25);
  __ Mneg(w12, w25, w26);
  __ Mneg(w13, w26, w18);
  __ Mneg(w14, w18, w19);
  __ Mneg(x20, x25, x25);
  __ Mneg(x21, x26, x18);
  __ Mneg(x22, x18, x19);
  __ Mneg(x23, x19, x19);
  END();

  RUN();

  ASSERT_EQUAL_64(0, x0);
  ASSERT_EQUAL_64(0, x1);
  ASSERT_EQUAL_64(0xffffffff, x2);
  ASSERT_EQUAL_64(1, x3);
  ASSERT_EQUAL_64(0, x4);
  ASSERT_EQUAL_64(0xffffffff, x5);
  ASSERT_EQUAL_64(0xffffffff00000001, x6);
  ASSERT_EQUAL_64(1, x7);
  ASSERT_EQUAL_64(0xffffffffffffffff, x8);
  ASSERT_EQUAL_64(1, x9);
  ASSERT_EQUAL_64(1, x10);
  ASSERT_EQUAL_64(0, x11);
  ASSERT_EQUAL_64(0, x12);
  ASSERT_EQUAL_64(1, x13);
  ASSERT_EQUAL_64(0xffffffff, x14);
  ASSERT_EQUAL_64(0, x20);
  ASSERT_EQUAL_64(0xffffffff00000001, x21);
  ASSERT_EQUAL_64(0xffffffff, x22);
  ASSERT_EQUAL_64(0xffffffffffffffff, x23);

  TEARDOWN();
}


static void SmullHelper(int64_t expected, int64_t a, int64_t b) {
  SETUP();
  START();
  __ Mov(w0, a);
  __ Mov(w1, b);
  __ Smull(x2, w0, w1);
  END();
  RUN();
  ASSERT_EQUAL_64(expected, x2);
  TEARDOWN();
}


TEST(smull) {
  SmullHelper(0, 0, 0);
  SmullHelper(1, 1, 1);
  SmullHelper(-1, -1, 1);
  SmullHelper(1, -1, -1);
  SmullHelper(0xffffffff80000000, 0x80000000, 1);
  SmullHelper(0x0000000080000000, 0x00010000, 0x00008000);
}
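
// Smull treats its W sources as signed 32-bit values and produces a 64-bit
// product. In TEST(mul) above, Smull(x8, w26, w18) is 1 * (int32_t)0xffffffff
// = -1, i.e. 0xffffffffffffffff; SmullHelper exercises further corner cases.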


TEST(madd) {
  SETUP();

  START();
  __ Mov(x16, 0);
  __ Mov(x17, 1);
  __ Mov(x18, 0xffffffff);
  __ Mov(x19, 0xffffffffffffffff);

  __ Madd(w0, w16, w16, w16);
  __ Madd(w1, w16, w16, w17);
  __ Madd(w2, w16, w16, w18);
  __ Madd(w3, w16, w16, w19);
  __ Madd(w4, w16, w17, w17);
  __ Madd(w5, w17, w17, w18);
  __ Madd(w6, w17, w17, w19);
  __ Madd(w7, w17, w18, w16);
  __ Madd(w8, w17, w18, w18);
  __ Madd(w9, w18, w18, w17);
  __ Madd(w10, w18, w19, w18);
  __ Madd(w11, w19, w19, w19);

  __ Madd(x12, x16, x16, x16);
  __ Madd(x13, x16, x16, x17);
  __ Madd(x14, x16, x16, x18);
  __ Madd(x15, x16, x16, x19);
  __ Madd(x20, x16, x17, x17);
  __ Madd(x21, x17, x17, x18);
  __ Madd(x22, x17, x17, x19);
  __ Madd(x23, x17, x18, x16);
  __ Madd(x24, x17, x18, x18);
  __ Madd(x25, x18, x18, x17);
  __ Madd(x26, x18, x19, x18);
  __ Madd(x27, x19, x19, x19);

  END();

  RUN();

  ASSERT_EQUAL_64(0, x0);
  ASSERT_EQUAL_64(1, x1);
  ASSERT_EQUAL_64(0xffffffff, x2);
  ASSERT_EQUAL_64(0xffffffff, x3);
  ASSERT_EQUAL_64(1, x4);
  ASSERT_EQUAL_64(0, x5);
  ASSERT_EQUAL_64(0, x6);
  ASSERT_EQUAL_64(0xffffffff, x7);
  ASSERT_EQUAL_64(0xfffffffe, x8);
  ASSERT_EQUAL_64(2, x9);
  ASSERT_EQUAL_64(0, x10);
  ASSERT_EQUAL_64(0, x11);

  ASSERT_EQUAL_64(0, x12);
  ASSERT_EQUAL_64(1, x13);
  ASSERT_EQUAL_64(0x00000000ffffffff, x14);
  ASSERT_EQUAL_64(0xffffffffffffffff, x15);
  ASSERT_EQUAL_64(1, x20);
  ASSERT_EQUAL_64(0x0000000100000000, x21);
  ASSERT_EQUAL_64(0, x22);
  ASSERT_EQUAL_64(0x00000000ffffffff, x23);
  ASSERT_EQUAL_64(0x00000001fffffffe, x24);
  ASSERT_EQUAL_64(0xfffffffe00000002, x25);
  ASSERT_EQUAL_64(0, x26);
  ASSERT_EQUAL_64(0, x27);

  TEARDOWN();
}


TEST(msub) {
  SETUP();

  START();
  __ Mov(x16, 0);
  __ Mov(x17, 1);
  __ Mov(x18, 0xffffffff);
  __ Mov(x19, 0xffffffffffffffff);

  __ Msub(w0, w16, w16, w16);
  __ Msub(w1, w16, w16, w17);
  __ Msub(w2, w16, w16, w18);
  __ Msub(w3, w16, w16, w19);
  __ Msub(w4, w16, w17, w17);
  __ Msub(w5, w17, w17, w18);
  __ Msub(w6, w17, w17, w19);
  __ Msub(w7, w17, w18, w16);
  __ Msub(w8, w17, w18, w18);
  __ Msub(w9, w18, w18, w17);
  __ Msub(w10, w18, w19, w18);
  __ Msub(w11, w19, w19, w19);

  __ Msub(x12, x16, x16, x16);
  __ Msub(x13, x16, x16, x17);
  __ Msub(x14, x16, x16, x18);
  __ Msub(x15, x16, x16, x19);
  __ Msub(x20, x16, x17, x17);
  __ Msub(x21, x17, x17, x18);
  __ Msub(x22, x17, x17, x19);
  __ Msub(x23, x17, x18, x16);
  __ Msub(x24, x17, x18, x18);
  __ Msub(x25, x18, x18, x17);
  __ Msub(x26, x18, x19, x18);
  __ Msub(x27, x19, x19, x19);

  END();

  RUN();

  ASSERT_EQUAL_64(0, x0);
  ASSERT_EQUAL_64(1, x1);
  ASSERT_EQUAL_64(0xffffffff, x2);
  ASSERT_EQUAL_64(0xffffffff, x3);
  ASSERT_EQUAL_64(1, x4);
  ASSERT_EQUAL_64(0xfffffffe, x5);
  ASSERT_EQUAL_64(0xfffffffe, x6);
  ASSERT_EQUAL_64(1, x7);
  ASSERT_EQUAL_64(0, x8);
  ASSERT_EQUAL_64(0, x9);
  ASSERT_EQUAL_64(0xfffffffe, x10);
  ASSERT_EQUAL_64(0xfffffffe, x11);

  ASSERT_EQUAL_64(0, x12);
  ASSERT_EQUAL_64(1, x13);
  ASSERT_EQUAL_64(0x00000000ffffffff, x14);
  ASSERT_EQUAL_64(0xffffffffffffffff, x15);
  ASSERT_EQUAL_64(1, x20);
  ASSERT_EQUAL_64(0x00000000fffffffe, x21);
  ASSERT_EQUAL_64(0xfffffffffffffffe, x22);
  ASSERT_EQUAL_64(0xffffffff00000001, x23);
  ASSERT_EQUAL_64(0, x24);
  ASSERT_EQUAL_64(0x0000000200000000, x25);
  ASSERT_EQUAL_64(0x00000001fffffffe, x26);
  ASSERT_EQUAL_64(0xfffffffffffffffe, x27);

  TEARDOWN();
}


TEST(smulh) {
  SETUP();

  START();
  __ Mov(x20, 0);
  __ Mov(x21, 1);
  __ Mov(x22, 0x0000000100000000);
  __ Mov(x23, 0x0000000012345678);
  __ Mov(x24, 0x0123456789abcdef);
  __ Mov(x25, 0x0000000200000000);
  __ Mov(x26, 0x8000000000000000);
  __ Mov(x27, 0xffffffffffffffff);
  __ Mov(x28, 0x5555555555555555);
  __ Mov(x29, 0xaaaaaaaaaaaaaaaa);

  __ Smulh(x0, x20, x24);
  __ Smulh(x1, x21, x24);
  __ Smulh(x2, x22, x23);
  __ Smulh(x3, x22, x24);
  __ Smulh(x4, x24, x25);
  __ Smulh(x5, x23, x27);
  __ Smulh(x6, x26, x26);
  __ Smulh(x7, x26, x27);
  __ Smulh(x8, x27, x27);
  __ Smulh(x9, x28, x28);
  __ Smulh(x10, x28, x29);
  __ Smulh(x11, x29, x29);
  END();

  RUN();

  ASSERT_EQUAL_64(0, x0);
  ASSERT_EQUAL_64(0, x1);
  ASSERT_EQUAL_64(0, x2);
  ASSERT_EQUAL_64(0x0000000001234567, x3);
  ASSERT_EQUAL_64(0x0000000002468acf, x4);
  ASSERT_EQUAL_64(0xffffffffffffffff, x5);
  ASSERT_EQUAL_64(0x4000000000000000, x6);
  ASSERT_EQUAL_64(0, x7);
  ASSERT_EQUAL_64(0, x8);
  ASSERT_EQUAL_64(0x1c71c71c71c71c71, x9);
  ASSERT_EQUAL_64(0xe38e38e38e38e38e, x10);
  ASSERT_EQUAL_64(0x1c71c71c71c71c72, x11);

  TEARDOWN();
}
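
// Smulh returns the high 64 bits of the signed 128-bit product: above,
// Smulh(x8, x27, x27) is (-1) * (-1) = 1, so the high half is 0. TEST(umulh)
// below repeats the same grid unsigned, where x27 * x27 is (2^64 - 1)^2 and
// the high half becomes 0xfffffffffffffffe.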


TEST(umulh) {
  SETUP();

  START();
  __ Mov(x20, 0);
  __ Mov(x21, 1);
  __ Mov(x22, 0x0000000100000000);
  __ Mov(x23, 0x0000000012345678);
  __ Mov(x24, 0x0123456789abcdef);
  __ Mov(x25, 0x0000000200000000);
  __ Mov(x26, 0x8000000000000000);
  __ Mov(x27, 0xffffffffffffffff);
  __ Mov(x28, 0x5555555555555555);
  __ Mov(x29, 0xaaaaaaaaaaaaaaaa);

  __ Umulh(x0, x20, x24);
  __ Umulh(x1, x21, x24);
  __ Umulh(x2, x22, x23);
  __ Umulh(x3, x22, x24);
  __ Umulh(x4, x24, x25);
  __ Umulh(x5, x23, x27);
  __ Umulh(x6, x26, x26);
  __ Umulh(x7, x26, x27);
  __ Umulh(x8, x27, x27);
  __ Umulh(x9, x28, x28);
  __ Umulh(x10, x28, x29);
  __ Umulh(x11, x29, x29);
  END();

  RUN();

  ASSERT_EQUAL_64(0, x0);
  ASSERT_EQUAL_64(0, x1);
  ASSERT_EQUAL_64(0, x2);
  ASSERT_EQUAL_64(0x0000000001234567, x3);
  ASSERT_EQUAL_64(0x0000000002468acf, x4);
  ASSERT_EQUAL_64(0x0000000012345677, x5);
  ASSERT_EQUAL_64(0x4000000000000000, x6);
  ASSERT_EQUAL_64(0x7fffffffffffffff, x7);
  ASSERT_EQUAL_64(0xfffffffffffffffe, x8);
  ASSERT_EQUAL_64(0x1c71c71c71c71c71, x9);
  ASSERT_EQUAL_64(0x38e38e38e38e38e3, x10);
  ASSERT_EQUAL_64(0x71c71c71c71c71c6, x11);

  TEARDOWN();
}


TEST(smaddl_umaddl_umull) {
  SETUP();

  START();
  __ Mov(x17, 1);
  __ Mov(x18, 0x00000000ffffffff);
  __ Mov(x19, 0xffffffffffffffff);
  __ Mov(x20, 4);
  __ Mov(x21, 0x0000000200000000);

  __ Smaddl(x9, w17, w18, x20);
  __ Smaddl(x10, w18, w18, x20);
  __ Smaddl(x11, w19, w19, x20);
  __ Smaddl(x12, w19, w19, x21);
  __ Umaddl(x13, w17, w18, x20);
  __ Umaddl(x14, w18, w18, x20);
  __ Umaddl(x15, w19, w19, x20);
  __ Umaddl(x22, w19, w19, x21);
  __ Umull(x24, w19, w19);
  __ Umull(x25, w17, w18);
  END();

  RUN();

  ASSERT_EQUAL_64(3, x9);
  ASSERT_EQUAL_64(5, x10);
  ASSERT_EQUAL_64(5, x11);
  ASSERT_EQUAL_64(0x0000000200000001, x12);
  ASSERT_EQUAL_64(0x0000000100000003, x13);
  ASSERT_EQUAL_64(0xfffffffe00000005, x14);
  ASSERT_EQUAL_64(0xfffffffe00000005, x15);
  ASSERT_EQUAL_64(1, x22);
  ASSERT_EQUAL_64(0xfffffffe00000001, x24);
  ASSERT_EQUAL_64(0x00000000ffffffff, x25);

  TEARDOWN();
}


TEST(smsubl_umsubl) {
  SETUP();

  START();
  __ Mov(x17, 1);
  __ Mov(x18, 0x00000000ffffffff);
  __ Mov(x19, 0xffffffffffffffff);
  __ Mov(x20, 4);
  __ Mov(x21, 0x0000000200000000);

  __ Smsubl(x9, w17, w18, x20);
  __ Smsubl(x10, w18, w18, x20);
  __ Smsubl(x11, w19, w19, x20);
  __ Smsubl(x12, w19, w19, x21);
  __ Umsubl(x13, w17, w18, x20);
  __ Umsubl(x14, w18, w18, x20);
  __ Umsubl(x15, w19, w19, x20);
  __ Umsubl(x22, w19, w19, x21);
  END();

  RUN();

  ASSERT_EQUAL_64(5, x9);
  ASSERT_EQUAL_64(3, x10);
  ASSERT_EQUAL_64(3, x11);
  ASSERT_EQUAL_64(0x00000001ffffffff, x12);
  ASSERT_EQUAL_64(0xffffffff00000005, x13);
  ASSERT_EQUAL_64(0x0000000200000003, x14);
  ASSERT_EQUAL_64(0x0000000200000003, x15);
  ASSERT_EQUAL_64(0x00000003ffffffff, x22);

  TEARDOWN();
}
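
// Smaddl and Umaddl accumulate a widened 32x32->64 product. In
// TEST(smaddl_umaddl_umull) above, Smaddl(x9, w17, w18, x20) computes
// 4 + 1 * (int32_t)0xffffffff = 4 + (-1) = 3, while the unsigned
// Umaddl(x13, w17, w18, x20) computes 4 + 0xffffffff = 0x0000000100000003.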


TEST(div) {
  SETUP();

  START();
  __ Mov(x16, 1);
  __ Mov(x17, 0xffffffff);
  __ Mov(x18, 0xffffffffffffffff);
  __ Mov(x19, 0x80000000);
  __ Mov(x20, 0x8000000000000000);
  __ Mov(x21, 2);

  __ Udiv(w0, w16, w16);
  __ Udiv(w1, w17, w16);
  __ Sdiv(w2, w16, w16);
  __ Sdiv(w3, w16, w17);
  __ Sdiv(w4, w17, w18);

  __ Udiv(x5, x16, x16);
  __ Udiv(x6, x17, x18);
  __ Sdiv(x7, x16, x16);
  __ Sdiv(x8, x16, x17);
  __ Sdiv(x9, x17, x18);

  __ Udiv(w10, w19, w21);
  __ Sdiv(w11, w19, w21);
  __ Udiv(x12, x19, x21);
  __ Sdiv(x13, x19, x21);
  __ Udiv(x14, x20, x21);
  __ Sdiv(x15, x20, x21);

  __ Udiv(w22, w19, w17);
  __ Sdiv(w23, w19, w17);
  __ Udiv(x24, x20, x18);
  __ Sdiv(x25, x20, x18);

  __ Udiv(x26, x16, x21);
  __ Sdiv(x27, x16, x21);
  __ Udiv(x28, x18, x21);
  __ Sdiv(x29, x18, x21);

  __ Mov(x17, 0);
  __ Udiv(w18, w16, w17);
  __ Sdiv(w19, w16, w17);
  __ Udiv(x20, x16, x17);
  __ Sdiv(x21, x16, x17);
  END();

  RUN();

  ASSERT_EQUAL_64(1, x0);
  ASSERT_EQUAL_64(0xffffffff, x1);
  ASSERT_EQUAL_64(1, x2);
  ASSERT_EQUAL_64(0xffffffff, x3);
  ASSERT_EQUAL_64(1, x4);
  ASSERT_EQUAL_64(1, x5);
  ASSERT_EQUAL_64(0, x6);
  ASSERT_EQUAL_64(1, x7);
  ASSERT_EQUAL_64(0, x8);
  ASSERT_EQUAL_64(0xffffffff00000001, x9);
  ASSERT_EQUAL_64(0x40000000, x10);
  ASSERT_EQUAL_64(0xc0000000, x11);
  ASSERT_EQUAL_64(0x0000000040000000, x12);
  ASSERT_EQUAL_64(0x0000000040000000, x13);
  ASSERT_EQUAL_64(0x4000000000000000, x14);
  ASSERT_EQUAL_64(0xc000000000000000, x15);
  ASSERT_EQUAL_64(0, x22);
  ASSERT_EQUAL_64(0x80000000, x23);
  ASSERT_EQUAL_64(0, x24);
  ASSERT_EQUAL_64(0x8000000000000000, x25);
  ASSERT_EQUAL_64(0, x26);
  ASSERT_EQUAL_64(0, x27);
  ASSERT_EQUAL_64(0x7fffffffffffffff, x28);
  ASSERT_EQUAL_64(0, x29);
  ASSERT_EQUAL_64(0, x18);
  ASSERT_EQUAL_64(0, x19);
  ASSERT_EQUAL_64(0, x20);
  ASSERT_EQUAL_64(0, x21);

  TEARDOWN();
}


TEST(rbit_rev) {
  SETUP();

  START();
  __ Mov(x24, 0xfedcba9876543210);
  __ Rbit(w0, w24);
  __ Rbit(x1, x24);
  __ Rev16(w2, w24);
  __ Rev16(x3, x24);
  __ Rev(w4, w24);
  __ Rev32(x5, x24);
  __ Rev(x6, x24);
  END();

  RUN();

  ASSERT_EQUAL_64(0x084c2a6e, x0);
  ASSERT_EQUAL_64(0x084c2a6e195d3b7f, x1);
  ASSERT_EQUAL_64(0x54761032, x2);
  ASSERT_EQUAL_64(0xdcfe98ba54761032, x3);
  ASSERT_EQUAL_64(0x10325476, x4);
  ASSERT_EQUAL_64(0x98badcfe10325476, x5);
  ASSERT_EQUAL_64(0x1032547698badcfe, x6);

  TEARDOWN();
}


TEST(clz_cls) {
  SETUP();

  START();
  __ Mov(x24, 0x0008000000800000);
  __ Mov(x25, 0xff800000fff80000);
  __ Mov(x26, 0);
  __ Clz(w0, w24);
  __ Clz(x1, x24);
  __ Clz(w2, w25);
  __ Clz(x3, x25);
  __ Clz(w4, w26);
  __ Clz(x5, x26);
  __ Cls(w6, w24);
  __ Cls(x7, x24);
  __ Cls(w8, w25);
  __ Cls(x9, x25);
  __ Cls(w10, w26);
  __ Cls(x11, x26);
  END();

  RUN();

  ASSERT_EQUAL_64(8, x0);
  ASSERT_EQUAL_64(12, x1);
  ASSERT_EQUAL_64(0, x2);
  ASSERT_EQUAL_64(0, x3);
  ASSERT_EQUAL_64(32, x4);
  ASSERT_EQUAL_64(64, x5);
  ASSERT_EQUAL_64(7, x6);
  ASSERT_EQUAL_64(11, x7);
  ASSERT_EQUAL_64(12, x8);
  ASSERT_EQUAL_64(8, x9);
  ASSERT_EQUAL_64(31, x10);
  ASSERT_EQUAL_64(63, x11);

  TEARDOWN();
}
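
// Note the Clz/Cls relationship checked above: Cls counts leading bits that
// match the sign bit, excluding the sign bit itself, so for w26 = 0 the
// results are Clz = 32 but Cls = 31.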


TEST(label) {
  SETUP();

  Label label_1, label_2, label_3, label_4;

  START();
  __ Mov(x0, 0x1);
  __ Mov(x1, 0x0);
  __ Mov(x22, lr);  // Save lr.

  __ B(&label_1);
  __ B(&label_1);
  __ B(&label_1);  // Multiple branches to the same label.
  __ Mov(x0, 0x0);
  __ Bind(&label_2);
  __ B(&label_3);  // Forward branch.
  __ Mov(x0, 0x0);
  __ Bind(&label_1);
  __ B(&label_2);  // Backward branch.
  __ Mov(x0, 0x0);
  __ Bind(&label_3);
  __ Bl(&label_4);
  END();

  __ Bind(&label_4);
  __ Mov(x1, 0x1);
  __ Mov(lr, x22);
  END();

  RUN();

  ASSERT_EQUAL_64(0x1, x0);
  ASSERT_EQUAL_64(0x1, x1);

  TEARDOWN();
}


TEST(label_2) {
  SETUP();

  Label label_1, label_2, label_3;
  Label first_jump_to_3;

  START();
  __ Mov(x0, 0x0);

  __ B(&label_1);
  ptrdiff_t offset_2 = masm.GetCursorOffset();
  __ Orr(x0, x0, 1 << 1);
  __ B(&label_3);
  ptrdiff_t offset_1 = masm.GetCursorOffset();
  __ Orr(x0, x0, 1 << 0);
  __ B(&label_2);
  ptrdiff_t offset_3 = masm.GetCursorOffset();
  __ Tbz(x0, 2, &first_jump_to_3);
  __ Orr(x0, x0, 1 << 3);
  __ Bind(&first_jump_to_3);
  __ Orr(x0, x0, 1 << 2);
  __ Tbz(x0, 3, &label_3);

  // Labels 1, 2, and 3 are bound before the current buffer offset. Branches to
  // label_1 and label_2 branch respectively forward and backward. Branches to
  // label 3 include both forward and backward branches.
  masm.BindToOffset(&label_1, offset_1);
  masm.BindToOffset(&label_2, offset_2);
  masm.BindToOffset(&label_3, offset_3);

  END();

  RUN();

  ASSERT_EQUAL_64(0xf, x0);

  TEARDOWN();
}


TEST(adr) {
  SETUP();

  Label label_1, label_2, label_3, label_4;

  START();
  __ Mov(x0, 0x0);       // Set to non-zero to indicate failure.
  __ Adr(x1, &label_3);  // Set to zero to indicate success.

  __ Adr(x2, &label_1);  // Multiple forward references to the same label.
  __ Adr(x3, &label_1);
  __ Adr(x4, &label_1);

  __ Bind(&label_2);
  __ Eor(x5, x2, Operand(x3));  // Ensure that x2, x3 and x4 are identical.
  __ Eor(x6, x2, Operand(x4));
  __ Orr(x0, x0, Operand(x5));
  __ Orr(x0, x0, Operand(x6));
  __ Br(x2);  // label_1, label_3

  __ Bind(&label_3);
  __ Adr(x2, &label_3);  // Self-reference (offset 0).
  __ Eor(x1, x1, Operand(x2));
  __ Adr(x2, &label_4);  // Simple forward reference.
  __ Br(x2);             // label_4

  __ Bind(&label_1);
  __ Adr(x2, &label_3);  // Multiple reverse references to the same label.
  __ Adr(x3, &label_3);
  __ Adr(x4, &label_3);
  __ Adr(x5, &label_2);  // Simple reverse reference.
  __ Br(x5);             // label_2

  __ Bind(&label_4);
  END();

  RUN();

  ASSERT_EQUAL_64(0x0, x0);
  ASSERT_EQUAL_64(0x0, x1);

  TEARDOWN();
}


// Simple adrp tests: check that labels are linked and handled properly.
// This is similar to the adr test, but all the adrp instructions are put on
// the same page so that they return the same value.
TEST(adrp) {
  Label start;
  Label label_1, label_2, label_3;

  SETUP_CUSTOM(2 * kPageSize, PageOffsetDependentCode);
  START();

  // Waste space until the start of a page.
  {
    ExactAssemblyScope scope(&masm,
                             kPageSize,
                             ExactAssemblyScope::kMaximumSize);
    const uintptr_t kPageOffsetMask = kPageSize - 1;
    while ((masm.GetCursorAddress<uintptr_t>() & kPageOffsetMask) != 0) {
      __ b(&start);
    }
    __ bind(&start);
  }

  // Simple forward reference.
  __ Adrp(x0, &label_2);

  __ Bind(&label_1);

  // Multiple forward references to the same label.
  __ Adrp(x1, &label_3);
  __ Adrp(x2, &label_3);
  __ Adrp(x3, &label_3);

  __ Bind(&label_2);

  // Self-reference (offset 0).
  __ Adrp(x4, &label_2);

  __ Bind(&label_3);

  // Simple reverse reference.
  __ Adrp(x5, &label_1);

  // Multiple reverse references to the same label.
  __ Adrp(x6, &label_2);
  __ Adrp(x7, &label_2);
  __ Adrp(x8, &label_2);

  VIXL_ASSERT(masm.GetSizeOfCodeGeneratedSince(&start) < kPageSize);
  END();
  RUN_CUSTOM();

  uint64_t expected = reinterpret_cast<uint64_t>(
      AlignDown(masm.GetLabelAddress<uint64_t*>(&start), kPageSize));
  ASSERT_EQUAL_64(expected, x0);
  ASSERT_EQUAL_64(expected, x1);
  ASSERT_EQUAL_64(expected, x2);
  ASSERT_EQUAL_64(expected, x3);
  ASSERT_EQUAL_64(expected, x4);
  ASSERT_EQUAL_64(expected, x5);
  ASSERT_EQUAL_64(expected, x6);
  ASSERT_EQUAL_64(expected, x7);
  ASSERT_EQUAL_64(expected, x8);

  TEARDOWN_CUSTOM();
}


static void AdrpPageBoundaryHelper(unsigned offset_into_page) {
  VIXL_ASSERT(offset_into_page < kPageSize);
  VIXL_ASSERT((offset_into_page % kInstructionSize) == 0);

  const uintptr_t kPageOffsetMask = kPageSize - 1;

  // The test label is always bound on page 0. Adrp instructions are generated
  // on pages from kStartPage to kEndPage (inclusive).
  const int kStartPage = -16;
  const int kEndPage = 16;
  const int kMaxCodeSize = (kEndPage - kStartPage + 2) * kPageSize;

  SETUP_CUSTOM(kMaxCodeSize, PageOffsetDependentCode);
  START();

  Label test;
  Label start;

  {
    ExactAssemblyScope scope(&masm,
                             kMaxCodeSize,
                             ExactAssemblyScope::kMaximumSize);
    // Initialize NZCV with `eq` flags.
    __ cmp(wzr, wzr);
    // Waste space until the start of a page.
    while ((masm.GetCursorAddress<uintptr_t>() & kPageOffsetMask) != 0) {
      __ b(&start);
    }

    // The first page.
    VIXL_STATIC_ASSERT(kStartPage < 0);
    {
      ExactAssemblyScope scope_page(&masm, kPageSize);
      __ bind(&start);
      __ adrp(x0, &test);
      __ adrp(x1, &test);
      for (size_t i = 2; i < (kPageSize / kInstructionSize); i += 2) {
        __ ccmp(x0, x1, NoFlag, eq);
        __ adrp(x1, &test);
      }
    }

    // Subsequent pages.
    VIXL_STATIC_ASSERT(kEndPage >= 0);
    for (int page = (kStartPage + 1); page <= kEndPage; page++) {
      ExactAssemblyScope scope_page(&masm, kPageSize);
      if (page == 0) {
        for (size_t i = 0; i < (kPageSize / kInstructionSize);) {
          if (i++ == (offset_into_page / kInstructionSize)) __ bind(&test);
          __ ccmp(x0, x1, NoFlag, eq);
          if (i++ == (offset_into_page / kInstructionSize)) __ bind(&test);
          __ adrp(x1, &test);
        }
      } else {
        for (size_t i = 0; i < (kPageSize / kInstructionSize); i += 2) {
          __ ccmp(x0, x1, NoFlag, eq);
          __ adrp(x1, &test);
        }
      }
    }
  }

  // Every adrp instruction pointed to the same label (`test`), so they should
  // all have produced the same result.

  END();
  RUN_CUSTOM();

  uintptr_t expected =
      AlignDown(masm.GetLabelAddress<uintptr_t>(&test), kPageSize);
  ASSERT_EQUAL_64(expected, x0);
  ASSERT_EQUAL_64(expected, x1);
  ASSERT_EQUAL_NZCV(ZCFlag);

  TEARDOWN_CUSTOM();
}


// Test that labels are correctly referenced by adrp across page boundaries.
TEST(adrp_page_boundaries) {
  VIXL_STATIC_ASSERT(kPageSize == 4096);
  AdrpPageBoundaryHelper(kInstructionSize * 0);
  AdrpPageBoundaryHelper(kInstructionSize * 1);
  AdrpPageBoundaryHelper(kInstructionSize * 512);
  AdrpPageBoundaryHelper(kInstructionSize * 1022);
  AdrpPageBoundaryHelper(kInstructionSize * 1023);
}


static void AdrpOffsetHelper(int64_t offset) {
  const size_t kPageOffsetMask = kPageSize - 1;
  const int kMaxCodeSize = 2 * kPageSize;

  SETUP_CUSTOM(kMaxCodeSize, PageOffsetDependentCode);
  START();

  Label page;

  {
    ExactAssemblyScope scope(&masm,
                             kMaxCodeSize,
                             ExactAssemblyScope::kMaximumSize);
    // Initialize NZCV with `eq` flags.
    __ cmp(wzr, wzr);
    // Waste space until the start of a page.
    while ((masm.GetCursorAddress<uintptr_t>() & kPageOffsetMask) != 0) {
      __ b(&page);
    }
    __ bind(&page);

    {
      ExactAssemblyScope scope_page(&masm, kPageSize);
      // Every adrp instruction on this page should return the same value.
      __ adrp(x0, offset);
      __ adrp(x1, offset);
      for (size_t i = 2; i < kPageSize / kInstructionSize; i += 2) {
        __ ccmp(x0, x1, NoFlag, eq);
        __ adrp(x1, offset);
      }
    }
  }

  END();
  RUN_CUSTOM();

  uintptr_t expected =
      masm.GetLabelAddress<uintptr_t>(&page) + (kPageSize * offset);
  ASSERT_EQUAL_64(expected, x0);
  ASSERT_EQUAL_64(expected, x1);
  ASSERT_EQUAL_NZCV(ZCFlag);

  TEARDOWN_CUSTOM();
}


// Check that adrp produces the correct result for a specific offset.
TEST(adrp_offset) {
  AdrpOffsetHelper(0);
  AdrpOffsetHelper(1);
  AdrpOffsetHelper(-1);
  AdrpOffsetHelper(4);
  AdrpOffsetHelper(-4);
  AdrpOffsetHelper(0x000fffff);
  AdrpOffsetHelper(-0x000fffff);
  AdrpOffsetHelper(-0x00100000);
}
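
// TEST(branch_cond) below walks the condition table. As a worked case:
// Cmp(x1, 0) with x1 = 1 computes 1 - 0 and sets N=0, Z=0, C=1, V=0, so eq,
// lo, mi, vs, ls, lt and le all fall through while ne is taken.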


TEST(branch_cond) {
  SETUP();

  Label done, wrong;

  START();
  __ Mov(x0, 0x1);
  __ Mov(x1, 0x1);
  __ Mov(x2, 0x8000000000000000);

  // For each 'cmp' instruction below, the listed branches to 'wrong' must not
  // be taken; any condition code other than those listed would branch.

  __ Cmp(x1, 0);
  __ B(&wrong, eq);
  __ B(&wrong, lo);
  __ B(&wrong, mi);
  __ B(&wrong, vs);
  __ B(&wrong, ls);
  __ B(&wrong, lt);
  __ B(&wrong, le);
  Label ok_1;
  __ B(&ok_1, ne);
  __ Mov(x0, 0x0);
  __ Bind(&ok_1);

  __ Cmp(x1, 1);
  __ B(&wrong, ne);
  __ B(&wrong, lo);
  __ B(&wrong, mi);
  __ B(&wrong, vs);
  __ B(&wrong, hi);
  __ B(&wrong, lt);
  __ B(&wrong, gt);
  Label ok_2;
  __ B(&ok_2, pl);
  __ Mov(x0, 0x0);
  __ Bind(&ok_2);

  __ Cmp(x1, 2);
  __ B(&wrong, eq);
  __ B(&wrong, hs);
  __ B(&wrong, pl);
  __ B(&wrong, vs);
  __ B(&wrong, hi);
  __ B(&wrong, ge);
  __ B(&wrong, gt);
  Label ok_3;
  __ B(&ok_3, vc);
  __ Mov(x0, 0x0);
  __ Bind(&ok_3);

  __ Cmp(x2, 1);
  __ B(&wrong, eq);
  __ B(&wrong, lo);
  __ B(&wrong, mi);
  __ B(&wrong, vc);
  __ B(&wrong, ls);
  __ B(&wrong, ge);
  __ B(&wrong, gt);
  Label ok_4;
  __ B(&ok_4, le);
  __ Mov(x0, 0x0);
  __ Bind(&ok_4);

  // The MacroAssembler does not allow al as a branch condition.
  Label ok_5;
  {
    ExactAssemblyScope scope(&masm, kInstructionSize);
    __ b(&ok_5, al);
  }
  __ Mov(x0, 0x0);
  __ Bind(&ok_5);

  // The MacroAssembler does not allow nv as a branch condition.
  Label ok_6;
  {
    ExactAssemblyScope scope(&masm, kInstructionSize);
    __ b(&ok_6, nv);
  }
  __ Mov(x0, 0x0);
  __ Bind(&ok_6);

  __ B(&done);

  __ Bind(&wrong);
  __ Mov(x0, 0x0);

  __ Bind(&done);
  END();

  RUN();

  ASSERT_EQUAL_64(0x1, x0);

  TEARDOWN();
}
  Label fn2, after_fn2;

  __ Mov(x2, 0);
  __ B(&after_fn2);

  __ Bind(&fn2);
  __ Mov(x0, lr);
  __ Mov(x2, 84);
  __ Blr(x0);

  __ Bind(&after_fn2);
  __ Bl(&fn2);
  __ Mov(x3, lr);

  __ Mov(lr, x29);
  END();

  RUN();

  ASSERT_EQUAL_64(core.xreg(3) + kInstructionSize, x0);
  ASSERT_EQUAL_64(42, x1);
  ASSERT_EQUAL_64(84, x2);

  TEARDOWN();
}


TEST(compare_branch) {
  SETUP();

  START();
  __ Mov(x0, 0);
  __ Mov(x1, 0);
  __ Mov(x2, 0);
  __ Mov(x3, 0);
  __ Mov(x4, 0);
  __ Mov(x5, 0);
  __ Mov(x16, 0);
  __ Mov(x17, 42);

  Label zt, zt_end;
  __ Cbz(w16, &zt);
  __ B(&zt_end);
  __ Bind(&zt);
  __ Mov(x0, 1);
  __ Bind(&zt_end);

  Label zf, zf_end;
  __ Cbz(x17, &zf);
  __ B(&zf_end);
  __ Bind(&zf);
  __ Mov(x1, 1);
  __ Bind(&zf_end);

  Label nzt, nzt_end;
  __ Cbnz(w17, &nzt);
  __ B(&nzt_end);
  __ Bind(&nzt);
  __ Mov(x2, 1);
  __ Bind(&nzt_end);

  Label nzf, nzf_end;
  __ Cbnz(x16, &nzf);
  __ B(&nzf_end);
  __ Bind(&nzf);
  __ Mov(x3, 1);
  __ Bind(&nzf_end);

  __ Mov(x18, 0xffffffff00000000);

  Label a, a_end;
  __ Cbz(w18, &a);
  __ B(&a_end);
  __ Bind(&a);
  __ Mov(x4, 1);
  __ Bind(&a_end);

  Label b, b_end;
  __ Cbnz(w18, &b);
  __ B(&b_end);
  __ Bind(&b);
  __ Mov(x5, 1);
  __ Bind(&b_end);

  END();

  RUN();

  ASSERT_EQUAL_64(1, x0);
  ASSERT_EQUAL_64(0, x1);
  ASSERT_EQUAL_64(1, x2);
  ASSERT_EQUAL_64(0, x3);
  ASSERT_EQUAL_64(1, x4);
  ASSERT_EQUAL_64(0, x5);

  TEARDOWN();
}


TEST(test_branch) {
  SETUP();

  START();
  __ Mov(x0, 0);
  __ Mov(x1, 0);
  __ Mov(x2, 0);
  __ Mov(x3, 0);
  __ Mov(x16, 0xaaaaaaaaaaaaaaaa);

  Label bz, bz_end;
  __ Tbz(w16, 0, &bz);
  __ B(&bz_end);
  __ Bind(&bz);
  __ Mov(x0, 1);
  __ Bind(&bz_end);

  Label bo, bo_end;
  __ Tbz(x16, 63, &bo);
  __ B(&bo_end);
  __ Bind(&bo);
  __ Mov(x1, 1);
  __ Bind(&bo_end);

  Label nbz, nbz_end;
  __ Tbnz(x16, 61, &nbz);
  __ B(&nbz_end);
  __ Bind(&nbz);
  __ Mov(x2, 1);
  __ Bind(&nbz_end);

  Label nbo, nbo_end;
  __ Tbnz(w16, 2, &nbo);
  __ B(&nbo_end);
  __ Bind(&nbo);
  __ Mov(x3, 1);
  __ Bind(&nbo_end);
  END();

  RUN();

  ASSERT_EQUAL_64(1, x0);
  ASSERT_EQUAL_64(0, x1);
  ASSERT_EQUAL_64(1, x2);
  ASSERT_EQUAL_64(0, x3);

  TEARDOWN();
}


TEST(branch_type) {
  SETUP();

  Label fail, done;

  START();
  __ Mov(x0, 0x0);
  __ Mov(x10, 0x7);
  __ Mov(x11, 0x0);

  // Test non-taken branches.
  __ Cmp(x10, 0x7);
  __ B(&fail, ne);
  __ B(&fail, never);
  __ B(&fail, reg_zero, x10);
  __ B(&fail, reg_not_zero, x11);
  __ B(&fail, reg_bit_clear, x10, 0);
  __ B(&fail, reg_bit_set, x10, 3);

  // Test taken branches.
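  // The BranchType forms of B are pseudo-instructions: reg_zero and
  // reg_not_zero are expected to expand to cbz/cbnz, and reg_bit_clear and
  // reg_bit_set to tbz/tbnz on the given bit.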
  Label l1, l2, l3, l4, l5;
  __ Cmp(x10, 0x7);
  __ B(&l1, eq);
  __ B(&fail);
  __ Bind(&l1);
  __ B(&l2, always);
  __ B(&fail);
  __ Bind(&l2);
  __ B(&l3, reg_not_zero, x10);
  __ B(&fail);
  __ Bind(&l3);
  __ B(&l4, reg_bit_clear, x10, 15);
  __ B(&fail);
  __ Bind(&l4);
  __ B(&l5, reg_bit_set, x10, 1);
  __ B(&fail);
  __ Bind(&l5);

  __ B(&done);

  __ Bind(&fail);
  __ Mov(x0, 0x1);

  __ Bind(&done);

  END();

  RUN();

  ASSERT_EQUAL_64(0x0, x0);

  TEARDOWN();
}


TEST(ldr_str_offset) {
  SETUP();

  uint64_t src[2] = {0xfedcba9876543210, 0x0123456789abcdef};
  uint64_t dst[5] = {0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, dst_base);
  __ Ldr(w0, MemOperand(x17));
  __ Str(w0, MemOperand(x18));
  __ Ldr(w1, MemOperand(x17, 4));
  __ Str(w1, MemOperand(x18, 12));
  __ Ldr(x2, MemOperand(x17, 8));
  __ Str(x2, MemOperand(x18, 16));
  __ Ldrb(w3, MemOperand(x17, 1));
  __ Strb(w3, MemOperand(x18, 25));
  __ Ldrh(w4, MemOperand(x17, 2));
  __ Strh(w4, MemOperand(x18, 33));
  END();

  RUN();

  ASSERT_EQUAL_64(0x76543210, x0);
  ASSERT_EQUAL_64(0x76543210, dst[0]);
  ASSERT_EQUAL_64(0xfedcba98, x1);
  ASSERT_EQUAL_64(0xfedcba9800000000, dst[1]);
  ASSERT_EQUAL_64(0x0123456789abcdef, x2);
  ASSERT_EQUAL_64(0x0123456789abcdef, dst[2]);
  ASSERT_EQUAL_64(0x32, x3);
  ASSERT_EQUAL_64(0x3200, dst[3]);
  ASSERT_EQUAL_64(0x7654, x4);
  ASSERT_EQUAL_64(0x765400, dst[4]);
  ASSERT_EQUAL_64(src_base, x17);
  ASSERT_EQUAL_64(dst_base, x18);

  TEARDOWN();
}


TEST(ldr_str_wide) {
  SETUP();

  uint32_t src[8192];
  uint32_t dst[8192];
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
  memset(src, 0xaa, 8192 * sizeof(src[0]));
  memset(dst, 0xaa, 8192 * sizeof(dst[0]));
  src[0] = 0;
  src[6144] = 6144;
  src[8191] = 8191;

  START();
  __ Mov(x22, src_base);
  __ Mov(x23, dst_base);
  __ Mov(x24, src_base);
  __ Mov(x25, dst_base);
  __ Mov(x26, src_base);
  __ Mov(x27, dst_base);

  __ Ldr(w0, MemOperand(x22, 8191 * sizeof(src[0])));
  __ Str(w0, MemOperand(x23, 8191 * sizeof(dst[0])));
  __ Ldr(w1, MemOperand(x24, 4096 * sizeof(src[0]), PostIndex));
  __ Str(w1, MemOperand(x25, 4096 * sizeof(dst[0]), PostIndex));
  __ Ldr(w2, MemOperand(x26, 6144 * sizeof(src[0]), PreIndex));
  __ Str(w2, MemOperand(x27, 6144 * sizeof(dst[0]), PreIndex));
  END();

  RUN();

  ASSERT_EQUAL_32(8191, w0);
  ASSERT_EQUAL_32(8191, dst[8191]);
  ASSERT_EQUAL_64(src_base, x22);
  ASSERT_EQUAL_64(dst_base, x23);
  ASSERT_EQUAL_32(0, w1);
  ASSERT_EQUAL_32(0, dst[0]);
  ASSERT_EQUAL_64(src_base + 4096 * sizeof(src[0]), x24);
  ASSERT_EQUAL_64(dst_base + 4096 * sizeof(dst[0]), x25);
  ASSERT_EQUAL_32(6144, w2);
  ASSERT_EQUAL_32(6144, dst[6144]);
  ASSERT_EQUAL_64(src_base + 6144 * sizeof(src[0]), x26);
  ASSERT_EQUAL_64(dst_base + 6144 * sizeof(dst[0]), x27);

  TEARDOWN();
}


TEST(ldr_str_preindex) {
  SETUP();

  uint64_t src[2] = {0xfedcba9876543210, 0x0123456789abcdef};
  uint64_t dst[6] = {0, 0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, dst_base);
  __ Mov(x19, src_base);
  __ Mov(x20, dst_base);
  __ Mov(x21, src_base + 16);
  __ Mov(x22, dst_base + 40);
  __ Mov(x23, src_base);
  __ Mov(x24, dst_base);
  __ Mov(x25, src_base);
  __ Mov(x26, dst_base);
  __ Ldr(w0, MemOperand(x17, 4, PreIndex));
  __ Str(w0, MemOperand(x18, 12, PreIndex));
  __ Ldr(x1, MemOperand(x19, 8, PreIndex));
  __ Str(x1, MemOperand(x20, 16, PreIndex));
  __ Ldr(w2, MemOperand(x21, -4, PreIndex));
  __ Str(w2, MemOperand(x22, -4, PreIndex));
  __ Ldrb(w3, MemOperand(x23, 1, PreIndex));
  __ Strb(w3, MemOperand(x24, 25, PreIndex));
  __ Ldrh(w4, MemOperand(x25, 3, PreIndex));
  __ Strh(w4, MemOperand(x26, 41, PreIndex));
  END();

  RUN();

  ASSERT_EQUAL_64(0xfedcba98, x0);
  ASSERT_EQUAL_64(0xfedcba9800000000, dst[1]);
  ASSERT_EQUAL_64(0x0123456789abcdef, x1);
  ASSERT_EQUAL_64(0x0123456789abcdef, dst[2]);
  ASSERT_EQUAL_64(0x01234567, x2);
  ASSERT_EQUAL_64(0x0123456700000000, dst[4]);
  ASSERT_EQUAL_64(0x32, x3);
  ASSERT_EQUAL_64(0x3200, dst[3]);
  ASSERT_EQUAL_64(0x9876, x4);
  ASSERT_EQUAL_64(0x987600, dst[5]);
  ASSERT_EQUAL_64(src_base + 4, x17);
  ASSERT_EQUAL_64(dst_base + 12, x18);
  ASSERT_EQUAL_64(src_base + 8, x19);
  ASSERT_EQUAL_64(dst_base + 16, x20);
  ASSERT_EQUAL_64(src_base + 12, x21);
  ASSERT_EQUAL_64(dst_base + 36, x22);
  ASSERT_EQUAL_64(src_base + 1, x23);
  ASSERT_EQUAL_64(dst_base + 25, x24);
  ASSERT_EQUAL_64(src_base + 3, x25);
  ASSERT_EQUAL_64(dst_base + 41, x26);

  TEARDOWN();
}


TEST(ldr_str_postindex) {
  SETUP();

  uint64_t src[2] = {0xfedcba9876543210, 0x0123456789abcdef};
  uint64_t dst[6] = {0, 0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, src_base + 4);
  __ Mov(x18, dst_base + 12);
  __ Mov(x19, src_base + 8);
  __ Mov(x20, dst_base + 16);
  __ Mov(x21, src_base + 8);
  __ Mov(x22, dst_base + 32);
  __ Mov(x23, src_base + 1);
  __ Mov(x24, dst_base + 25);
  __ Mov(x25, src_base + 3);
  __ Mov(x26, dst_base + 41);
  __ Ldr(w0, MemOperand(x17, 4, PostIndex));
  __ Str(w0, MemOperand(x18, 12, PostIndex));
  __ Ldr(x1, MemOperand(x19, 8, PostIndex));
  __ Str(x1, MemOperand(x20, 16, PostIndex));
  __ Ldr(x2, MemOperand(x21, -8, PostIndex));
  __ Str(x2, MemOperand(x22, -32, PostIndex));
  __ Ldrb(w3, MemOperand(x23, 1, PostIndex));
  __ Strb(w3, MemOperand(x24, 5, PostIndex));
  __ Ldrh(w4, MemOperand(x25, -3, PostIndex));
  __ Strh(w4, MemOperand(x26, -41, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_64(0xfedcba98, x0);
  ASSERT_EQUAL_64(0xfedcba9800000000, dst[1]);
  ASSERT_EQUAL_64(0x0123456789abcdef, x1);
  ASSERT_EQUAL_64(0x0123456789abcdef, dst[2]);
  ASSERT_EQUAL_64(0x0123456789abcdef, x2);
  ASSERT_EQUAL_64(0x0123456789abcdef, dst[4]);
  ASSERT_EQUAL_64(0x32, x3);
  ASSERT_EQUAL_64(0x3200, dst[3]);
  ASSERT_EQUAL_64(0x9876, x4);
  ASSERT_EQUAL_64(0x987600, dst[5]);
  ASSERT_EQUAL_64(src_base + 8, x17);
  ASSERT_EQUAL_64(dst_base + 24, x18);
  ASSERT_EQUAL_64(src_base + 16, x19);
  ASSERT_EQUAL_64(dst_base + 32, x20);
  ASSERT_EQUAL_64(src_base, x21);
  ASSERT_EQUAL_64(dst_base, x22);
  ASSERT_EQUAL_64(src_base + 2, x23);
  ASSERT_EQUAL_64(dst_base + 30, x24);
  ASSERT_EQUAL_64(src_base, x25);
  ASSERT_EQUAL_64(dst_base, x26);

  TEARDOWN();
}


TEST(ldr_str_largeindex) {
  SETUP();

  // This value won't fit in the immediate offset field of ldr/str
  // instructions.
  int largeoffset = 0xabcdef;

  int64_t data[3] = {0x1122334455667788, 0, 0};
  uint64_t base_addr = reinterpret_cast<uintptr_t>(data);
  uint64_t drifted_addr = base_addr - largeoffset;

  // This test checks that we can use large immediate offsets when
  // using the PreIndex or PostIndex addressing modes of the MacroAssembler
  // Ldr/Str instructions.

  START();
  __ Mov(x19, drifted_addr);
  __ Ldr(x0, MemOperand(x19, largeoffset, PreIndex));

  __ Mov(x20, base_addr);
  __ Ldr(x1, MemOperand(x20, largeoffset, PostIndex));

  __ Mov(x21, drifted_addr);
  __ Str(x0, MemOperand(x21, largeoffset + 8, PreIndex));

  __ Mov(x22, base_addr + 16);
  __ Str(x0, MemOperand(x22, largeoffset, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_64(0x1122334455667788, data[0]);
  ASSERT_EQUAL_64(0x1122334455667788, data[1]);
  ASSERT_EQUAL_64(0x1122334455667788, data[2]);
  ASSERT_EQUAL_64(0x1122334455667788, x0);
  ASSERT_EQUAL_64(0x1122334455667788, x1);

  ASSERT_EQUAL_64(base_addr, x19);
  ASSERT_EQUAL_64(base_addr + largeoffset, x20);
  ASSERT_EQUAL_64(base_addr + 8, x21);
  ASSERT_EQUAL_64(base_addr + 16 + largeoffset, x22);

  TEARDOWN();
}


TEST(load_signed) {
  SETUP();

  uint32_t src[2] = {0x80008080, 0x7fff7f7f};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x24, src_base);
  __ Ldrsb(w0, MemOperand(x24));
  __ Ldrsb(w1, MemOperand(x24, 4));
  __ Ldrsh(w2, MemOperand(x24));
  __ Ldrsh(w3, MemOperand(x24, 4));
  __ Ldrsb(x4, MemOperand(x24));
  __ Ldrsb(x5, MemOperand(x24, 4));
  __ Ldrsh(x6, MemOperand(x24));
  __ Ldrsh(x7, MemOperand(x24, 4));
  __ Ldrsw(x8, MemOperand(x24));
  __ Ldrsw(x9, MemOperand(x24, 4));
  END();

  RUN();

  ASSERT_EQUAL_64(0xffffff80, x0);
  ASSERT_EQUAL_64(0x0000007f, x1);
  ASSERT_EQUAL_64(0xffff8080, x2);
  ASSERT_EQUAL_64(0x00007f7f, x3);
  ASSERT_EQUAL_64(0xffffffffffffff80, x4);
  ASSERT_EQUAL_64(0x000000000000007f, x5);
  ASSERT_EQUAL_64(0xffffffffffff8080, x6);
  ASSERT_EQUAL_64(0x0000000000007f7f, x7);
  ASSERT_EQUAL_64(0xffffffff80008080, x8);
  ASSERT_EQUAL_64(0x000000007fff7f7f, x9);

  TEARDOWN();
}


TEST(load_store_regoffset) {
  SETUP();

  uint32_t src[3] = {1, 2, 3};
  uint32_t dst[4] = {0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x16, src_base);
  __ Mov(x17, dst_base);
  __ Mov(x18, src_base + 3 * sizeof(src[0]));
  __ Mov(x19, dst_base + 3 * sizeof(dst[0]));
  __ Mov(x20, dst_base + 4 * sizeof(dst[0]));
  __ Mov(x24, 0);
  __ Mov(x25, 4);
  __ Mov(x26, -4);
  __ Mov(x27, 0xfffffffc);  // 32-bit -4.
  __ Mov(x28, 0xfffffffe);  // 32-bit -2.
  __ Mov(x29, 0xffffffff);  // 32-bit -1.
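  // Register-offset addressing: a 32-bit index is first sign- or
  // zero-extended (SXTW/UXTW), then optionally scaled by the access size;
  // for example x28 (0xfffffffe) with SXTW and shift 2 addresses element -2
  // of a word array.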

  __ Ldr(w0, MemOperand(x16, x24));
  __ Ldr(x1, MemOperand(x16, x25));
  __ Ldr(w2, MemOperand(x18, x26));
  __ Ldr(w3, MemOperand(x18, x27, SXTW));
  __ Ldr(w4, MemOperand(x18, x28, SXTW, 2));
  __ Str(w0, MemOperand(x17, x24));
  __ Str(x1, MemOperand(x17, x25));
  __ Str(w2, MemOperand(x20, x29, SXTW, 2));
  END();

  RUN();

  ASSERT_EQUAL_64(1, x0);
  ASSERT_EQUAL_64(0x0000000300000002, x1);
  ASSERT_EQUAL_64(3, x2);
  ASSERT_EQUAL_64(3, x3);
  ASSERT_EQUAL_64(2, x4);
  ASSERT_EQUAL_32(1, dst[0]);
  ASSERT_EQUAL_32(2, dst[1]);
  ASSERT_EQUAL_32(3, dst[2]);
  ASSERT_EQUAL_32(3, dst[3]);

  TEARDOWN();
}


TEST(load_store_float) {
  SETUP();

  float src[3] = {1.0, 2.0, 3.0};
  float dst[3] = {0.0, 0.0, 0.0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, dst_base);
  __ Mov(x19, src_base);
  __ Mov(x20, dst_base);
  __ Mov(x21, src_base);
  __ Mov(x22, dst_base);
  __ Ldr(s0, MemOperand(x17, sizeof(src[0])));
  __ Str(s0, MemOperand(x18, sizeof(dst[0]), PostIndex));
  __ Ldr(s1, MemOperand(x19, sizeof(src[0]), PostIndex));
  __ Str(s1, MemOperand(x20, 2 * sizeof(dst[0]), PreIndex));
  __ Ldr(s2, MemOperand(x21, 2 * sizeof(src[0]), PreIndex));
  __ Str(s2, MemOperand(x22, sizeof(dst[0])));
  END();

  RUN();

  ASSERT_EQUAL_FP32(2.0, s0);
  ASSERT_EQUAL_FP32(2.0, dst[0]);
  ASSERT_EQUAL_FP32(1.0, s1);
  ASSERT_EQUAL_FP32(1.0, dst[2]);
  ASSERT_EQUAL_FP32(3.0, s2);
  ASSERT_EQUAL_FP32(3.0, dst[1]);
  ASSERT_EQUAL_64(src_base, x17);
  ASSERT_EQUAL_64(dst_base + sizeof(dst[0]), x18);
  ASSERT_EQUAL_64(src_base + sizeof(src[0]), x19);
  ASSERT_EQUAL_64(dst_base + 2 * sizeof(dst[0]), x20);
  ASSERT_EQUAL_64(src_base + 2 * sizeof(src[0]), x21);
  ASSERT_EQUAL_64(dst_base, x22);

  TEARDOWN();
}


TEST(load_store_double) {
  SETUP();

  double src[3] = {1.0, 2.0, 3.0};
  double dst[3] = {0.0, 0.0, 0.0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, dst_base);
  __ Mov(x19, src_base);
  __ Mov(x20, dst_base);
  __ Mov(x21, src_base);
  __ Mov(x22, dst_base);
  __ Ldr(d0, MemOperand(x17, sizeof(src[0])));
  __ Str(d0, MemOperand(x18, sizeof(dst[0]), PostIndex));
  __ Ldr(d1, MemOperand(x19, sizeof(src[0]), PostIndex));
  __ Str(d1, MemOperand(x20, 2 * sizeof(dst[0]), PreIndex));
  __ Ldr(d2, MemOperand(x21, 2 * sizeof(src[0]), PreIndex));
  __ Str(d2, MemOperand(x22, sizeof(dst[0])));
  END();

  RUN();

  ASSERT_EQUAL_FP64(2.0, d0);
  ASSERT_EQUAL_FP64(2.0, dst[0]);
  ASSERT_EQUAL_FP64(1.0, d1);
  ASSERT_EQUAL_FP64(1.0, dst[2]);
  ASSERT_EQUAL_FP64(3.0, d2);
  ASSERT_EQUAL_FP64(3.0, dst[1]);
  ASSERT_EQUAL_64(src_base, x17);
  ASSERT_EQUAL_64(dst_base + sizeof(dst[0]), x18);
  ASSERT_EQUAL_64(src_base + sizeof(src[0]), x19);
  ASSERT_EQUAL_64(dst_base + 2 * sizeof(dst[0]), x20);
  ASSERT_EQUAL_64(src_base + 2 * sizeof(src[0]), x21);
  ASSERT_EQUAL_64(dst_base, x22);

  TEARDOWN();
}

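
// The following tests cover loads and stores of the scalar FP/NEON registers
// (b, h and q); for the narrow loads, the ASSERT_EQUAL_128 checks also verify
// that the unused high bits of the 128-bit register are zeroed.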
TEST(load_store_b) {
  SETUP();

  uint8_t src[3] = {0x12, 0x23, 0x34};
  uint8_t dst[3] = {0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, dst_base);
  __ Mov(x19, src_base);
  __ Mov(x20, dst_base);
  __ Mov(x21, src_base);
  __ Mov(x22, dst_base);
  __ Ldr(b0, MemOperand(x17, sizeof(src[0])));
  __ Str(b0, MemOperand(x18, sizeof(dst[0]), PostIndex));
  __ Ldr(b1, MemOperand(x19, sizeof(src[0]), PostIndex));
  __ Str(b1, MemOperand(x20, 2 * sizeof(dst[0]), PreIndex));
  __ Ldr(b2, MemOperand(x21, 2 * sizeof(src[0]), PreIndex));
  __ Str(b2, MemOperand(x22, sizeof(dst[0])));
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x23, q0);
  ASSERT_EQUAL_64(0x23, dst[0]);
  ASSERT_EQUAL_128(0, 0x12, q1);
  ASSERT_EQUAL_64(0x12, dst[2]);
  ASSERT_EQUAL_128(0, 0x34, q2);
  ASSERT_EQUAL_64(0x34, dst[1]);
  ASSERT_EQUAL_64(src_base, x17);
  ASSERT_EQUAL_64(dst_base + sizeof(dst[0]), x18);
  ASSERT_EQUAL_64(src_base + sizeof(src[0]), x19);
  ASSERT_EQUAL_64(dst_base + 2 * sizeof(dst[0]), x20);
  ASSERT_EQUAL_64(src_base + 2 * sizeof(src[0]), x21);
  ASSERT_EQUAL_64(dst_base, x22);

  TEARDOWN();
}


TEST(load_store_h) {
  SETUP();

  uint16_t src[3] = {0x1234, 0x2345, 0x3456};
  uint16_t dst[3] = {0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, dst_base);
  __ Mov(x19, src_base);
  __ Mov(x20, dst_base);
  __ Mov(x21, src_base);
  __ Mov(x22, dst_base);
  __ Ldr(h0, MemOperand(x17, sizeof(src[0])));
  __ Str(h0, MemOperand(x18, sizeof(dst[0]), PostIndex));
  __ Ldr(h1, MemOperand(x19, sizeof(src[0]), PostIndex));
  __ Str(h1, MemOperand(x20, 2 * sizeof(dst[0]), PreIndex));
  __ Ldr(h2, MemOperand(x21, 2 * sizeof(src[0]), PreIndex));
  __ Str(h2, MemOperand(x22, sizeof(dst[0])));
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x2345, q0);
  ASSERT_EQUAL_64(0x2345, dst[0]);
  ASSERT_EQUAL_128(0, 0x1234, q1);
  ASSERT_EQUAL_64(0x1234, dst[2]);
  ASSERT_EQUAL_128(0, 0x3456, q2);
  ASSERT_EQUAL_64(0x3456, dst[1]);
  ASSERT_EQUAL_64(src_base, x17);
  ASSERT_EQUAL_64(dst_base + sizeof(dst[0]), x18);
  ASSERT_EQUAL_64(src_base + sizeof(src[0]), x19);
  ASSERT_EQUAL_64(dst_base + 2 * sizeof(dst[0]), x20);
  ASSERT_EQUAL_64(src_base + 2 * sizeof(src[0]), x21);
  ASSERT_EQUAL_64(dst_base, x22);

  TEARDOWN();
}


TEST(load_store_q) {
  SETUP();

  uint8_t src[48] = {0x10, 0x32, 0x54, 0x76, 0x98, 0xba, 0xdc, 0xfe, 0x01, 0x23,
                     0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0x21, 0x43, 0x65, 0x87,
                     0xa9, 0xcb, 0xed, 0x0f, 0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc,
                     0xde, 0xf0, 0x24, 0x46, 0x68, 0x8a, 0xac, 0xce, 0xe0, 0x02,
                     0x42, 0x64, 0x86, 0xa8, 0xca, 0xec, 0x0e, 0x20};

  uint64_t dst[6] = {0, 0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, dst_base);
  __ Mov(x19, src_base);
  __ Mov(x20, dst_base);
  __ Mov(x21, src_base);
  __ Mov(x22, dst_base);
  __ Ldr(q0, MemOperand(x17, 16));
  __ Str(q0, MemOperand(x18, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Str(q1, MemOperand(x20, 32, PreIndex));
  __ Ldr(q2, MemOperand(x21, 32, PreIndex));
  __ Str(q2, MemOperand(x22, 16));
  END();

  RUN();

  ASSERT_EQUAL_128(0xf0debc9a78563412, 0x0fedcba987654321, q0);
  ASSERT_EQUAL_64(0x0fedcba987654321, dst[0]);
  ASSERT_EQUAL_64(0xf0debc9a78563412, dst[1]);
  ASSERT_EQUAL_128(0xefcdab8967452301, 0xfedcba9876543210, q1);
  ASSERT_EQUAL_64(0xfedcba9876543210, dst[4]);
  ASSERT_EQUAL_64(0xefcdab8967452301, dst[5]);
  ASSERT_EQUAL_128(0x200eeccaa8866442, 0x02e0ceac8a684624, q2);
  ASSERT_EQUAL_64(0x02e0ceac8a684624, dst[2]);
  ASSERT_EQUAL_64(0x200eeccaa8866442, dst[3]);
  ASSERT_EQUAL_64(src_base, x17);
  ASSERT_EQUAL_64(dst_base + 16, x18);
  ASSERT_EQUAL_64(src_base + 16, x19);
  ASSERT_EQUAL_64(dst_base + 32, x20);
  ASSERT_EQUAL_64(src_base + 32, x21);
  ASSERT_EQUAL_64(dst_base, x22);

  TEARDOWN();
}


TEST(load_store_v_regoffset) {
  SETUP();

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uint8_t dst[64];
  memset(dst, 0, sizeof(dst));

  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, src_base + 16);
  __ Mov(x18, 1);
  __ Mov(w19, -1);
  __ Mov(x20, dst_base - 1);

  __ Ldr(b0, MemOperand(x17, x18));
  __ Ldr(b1, MemOperand(x17, x19, SXTW));

  __ Ldr(h2, MemOperand(x17, x18));
  __ Ldr(h3, MemOperand(x17, x18, UXTW, 1));
  __ Ldr(h4, MemOperand(x17, x19, SXTW, 1));
  __ Ldr(h5, MemOperand(x17, x18, LSL, 1));

  __ Ldr(s16, MemOperand(x17, x18));
  __ Ldr(s17, MemOperand(x17, x18, UXTW, 2));
  __ Ldr(s18, MemOperand(x17, x19, SXTW, 2));
  __ Ldr(s19, MemOperand(x17, x18, LSL, 2));

  __ Ldr(d20, MemOperand(x17, x18));
  __ Ldr(d21, MemOperand(x17, x18, UXTW, 3));
  __ Ldr(d22, MemOperand(x17, x19, SXTW, 3));
  __ Ldr(d23, MemOperand(x17, x18, LSL, 3));

  __ Ldr(q24, MemOperand(x17, x18));
  __ Ldr(q25, MemOperand(x17, x18, UXTW, 4));
  __ Ldr(q26, MemOperand(x17, x19, SXTW, 4));
  __ Ldr(q27, MemOperand(x17, x18, LSL, 4));

  // Store [bhsdq]27 to adjacent memory locations, then load again to check.
  __ Str(b27, MemOperand(x20, x18));
  __ Str(h27, MemOperand(x20, x18, UXTW, 1));
  __ Add(x20, x20, 8);
  __ Str(s27, MemOperand(x20, x19, SXTW, 2));
  __ Sub(x20, x20, 8);
  __ Str(d27, MemOperand(x20, x18, LSL, 3));
  __ Add(x20, x20, 32);
  __ Str(q27, MemOperand(x20, x19, SXTW, 4));

  __ Sub(x20, x20, 32);
  __ Ldr(q6, MemOperand(x20, x18));
  __ Ldr(q7, MemOperand(x20, x18, LSL, 4));

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x11, q0);
  ASSERT_EQUAL_128(0, 0x0f, q1);
  ASSERT_EQUAL_128(0, 0x1211, q2);
  ASSERT_EQUAL_128(0, 0x1312, q3);
  ASSERT_EQUAL_128(0, 0x0f0e, q4);
  ASSERT_EQUAL_128(0, 0x1312, q5);
  ASSERT_EQUAL_128(0, 0x14131211, q16);
  ASSERT_EQUAL_128(0, 0x17161514, q17);
  ASSERT_EQUAL_128(0, 0x0f0e0d0c, q18);
  ASSERT_EQUAL_128(0, 0x17161514, q19);
  ASSERT_EQUAL_128(0, 0x1817161514131211, q20);
  ASSERT_EQUAL_128(0, 0x1f1e1d1c1b1a1918, q21);
  ASSERT_EQUAL_128(0, 0x0f0e0d0c0b0a0908, q22);
  ASSERT_EQUAL_128(0, 0x1f1e1d1c1b1a1918, q23);
  ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x1817161514131211, q24);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q25);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q26);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q27);
  ASSERT_EQUAL_128(0x2027262524232221, 0x2023222120212020, q6);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q7);

  TEARDOWN();
}


TEST(neon_ld1_d) {
  SETUP();

  uint8_t src[32 + 5];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Ldr(q2, MemOperand(x17));  // Initialise top 64 bits of Q register.
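  // Ld1 with one to four registers loads consecutive elements, in memory
  // order, into consecutive registers; the 64-bit forms (V8B, V4H, V2S and
  // V1D) also clear the top 64 bits of each destination, which the Ldr above
  // makes observable.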
  __ Ld1(v2.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v3.V8B(), v4.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v5.V4H(), v6.V4H(), v7.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v16.V2S(), v17.V2S(), v18.V2S(), v19.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v20.V1D(), v21.V1D(), v22.V1D(), v23.V1D(), MemOperand(x17));
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x0706050403020100, q2);
  ASSERT_EQUAL_128(0, 0x0807060504030201, q3);
  ASSERT_EQUAL_128(0, 0x100f0e0d0c0b0a09, q4);
  ASSERT_EQUAL_128(0, 0x0908070605040302, q5);
  ASSERT_EQUAL_128(0, 0x11100f0e0d0c0b0a, q6);
  ASSERT_EQUAL_128(0, 0x1918171615141312, q7);
  ASSERT_EQUAL_128(0, 0x0a09080706050403, q16);
  ASSERT_EQUAL_128(0, 0x1211100f0e0d0c0b, q17);
  ASSERT_EQUAL_128(0, 0x1a19181716151413, q18);
  ASSERT_EQUAL_128(0, 0x2221201f1e1d1c1b, q19);
  ASSERT_EQUAL_128(0, 0x0b0a090807060504, q30);
  ASSERT_EQUAL_128(0, 0x131211100f0e0d0c, q31);
  ASSERT_EQUAL_128(0, 0x1b1a191817161514, q0);
  ASSERT_EQUAL_128(0, 0x232221201f1e1d1c, q1);
  ASSERT_EQUAL_128(0, 0x0c0b0a0908070605, q20);
  ASSERT_EQUAL_128(0, 0x14131211100f0e0d, q21);
  ASSERT_EQUAL_128(0, 0x1c1b1a1918171615, q22);
  ASSERT_EQUAL_128(0, 0x24232221201f1e1d, q23);

  TEARDOWN();
}


TEST(neon_ld1_d_postindex) {
  SETUP();

  uint8_t src[32 + 5];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base + 1);
  __ Mov(x19, src_base + 2);
  __ Mov(x20, src_base + 3);
  __ Mov(x21, src_base + 4);
  __ Mov(x22, src_base + 5);
  __ Mov(x23, 1);
  __ Ldr(q2, MemOperand(x17));  // Initialise top 64 bits of Q register.
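  // A post-indexed NEON load advances the base register either by an
  // immediate equal to the number of bytes transferred, or by a
  // general-purpose register (x23 here, advancing x17 by 1).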
  __ Ld1(v2.V8B(), MemOperand(x17, x23, PostIndex));
  __ Ld1(v3.V8B(), v4.V8B(), MemOperand(x18, 16, PostIndex));
  __ Ld1(v5.V4H(), v6.V4H(), v7.V4H(), MemOperand(x19, 24, PostIndex));
  __ Ld1(v16.V2S(),
         v17.V2S(),
         v18.V2S(),
         v19.V2S(),
         MemOperand(x20, 32, PostIndex));
  __ Ld1(v30.V2S(),
         v31.V2S(),
         v0.V2S(),
         v1.V2S(),
         MemOperand(x21, 32, PostIndex));
  __ Ld1(v20.V1D(),
         v21.V1D(),
         v22.V1D(),
         v23.V1D(),
         MemOperand(x22, 32, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x0706050403020100, q2);
  ASSERT_EQUAL_128(0, 0x0807060504030201, q3);
  ASSERT_EQUAL_128(0, 0x100f0e0d0c0b0a09, q4);
  ASSERT_EQUAL_128(0, 0x0908070605040302, q5);
  ASSERT_EQUAL_128(0, 0x11100f0e0d0c0b0a, q6);
  ASSERT_EQUAL_128(0, 0x1918171615141312, q7);
  ASSERT_EQUAL_128(0, 0x0a09080706050403, q16);
  ASSERT_EQUAL_128(0, 0x1211100f0e0d0c0b, q17);
  ASSERT_EQUAL_128(0, 0x1a19181716151413, q18);
  ASSERT_EQUAL_128(0, 0x2221201f1e1d1c1b, q19);
  ASSERT_EQUAL_128(0, 0x0b0a090807060504, q30);
  ASSERT_EQUAL_128(0, 0x131211100f0e0d0c, q31);
  ASSERT_EQUAL_128(0, 0x1b1a191817161514, q0);
  ASSERT_EQUAL_128(0, 0x232221201f1e1d1c, q1);
  ASSERT_EQUAL_128(0, 0x0c0b0a0908070605, q20);
  ASSERT_EQUAL_128(0, 0x14131211100f0e0d, q21);
  ASSERT_EQUAL_128(0, 0x1c1b1a1918171615, q22);
  ASSERT_EQUAL_128(0, 0x24232221201f1e1d, q23);
  ASSERT_EQUAL_64(src_base + 1, x17);
  ASSERT_EQUAL_64(src_base + 1 + 16, x18);
  ASSERT_EQUAL_64(src_base + 2 + 24, x19);
  ASSERT_EQUAL_64(src_base + 3 + 32, x20);
  ASSERT_EQUAL_64(src_base + 4 + 32, x21);
  ASSERT_EQUAL_64(src_base + 5 + 32, x22);

  TEARDOWN();
}


TEST(neon_ld1_q) {
  SETUP();

  uint8_t src[64 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Ld1(v2.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v3.V16B(), v4.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v5.V8H(), v6.V8H(), v7.V8H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v16.V4S(), v17.V4S(), v18.V4S(), v19.V4S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v30.V2D(), v31.V2D(), v0.V2D(), v1.V2D(), MemOperand(x17));
  END();

  RUN();

  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q2);
  ASSERT_EQUAL_128(0x100f0e0d0c0b0a09, 0x0807060504030201, q3);
  ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x1817161514131211, q4);
  ASSERT_EQUAL_128(0x11100f0e0d0c0b0a, 0x0908070605040302, q5);
  ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x1918171615141312, q6);
  ASSERT_EQUAL_128(0x31302f2e2d2c2b2a, 0x2928272625242322, q7);
  ASSERT_EQUAL_128(0x1211100f0e0d0c0b, 0x0a09080706050403, q16);
  ASSERT_EQUAL_128(0x2221201f1e1d1c1b, 0x1a19181716151413, q17);
  ASSERT_EQUAL_128(0x3231302f2e2d2c2b, 0x2a29282726252423, q18);
  ASSERT_EQUAL_128(0x4241403f3e3d3c3b, 0x3a39383736353433, q19);
  ASSERT_EQUAL_128(0x131211100f0e0d0c, 0x0b0a090807060504, q30);
  ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x1b1a191817161514, q31);
  ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x2b2a292827262524, q0);
  ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x3b3a393837363534, q1);

  TEARDOWN();
}


TEST(neon_ld1_q_postindex) {
  SETUP();

  uint8_t src[64 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base + 1);
  __ Mov(x19, src_base + 2);
  __ Mov(x20, src_base + 3);
  __ Mov(x21, src_base + 4);
  __ Mov(x22, 1);
  __ Ld1(v2.V16B(), MemOperand(x17, x22, PostIndex));
  __ Ld1(v3.V16B(), v4.V16B(), MemOperand(x18, 32, PostIndex));
  __ Ld1(v5.V8H(), v6.V8H(), v7.V8H(), MemOperand(x19, 48, PostIndex));
  __ Ld1(v16.V4S(),
         v17.V4S(),
         v18.V4S(),
         v19.V4S(),
         MemOperand(x20, 64, PostIndex));
  __ Ld1(v30.V2D(),
         v31.V2D(),
         v0.V2D(),
         v1.V2D(),
         MemOperand(x21, 64, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q2);
  ASSERT_EQUAL_128(0x100f0e0d0c0b0a09, 0x0807060504030201, q3);
  ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x1817161514131211, q4);
  ASSERT_EQUAL_128(0x11100f0e0d0c0b0a, 0x0908070605040302, q5);
  ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x1918171615141312, q6);
  ASSERT_EQUAL_128(0x31302f2e2d2c2b2a, 0x2928272625242322, q7);
  ASSERT_EQUAL_128(0x1211100f0e0d0c0b, 0x0a09080706050403, q16);
  ASSERT_EQUAL_128(0x2221201f1e1d1c1b, 0x1a19181716151413, q17);
  ASSERT_EQUAL_128(0x3231302f2e2d2c2b, 0x2a29282726252423, q18);
  ASSERT_EQUAL_128(0x4241403f3e3d3c3b, 0x3a39383736353433, q19);
  ASSERT_EQUAL_128(0x131211100f0e0d0c, 0x0b0a090807060504, q30);
  ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x1b1a191817161514, q31);
  ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x2b2a292827262524, q0);
  ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x3b3a393837363534, q1);
  ASSERT_EQUAL_64(src_base + 1, x17);
  ASSERT_EQUAL_64(src_base + 1 + 32, x18);
  ASSERT_EQUAL_64(src_base + 2 + 48, x19);
  ASSERT_EQUAL_64(src_base + 3 + 64, x20);
  ASSERT_EQUAL_64(src_base + 4 + 64, x21);

  TEARDOWN();
}


TEST(neon_ld1_lane) {
  SETUP();

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();

  // Test loading whole register by element.
  __ Mov(x17, src_base);
  for (int i = 15; i >= 0; i--) {
    __ Ld1(v0.B(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 7; i >= 0; i--) {
    __ Ld1(v1.H(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 3; i >= 0; i--) {
    __ Ld1(v2.S(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 1; i >= 0; i--) {
    __ Ld1(v3.D(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  // Test loading a single element into an initialised register.
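  // A lane load replaces only the addressed element and leaves the rest of
  // the register unchanged, so each qN is preloaded with Ldr first to make
  // the preserved bytes observable.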
  __ Mov(x17, src_base);
  __ Ldr(q4, MemOperand(x17));
  __ Ld1(v4.B(), 4, MemOperand(x17));
  __ Ldr(q5, MemOperand(x17));
  __ Ld1(v5.H(), 3, MemOperand(x17));
  __ Ldr(q6, MemOperand(x17));
  __ Ld1(v6.S(), 2, MemOperand(x17));
  __ Ldr(q7, MemOperand(x17));
  __ Ld1(v7.D(), 1, MemOperand(x17));

  END();

  RUN();

  ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
  ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q1);
  ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q2);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q3);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q4);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q5);
  ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q6);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q7);

  TEARDOWN();
}

TEST(neon_ld2_d) {
  SETUP();

  uint8_t src[64 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Ld2(v2.V8B(), v3.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v4.V8B(), v5.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v6.V4H(), v7.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v31.V2S(), v0.V2S(), MemOperand(x17));
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x0e0c0a0806040200, q2);
  ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q3);
  ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q4);
  ASSERT_EQUAL_128(0, 0x100e0c0a08060402, q5);
  ASSERT_EQUAL_128(0, 0x0f0e0b0a07060302, q6);
  ASSERT_EQUAL_128(0, 0x11100d0c09080504, q7);
  ASSERT_EQUAL_128(0, 0x0e0d0c0b06050403, q31);
  ASSERT_EQUAL_128(0, 0x1211100f0a090807, q0);

  TEARDOWN();
}

TEST(neon_ld2_d_postindex) {
  SETUP();

  uint8_t src[32 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base + 1);
  __ Mov(x19, src_base + 2);
  __ Mov(x20, src_base + 3);
  __ Mov(x21, src_base + 4);
  __ Mov(x22, 1);
  __ Ld2(v2.V8B(), v3.V8B(), MemOperand(x17, x22, PostIndex));
  __ Ld2(v4.V8B(), v5.V8B(), MemOperand(x18, 16, PostIndex));
  __ Ld2(v5.V4H(), v6.V4H(), MemOperand(x19, 16, PostIndex));
  __ Ld2(v16.V2S(), v17.V2S(), MemOperand(x20, 16, PostIndex));
  __ Ld2(v31.V2S(), v0.V2S(), MemOperand(x21, 16, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x0e0c0a0806040200, q2);
  ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q3);
  ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q4);
  ASSERT_EQUAL_128(0, 0x0f0e0b0a07060302, q5);
  ASSERT_EQUAL_128(0, 0x11100d0c09080504, q6);
  ASSERT_EQUAL_128(0, 0x0e0d0c0b06050403, q16);
  ASSERT_EQUAL_128(0, 0x1211100f0a090807, q17);
  ASSERT_EQUAL_128(0, 0x0f0e0d0c07060504, q31);
  ASSERT_EQUAL_128(0, 0x131211100b0a0908, q0);

  ASSERT_EQUAL_64(src_base + 1, x17);
  ASSERT_EQUAL_64(src_base + 1 + 16, x18);
  ASSERT_EQUAL_64(src_base + 2 + 16, x19);
  ASSERT_EQUAL_64(src_base + 3 + 16, x20);
  ASSERT_EQUAL_64(src_base + 4 + 16, x21);

  TEARDOWN();
}


TEST(neon_ld2_q) {
  SETUP();

  uint8_t src[64 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Ld2(v2.V16B(), v3.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v4.V16B(), v5.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v6.V8H(), v7.V8H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v16.V4S(), v17.V4S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v31.V2D(), v0.V2D(), MemOperand(x17));
  END();

  RUN();

  ASSERT_EQUAL_128(0x1e1c1a1816141210, 0x0e0c0a0806040200, q2);
  ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q3);
  ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q4);
  ASSERT_EQUAL_128(0x201e1c1a18161412, 0x100e0c0a08060402, q5);
  ASSERT_EQUAL_128(0x1f1e1b1a17161312, 0x0f0e0b0a07060302, q6);
  ASSERT_EQUAL_128(0x21201d1c19181514, 0x11100d0c09080504, q7);
  ASSERT_EQUAL_128(0x1e1d1c1b16151413, 0x0e0d0c0b06050403, q16);
  ASSERT_EQUAL_128(0x2221201f1a191817, 0x1211100f0a090807, q17);
  ASSERT_EQUAL_128(0x1b1a191817161514, 0x0b0a090807060504, q31);
  ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x131211100f0e0d0c, q0);

  TEARDOWN();
}


TEST(neon_ld2_q_postindex) {
  SETUP();

  uint8_t src[64 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base + 1);
  __ Mov(x19, src_base + 2);
  __ Mov(x20, src_base + 3);
  __ Mov(x21, src_base + 4);
  __ Mov(x22, 1);
  __ Ld2(v2.V16B(), v3.V16B(), MemOperand(x17, x22, PostIndex));
  __ Ld2(v4.V16B(), v5.V16B(), MemOperand(x18, 32, PostIndex));
  __ Ld2(v6.V8H(), v7.V8H(), MemOperand(x19, 32, PostIndex));
  __ Ld2(v16.V4S(), v17.V4S(), MemOperand(x20, 32, PostIndex));
  __ Ld2(v31.V2D(), v0.V2D(), MemOperand(x21, 32, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_128(0x1e1c1a1816141210, 0x0e0c0a0806040200, q2);
  ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q3);
  ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q4);
  ASSERT_EQUAL_128(0x201e1c1a18161412, 0x100e0c0a08060402, q5);
  ASSERT_EQUAL_128(0x1f1e1b1a17161312, 0x0f0e0b0a07060302, q6);
  ASSERT_EQUAL_128(0x21201d1c19181514, 0x11100d0c09080504, q7);
  ASSERT_EQUAL_128(0x1e1d1c1b16151413, 0x0e0d0c0b06050403, q16);
  ASSERT_EQUAL_128(0x2221201f1a191817, 0x1211100f0a090807, q17);
  ASSERT_EQUAL_128(0x1b1a191817161514, 0x0b0a090807060504, q31);
  ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x131211100f0e0d0c, q0);


  ASSERT_EQUAL_64(src_base + 1, x17);
  ASSERT_EQUAL_64(src_base + 1 + 32, x18);
  ASSERT_EQUAL_64(src_base + 2 + 32, x19);
  ASSERT_EQUAL_64(src_base + 3 + 32, x20);
  ASSERT_EQUAL_64(src_base + 4 + 32, x21);

  TEARDOWN();
}


TEST(neon_ld2_lane) {
  SETUP();

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();

  // Test loading whole register by element.
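  // Ld2 with a lane index de-interleaves a pair of adjacent elements from
  // memory into the same lane of two registers.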
  __ Mov(x17, src_base);
  for (int i = 15; i >= 0; i--) {
    __ Ld2(v0.B(), v1.B(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 7; i >= 0; i--) {
    __ Ld2(v2.H(), v3.H(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 3; i >= 0; i--) {
    __ Ld2(v4.S(), v5.S(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 1; i >= 0; i--) {
    __ Ld2(v6.D(), v7.D(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  // Test loading a single element into an initialised register.
  __ Mov(x17, src_base);
  __ Mov(x4, x17);
  __ Ldr(q8, MemOperand(x4, 16, PostIndex));
  __ Ldr(q9, MemOperand(x4));
  __ Ld2(v8.B(), v9.B(), 4, MemOperand(x17));
  __ Mov(x5, x17);
  __ Ldr(q10, MemOperand(x5, 16, PostIndex));
  __ Ldr(q11, MemOperand(x5));
  __ Ld2(v10.H(), v11.H(), 3, MemOperand(x17));
  __ Mov(x6, x17);
  __ Ldr(q12, MemOperand(x6, 16, PostIndex));
  __ Ldr(q13, MemOperand(x6));
  __ Ld2(v12.S(), v13.S(), 2, MemOperand(x17));
  __ Mov(x7, x17);
  __ Ldr(q14, MemOperand(x7, 16, PostIndex));
  __ Ldr(q15, MemOperand(x7));
  __ Ld2(v14.D(), v15.D(), 1, MemOperand(x17));

  END();

  RUN();

  ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
  ASSERT_EQUAL_128(0x0102030405060708, 0x090a0b0c0d0e0f10, q1);
  ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q2);
  ASSERT_EQUAL_128(0x0302040305040605, 0x0706080709080a09, q3);
  ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q4);
  ASSERT_EQUAL_128(0x0706050408070605, 0x090807060a090807, q5);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q6);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x100f0e0d0c0b0a09, q7);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q8);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q9);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q10);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q11);
  ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q12);
  ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q13);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q14);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q15);

  TEARDOWN();
}


TEST(neon_ld2_lane_postindex) {
  SETUP();

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Mov(x19, src_base);
  __ Mov(x20, src_base);
  __ Mov(x21, src_base);
  __ Mov(x22, src_base);
  __ Mov(x23, src_base);
  __ Mov(x24, src_base);

  // Test loading whole register by element.
  for (int i = 15; i >= 0; i--) {
    __ Ld2(v0.B(), v1.B(), i, MemOperand(x17, 2, PostIndex));
  }

  for (int i = 7; i >= 0; i--) {
    __ Ld2(v2.H(), v3.H(), i, MemOperand(x18, 4, PostIndex));
  }

  for (int i = 3; i >= 0; i--) {
    __ Ld2(v4.S(), v5.S(), i, MemOperand(x19, 8, PostIndex));
  }

  for (int i = 1; i >= 0; i--) {
    __ Ld2(v6.D(), v7.D(), i, MemOperand(x20, 16, PostIndex));
  }

  // Test loading a single element into an initialised register.
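  // For a single-structure Ld2 the immediate post-index is twice the element
  // size (2, 4, 8 or 16 bytes above); the register form (x25 below) allows
  // an arbitrary advance instead.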
  __ Mov(x25, 1);
  __ Mov(x4, x21);
  __ Ldr(q8, MemOperand(x4, 16, PostIndex));
  __ Ldr(q9, MemOperand(x4));
  __ Ld2(v8.B(), v9.B(), 4, MemOperand(x21, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x5, x22);
  __ Ldr(q10, MemOperand(x5, 16, PostIndex));
  __ Ldr(q11, MemOperand(x5));
  __ Ld2(v10.H(), v11.H(), 3, MemOperand(x22, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x6, x23);
  __ Ldr(q12, MemOperand(x6, 16, PostIndex));
  __ Ldr(q13, MemOperand(x6));
  __ Ld2(v12.S(), v13.S(), 2, MemOperand(x23, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x7, x24);
  __ Ldr(q14, MemOperand(x7, 16, PostIndex));
  __ Ldr(q15, MemOperand(x7));
  __ Ld2(v14.D(), v15.D(), 1, MemOperand(x24, x25, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x00020406080a0c0e, 0x10121416181a1c1e, q0);
  ASSERT_EQUAL_128(0x01030507090b0d0f, 0x11131517191b1d1f, q1);
  ASSERT_EQUAL_128(0x0100050409080d0c, 0x1110151419181d1c, q2);
  ASSERT_EQUAL_128(0x030207060b0a0f0e, 0x131217161b1a1f1e, q3);
  ASSERT_EQUAL_128(0x030201000b0a0908, 0x131211101b1a1918, q4);
  ASSERT_EQUAL_128(0x070605040f0e0d0c, 0x171615141f1e1d1c, q5);
  ASSERT_EQUAL_128(0x0706050403020100, 0x1716151413121110, q6);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1f1e1d1c1b1a1918, q7);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q8);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q9);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q10);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q11);
  ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q12);
  ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q13);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q14);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q15);


  ASSERT_EQUAL_64(src_base + 32, x17);
  ASSERT_EQUAL_64(src_base + 32, x18);
  ASSERT_EQUAL_64(src_base + 32, x19);
  ASSERT_EQUAL_64(src_base + 32, x20);
  ASSERT_EQUAL_64(src_base + 1, x21);
  ASSERT_EQUAL_64(src_base + 2, x22);
  ASSERT_EQUAL_64(src_base + 3, x23);
  ASSERT_EQUAL_64(src_base + 4, x24);

  TEARDOWN();
}


TEST(neon_ld2_alllanes) {
  SETUP();

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base + 1);
  __ Mov(x18, 1);
  __ Ld2r(v0.V8B(), v1.V8B(), MemOperand(x17));
  __ Add(x17, x17, 2);
  __ Ld2r(v2.V16B(), v3.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2r(v4.V4H(), v5.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2r(v6.V8H(), v7.V8H(), MemOperand(x17));
  __ Add(x17, x17, 4);
  __ Ld2r(v8.V2S(), v9.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2r(v10.V4S(), v11.V4S(), MemOperand(x17));
  __ Add(x17, x17, 8);
  __ Ld2r(v12.V2D(), v13.V2D(), MemOperand(x17));
  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
  ASSERT_EQUAL_128(0x0303030303030303, 0x0303030303030303, q2);
  ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0504050405040504, q4);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q5);
  ASSERT_EQUAL_128(0x0605060506050605, 0x0605060506050605, q6);
  ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q7);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0c0b0a090c0b0a09, q8);
  ASSERT_EQUAL_128(0x0000000000000000, 0x100f0e0d100f0e0d, q9);
  ASSERT_EQUAL_128(0x0d0c0b0a0d0c0b0a, 0x0d0c0b0a0d0c0b0a, q10);
  ASSERT_EQUAL_128(0x11100f0e11100f0e, 0x11100f0e11100f0e, q11);
  ASSERT_EQUAL_128(0x1918171615141312, 0x1918171615141312, q12);
  ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x21201f1e1d1c1b1a, q13);

  TEARDOWN();
}


TEST(neon_ld2_alllanes_postindex) {
  SETUP();

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base + 1);
  __ Mov(x18, 1);
  __ Ld2r(v0.V8B(), v1.V8B(), MemOperand(x17, 2, PostIndex));
  __ Ld2r(v2.V16B(), v3.V16B(), MemOperand(x17, x18, PostIndex));
  __ Ld2r(v4.V4H(), v5.V4H(), MemOperand(x17, x18, PostIndex));
  __ Ld2r(v6.V8H(), v7.V8H(), MemOperand(x17, 4, PostIndex));
  __ Ld2r(v8.V2S(), v9.V2S(), MemOperand(x17, x18, PostIndex));
  __ Ld2r(v10.V4S(), v11.V4S(), MemOperand(x17, 8, PostIndex));
  __ Ld2r(v12.V2D(), v13.V2D(), MemOperand(x17, 16, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
  ASSERT_EQUAL_128(0x0303030303030303, 0x0303030303030303, q2);
  ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0504050405040504, q4);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q5);
  ASSERT_EQUAL_128(0x0605060506050605, 0x0605060506050605, q6);
  ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q7);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0c0b0a090c0b0a09, q8);
  ASSERT_EQUAL_128(0x0000000000000000, 0x100f0e0d100f0e0d, q9);
  ASSERT_EQUAL_128(0x0d0c0b0a0d0c0b0a, 0x0d0c0b0a0d0c0b0a, q10);
  ASSERT_EQUAL_128(0x11100f0e11100f0e, 0x11100f0e11100f0e, q11);
  ASSERT_EQUAL_128(0x1918171615141312, 0x1918171615141312, q12);
  ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x21201f1e1d1c1b1a, q13);
  ASSERT_EQUAL_64(src_base + 34, x17);

  TEARDOWN();
}


TEST(neon_ld3_d) {
  SETUP();

  uint8_t src[64 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Ld3(v2.V8B(), v3.V8B(), v4.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v5.V8B(), v6.V8B(), v7.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v8.V4H(), v9.V4H(), v10.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x17));
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x15120f0c09060300, q2);
  ASSERT_EQUAL_128(0, 0x1613100d0a070401, q3);
  ASSERT_EQUAL_128(0, 0x1714110e0b080502, q4);
  ASSERT_EQUAL_128(0, 0x1613100d0a070401, q5);
  ASSERT_EQUAL_128(0, 0x1714110e0b080502, q6);
  ASSERT_EQUAL_128(0, 0x1815120f0c090603, q7);
  ASSERT_EQUAL_128(0, 0x15140f0e09080302, q8);
  ASSERT_EQUAL_128(0, 0x171611100b0a0504, q9);
  ASSERT_EQUAL_128(0, 0x191813120d0c0706, q10);
  ASSERT_EQUAL_128(0, 0x1211100f06050403, q31);
  ASSERT_EQUAL_128(0, 0x161514130a090807, q0);
  ASSERT_EQUAL_128(0, 0x1a1918170e0d0c0b, q1);

  TEARDOWN();
}


TEST(neon_ld3_d_postindex) {
  SETUP();

  uint8_t src[32 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base + 1);
  __ Mov(x19, src_base + 2);
  __ Mov(x20, src_base + 3);
  __ Mov(x21, src_base + 4);
  __ Mov(x22, 1);
  __ Ld3(v2.V8B(), v3.V8B(), v4.V8B(), MemOperand(x17, x22, PostIndex));
  __ Ld3(v5.V8B(), v6.V8B(), v7.V8B(), MemOperand(x18, 24, PostIndex));
  __ Ld3(v8.V4H(), v9.V4H(), v10.V4H(), MemOperand(x19, 24, PostIndex));
  __ Ld3(v11.V2S(), v12.V2S(), v13.V2S(), MemOperand(x20, 24, PostIndex));
  __ Ld3(v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x21, 24, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x15120f0c09060300, q2);
  ASSERT_EQUAL_128(0, 0x1613100d0a070401, q3);
  ASSERT_EQUAL_128(0, 0x1714110e0b080502, q4);
  ASSERT_EQUAL_128(0, 0x1613100d0a070401, q5);
  ASSERT_EQUAL_128(0, 0x1714110e0b080502, q6);
  ASSERT_EQUAL_128(0, 0x1815120f0c090603, q7);
  ASSERT_EQUAL_128(0, 0x15140f0e09080302, q8);
  ASSERT_EQUAL_128(0, 0x171611100b0a0504, q9);
  ASSERT_EQUAL_128(0, 0x191813120d0c0706, q10);
  ASSERT_EQUAL_128(0, 0x1211100f06050403, q11);
  ASSERT_EQUAL_128(0, 0x161514130a090807, q12);
  ASSERT_EQUAL_128(0, 0x1a1918170e0d0c0b, q13);
  ASSERT_EQUAL_128(0, 0x1312111007060504, q31);
  ASSERT_EQUAL_128(0, 0x171615140b0a0908, q0);
  ASSERT_EQUAL_128(0, 0x1b1a19180f0e0d0c, q1);

  ASSERT_EQUAL_64(src_base + 1, x17);
  ASSERT_EQUAL_64(src_base + 1 + 24, x18);
  ASSERT_EQUAL_64(src_base + 2 + 24, x19);
  ASSERT_EQUAL_64(src_base + 3 + 24, x20);
  ASSERT_EQUAL_64(src_base + 4 + 24, x21);

  TEARDOWN();
}


TEST(neon_ld3_q) {
  SETUP();

  uint8_t src[64 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Ld3(v2.V16B(), v3.V16B(), v4.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v5.V16B(), v6.V16B(), v7.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v8.V8H(), v9.V8H(), v10.V8H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v11.V4S(), v12.V4S(), v13.V4S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v31.V2D(), v0.V2D(), v1.V2D(), MemOperand(x17));
  END();

  RUN();

  ASSERT_EQUAL_128(0x2d2a2724211e1b18, 0x15120f0c09060300, q2);
  ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q3);
  ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q4);
  ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q5);
  ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q6);
  ASSERT_EQUAL_128(0x302d2a2724211e1b, 0x1815120f0c090603, q7);
  ASSERT_EQUAL_128(0x2d2c272621201b1a, 0x15140f0e09080302, q8);
  ASSERT_EQUAL_128(0x2f2e292823221d1c, 0x171611100b0a0504, q9);
  ASSERT_EQUAL_128(0x31302b2a25241f1e, 0x191813120d0c0706, q10);
  ASSERT_EQUAL_128(0x2a2928271e1d1c1b, 0x1211100f06050403, q11);
  ASSERT_EQUAL_128(0x2e2d2c2b2221201f, 0x161514130a090807, q12);
  ASSERT_EQUAL_128(0x3231302f26252423, 0x1a1918170e0d0c0b, q13);
  ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x0b0a090807060504, q31);
  ASSERT_EQUAL_128(0x2b2a292827262524, 0x131211100f0e0d0c, q0);
  ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x1b1a191817161514, q1);

  TEARDOWN();
}


TEST(neon_ld3_q_postindex) {
  SETUP();

  uint8_t src[64 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base + 1);
  __ Mov(x19, src_base + 2);
  __ Mov(x20, src_base + 3);
  __ Mov(x21, src_base + 4);
  __ Mov(x22, 1);

  __ Ld3(v2.V16B(), v3.V16B(), v4.V16B(), MemOperand(x17, x22, PostIndex));
  __ Ld3(v5.V16B(), v6.V16B(), v7.V16B(), MemOperand(x18, 48, PostIndex));
  __ Ld3(v8.V8H(), v9.V8H(), v10.V8H(), MemOperand(x19, 48, PostIndex));
  __ Ld3(v11.V4S(), v12.V4S(), v13.V4S(), MemOperand(x20, 48, PostIndex));
  __ Ld3(v31.V2D(), v0.V2D(), v1.V2D(), MemOperand(x21, 48, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_128(0x2d2a2724211e1b18, 0x15120f0c09060300, q2);
  ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q3);
  ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q4);
  ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q5);
  ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q6);
  ASSERT_EQUAL_128(0x302d2a2724211e1b, 0x1815120f0c090603, q7);
  ASSERT_EQUAL_128(0x2d2c272621201b1a, 0x15140f0e09080302, q8);
  ASSERT_EQUAL_128(0x2f2e292823221d1c, 0x171611100b0a0504, q9);
  ASSERT_EQUAL_128(0x31302b2a25241f1e, 0x191813120d0c0706, q10);
  ASSERT_EQUAL_128(0x2a2928271e1d1c1b, 0x1211100f06050403, q11);
  ASSERT_EQUAL_128(0x2e2d2c2b2221201f, 0x161514130a090807, q12);
  ASSERT_EQUAL_128(0x3231302f26252423, 0x1a1918170e0d0c0b, q13);
  ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x0b0a090807060504, q31);
  ASSERT_EQUAL_128(0x2b2a292827262524, 0x131211100f0e0d0c, q0);
  ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x1b1a191817161514, q1);

  ASSERT_EQUAL_64(src_base + 1, x17);
  ASSERT_EQUAL_64(src_base + 1 + 48, x18);
  ASSERT_EQUAL_64(src_base + 2 + 48, x19);
  ASSERT_EQUAL_64(src_base + 3 + 48, x20);
  ASSERT_EQUAL_64(src_base + 4 + 48, x21);

  TEARDOWN();
}


TEST(neon_ld3_lane) {
  SETUP();

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();

  // Test loading whole register by element.
  __ Mov(x17, src_base);
  for (int i = 15; i >= 0; i--) {
    __ Ld3(v0.B(), v1.B(), v2.B(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 7; i >= 0; i--) {
    __ Ld3(v3.H(), v4.H(), v5.H(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 3; i >= 0; i--) {
    __ Ld3(v6.S(), v7.S(), v8.S(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 1; i >= 0; i--) {
    __ Ld3(v9.D(), v10.D(), v11.D(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  // Test loading a single element into an initialised register.
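  // As with Ld2 above, a lane-indexed Ld3 loads three consecutive elements
  // into the same lane of three registers, leaving all other lanes intact.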


TEST(neon_ld3_lane) {
  SETUP();

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();

  // Test loading whole register by element.
  __ Mov(x17, src_base);
  for (int i = 15; i >= 0; i--) {
    __ Ld3(v0.B(), v1.B(), v2.B(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 7; i >= 0; i--) {
    __ Ld3(v3.H(), v4.H(), v5.H(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 3; i >= 0; i--) {
    __ Ld3(v6.S(), v7.S(), v8.S(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 1; i >= 0; i--) {
    __ Ld3(v9.D(), v10.D(), v11.D(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  // Test loading a single element into an initialised register.
  __ Mov(x17, src_base);
  __ Mov(x4, x17);
  __ Ldr(q12, MemOperand(x4, 16, PostIndex));
  __ Ldr(q13, MemOperand(x4, 16, PostIndex));
  __ Ldr(q14, MemOperand(x4));
  __ Ld3(v12.B(), v13.B(), v14.B(), 4, MemOperand(x17));
  __ Mov(x5, x17);
  __ Ldr(q15, MemOperand(x5, 16, PostIndex));
  __ Ldr(q16, MemOperand(x5, 16, PostIndex));
  __ Ldr(q17, MemOperand(x5));
  __ Ld3(v15.H(), v16.H(), v17.H(), 3, MemOperand(x17));
  __ Mov(x6, x17);
  __ Ldr(q18, MemOperand(x6, 16, PostIndex));
  __ Ldr(q19, MemOperand(x6, 16, PostIndex));
  __ Ldr(q20, MemOperand(x6));
  __ Ld3(v18.S(), v19.S(), v20.S(), 2, MemOperand(x17));
  __ Mov(x7, x17);
  __ Ldr(q21, MemOperand(x7, 16, PostIndex));
  __ Ldr(q22, MemOperand(x7, 16, PostIndex));
  __ Ldr(q23, MemOperand(x7));
  __ Ld3(v21.D(), v22.D(), v23.D(), 1, MemOperand(x17));

  END();

  RUN();

  ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
  ASSERT_EQUAL_128(0x0102030405060708, 0x090a0b0c0d0e0f10, q1);
  ASSERT_EQUAL_128(0x0203040506070809, 0x0a0b0c0d0e0f1011, q2);
  ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q3);
  ASSERT_EQUAL_128(0x0302040305040605, 0x0706080709080a09, q4);
  ASSERT_EQUAL_128(0x0504060507060807, 0x09080a090b0a0c0b, q5);
  ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q6);
  ASSERT_EQUAL_128(0x0706050408070605, 0x090807060a090807, q7);
  ASSERT_EQUAL_128(0x0b0a09080c0b0a09, 0x0d0c0b0a0e0d0c0b, q8);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q9);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x100f0e0d0c0b0a09, q10);
  ASSERT_EQUAL_128(0x1716151413121110, 0x1817161514131211, q11);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q12);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q13);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q14);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q15);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q16);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q17);

  TEARDOWN();
}


TEST(neon_ld3_lane_postindex) {
  SETUP();

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();

  // Test loading whole register by element.
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Mov(x19, src_base);
  __ Mov(x20, src_base);
  __ Mov(x21, src_base);
  __ Mov(x22, src_base);
  __ Mov(x23, src_base);
  __ Mov(x24, src_base);
  for (int i = 15; i >= 0; i--) {
    __ Ld3(v0.B(), v1.B(), v2.B(), i, MemOperand(x17, 3, PostIndex));
  }

  for (int i = 7; i >= 0; i--) {
    __ Ld3(v3.H(), v4.H(), v5.H(), i, MemOperand(x18, 6, PostIndex));
  }

  for (int i = 3; i >= 0; i--) {
    __ Ld3(v6.S(), v7.S(), v8.S(), i, MemOperand(x19, 12, PostIndex));
  }

  for (int i = 1; i >= 0; i--) {
    __ Ld3(v9.D(), v10.D(), v11.D(), i, MemOperand(x20, 24, PostIndex));
  }

  // Test loading a single element into an initialised register.
  __ Mov(x25, 1);
  __ Mov(x4, x21);
  __ Ldr(q12, MemOperand(x4, 16, PostIndex));
  __ Ldr(q13, MemOperand(x4, 16, PostIndex));
  __ Ldr(q14, MemOperand(x4));
  __ Ld3(v12.B(), v13.B(), v14.B(), 4, MemOperand(x21, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x5, x22);
  __ Ldr(q15, MemOperand(x5, 16, PostIndex));
  __ Ldr(q16, MemOperand(x5, 16, PostIndex));
  __ Ldr(q17, MemOperand(x5));
  __ Ld3(v15.H(), v16.H(), v17.H(), 3, MemOperand(x22, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x6, x23);
  __ Ldr(q18, MemOperand(x6, 16, PostIndex));
  __ Ldr(q19, MemOperand(x6, 16, PostIndex));
  __ Ldr(q20, MemOperand(x6));
  __ Ld3(v18.S(), v19.S(), v20.S(), 2, MemOperand(x23, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x7, x24);
  __ Ldr(q21, MemOperand(x7, 16, PostIndex));
  __ Ldr(q22, MemOperand(x7, 16, PostIndex));
  __ Ldr(q23, MemOperand(x7));
  __ Ld3(v21.D(), v22.D(), v23.D(), 1, MemOperand(x24, x25, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x000306090c0f1215, 0x181b1e2124272a2d, q0);
  ASSERT_EQUAL_128(0x0104070a0d101316, 0x191c1f2225282b2e, q1);
  ASSERT_EQUAL_128(0x0205080b0e111417, 0x1a1d202326292c2f, q2);
  ASSERT_EQUAL_128(0x010007060d0c1312, 0x19181f1e25242b2a, q3);
  ASSERT_EQUAL_128(0x030209080f0e1514, 0x1b1a212027262d2c, q4);
  ASSERT_EQUAL_128(0x05040b0a11101716, 0x1d1c232229282f2e, q5);
  ASSERT_EQUAL_128(0x030201000f0e0d0c, 0x1b1a191827262524, q6);
  ASSERT_EQUAL_128(0x0706050413121110, 0x1f1e1d1c2b2a2928, q7);
  ASSERT_EQUAL_128(0x0b0a090817161514, 0x232221202f2e2d2c, q8);
  ASSERT_EQUAL_128(0x0706050403020100, 0x1f1e1d1c1b1a1918, q9);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x2726252423222120, q10);
  ASSERT_EQUAL_128(0x1716151413121110, 0x2f2e2d2c2b2a2928, q11);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q12);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q13);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q14);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q15);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q16);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q17);
  ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q18);
  ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q19);
  ASSERT_EQUAL_128(0x2f2e2d2c0b0a0908, 0x2726252423222120, q20);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q21);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q22);
  ASSERT_EQUAL_128(0x1716151413121110, 0x2726252423222120, q23);

  ASSERT_EQUAL_64(src_base + 48, x17);
  ASSERT_EQUAL_64(src_base + 48, x18);
  ASSERT_EQUAL_64(src_base + 48, x19);
  ASSERT_EQUAL_64(src_base + 48, x20);
  ASSERT_EQUAL_64(src_base + 1, x21);
  ASSERT_EQUAL_64(src_base + 2, x22);
  ASSERT_EQUAL_64(src_base + 3, x23);
  ASSERT_EQUAL_64(src_base + 4, x24);

  TEARDOWN();
}
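

// LD3R, exercised by the "alllanes" tests below, reads exactly one
// three-element structure and replicates each element across every lane of
// the corresponding destination register, so each register ends up holding a
// single repeated value.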


TEST(neon_ld3_alllanes) {
  SETUP();

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base + 1);
  __ Mov(x18, 1);
  __ Ld3r(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x17));
  __ Add(x17, x17, 3);
  __ Ld3r(v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3r(v6.V4H(), v7.V4H(), v8.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3r(v9.V8H(), v10.V8H(), v11.V8H(), MemOperand(x17));
  __ Add(x17, x17, 6);
  __ Ld3r(v12.V2S(), v13.V2S(), v14.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3r(v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x17));
  __ Add(x17, x17, 12);
  __ Ld3r(v18.V2D(), v19.V2D(), v20.V2D(), MemOperand(x17));
  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
  ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
  ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
  ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0605060506050605, q6);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0807080708070807, q7);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0a090a090a090a09, q8);
  ASSERT_EQUAL_128(0x0706070607060706, 0x0706070607060706, q9);
  ASSERT_EQUAL_128(0x0908090809080908, 0x0908090809080908, q10);
  ASSERT_EQUAL_128(0x0b0a0b0a0b0a0b0a, 0x0b0a0b0a0b0a0b0a, q11);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0f0e0d0c0f0e0d0c, q12);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1312111013121110, q13);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1716151417161514, q14);
  ASSERT_EQUAL_128(0x100f0e0d100f0e0d, 0x100f0e0d100f0e0d, q15);
  ASSERT_EQUAL_128(0x1413121114131211, 0x1413121114131211, q16);
  ASSERT_EQUAL_128(0x1817161518171615, 0x1817161518171615, q17);
  ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x201f1e1d1c1b1a19, q18);
  ASSERT_EQUAL_128(0x2827262524232221, 0x2827262524232221, q19);
  ASSERT_EQUAL_128(0x302f2e2d2c2b2a29, 0x302f2e2d2c2b2a29, q20);

  TEARDOWN();
}


TEST(neon_ld3_alllanes_postindex) {
  SETUP();

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base + 1);
  __ Mov(x18, 1);
  __ Ld3r(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x17, 3, PostIndex));
  __ Ld3r(v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x17, x18, PostIndex));
  __ Ld3r(v6.V4H(), v7.V4H(), v8.V4H(), MemOperand(x17, x18, PostIndex));
  __ Ld3r(v9.V8H(), v10.V8H(), v11.V8H(), MemOperand(x17, 6, PostIndex));
  __ Ld3r(v12.V2S(), v13.V2S(), v14.V2S(), MemOperand(x17, x18, PostIndex));
  __ Ld3r(v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x17, 12, PostIndex));
  __ Ld3r(v18.V2D(), v19.V2D(), v20.V2D(), MemOperand(x17, 24, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
  ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
  ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
  ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0605060506050605, q6);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0807080708070807, q7);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0a090a090a090a09, q8);
  ASSERT_EQUAL_128(0x0706070607060706, 0x0706070607060706, q9);
  ASSERT_EQUAL_128(0x0908090809080908, 0x0908090809080908, q10);
  ASSERT_EQUAL_128(0x0b0a0b0a0b0a0b0a, 0x0b0a0b0a0b0a0b0a, q11);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0f0e0d0c0f0e0d0c, q12);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1312111013121110, q13);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1716151417161514, q14);
  ASSERT_EQUAL_128(0x100f0e0d100f0e0d, 0x100f0e0d100f0e0d, q15);
  ASSERT_EQUAL_128(0x1413121114131211, 0x1413121114131211, q16);
  ASSERT_EQUAL_128(0x1817161518171615, 0x1817161518171615, q17);
  ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x201f1e1d1c1b1a19, q18);
  ASSERT_EQUAL_128(0x2827262524232221, 0x2827262524232221, q19);
  ASSERT_EQUAL_128(0x302f2e2d2c2b2a29, 0x302f2e2d2c2b2a29, q20);

  TEARDOWN();
}


TEST(neon_ld4_d) {
  SETUP();

  uint8_t src[64 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Ld4(v2.V8B(), v3.V8B(), v4.V8B(), v5.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v6.V8B(), v7.V8B(), v8.V8B(), v9.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v10.V4H(), v11.V4H(), v12.V4H(), v13.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x17));
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x1c1814100c080400, q2);
  ASSERT_EQUAL_128(0, 0x1d1915110d090501, q3);
  ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q4);
  ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q5);
  ASSERT_EQUAL_128(0, 0x1d1915110d090501, q6);
  ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q7);
  ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q8);
  ASSERT_EQUAL_128(0, 0x201c1814100c0804, q9);
  ASSERT_EQUAL_128(0, 0x1b1a13120b0a0302, q10);
  ASSERT_EQUAL_128(0, 0x1d1c15140d0c0504, q11);
  ASSERT_EQUAL_128(0, 0x1f1e17160f0e0706, q12);
  ASSERT_EQUAL_128(0, 0x2120191811100908, q13);
  ASSERT_EQUAL_128(0, 0x1615141306050403, q30);
  ASSERT_EQUAL_128(0, 0x1a1918170a090807, q31);
  ASSERT_EQUAL_128(0, 0x1e1d1c1b0e0d0c0b, q0);
  ASSERT_EQUAL_128(0, 0x2221201f1211100f, q1);

  TEARDOWN();
}
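

// In the post-index forms below, the base register is advanced after the
// access, either by a second general-purpose register (x22 holds 1, so that
// base moves by a single byte) or by an immediate equal to the total
// transfer size (32 bytes for an LD4 of four D registers).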


TEST(neon_ld4_d_postindex) {
  SETUP();

  uint8_t src[32 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base + 1);
  __ Mov(x19, src_base + 2);
  __ Mov(x20, src_base + 3);
  __ Mov(x21, src_base + 4);
  __ Mov(x22, 1);
  __ Ld4(v2.V8B(),
         v3.V8B(),
         v4.V8B(),
         v5.V8B(),
         MemOperand(x17, x22, PostIndex));
  __ Ld4(v6.V8B(),
         v7.V8B(),
         v8.V8B(),
         v9.V8B(),
         MemOperand(x18, 32, PostIndex));
  __ Ld4(v10.V4H(),
         v11.V4H(),
         v12.V4H(),
         v13.V4H(),
         MemOperand(x19, 32, PostIndex));
  __ Ld4(v14.V2S(),
         v15.V2S(),
         v16.V2S(),
         v17.V2S(),
         MemOperand(x20, 32, PostIndex));
  __ Ld4(v30.V2S(),
         v31.V2S(),
         v0.V2S(),
         v1.V2S(),
         MemOperand(x21, 32, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x1c1814100c080400, q2);
  ASSERT_EQUAL_128(0, 0x1d1915110d090501, q3);
  ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q4);
  ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q5);
  ASSERT_EQUAL_128(0, 0x1d1915110d090501, q6);
  ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q7);
  ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q8);
  ASSERT_EQUAL_128(0, 0x201c1814100c0804, q9);
  ASSERT_EQUAL_128(0, 0x1b1a13120b0a0302, q10);
  ASSERT_EQUAL_128(0, 0x1d1c15140d0c0504, q11);
  ASSERT_EQUAL_128(0, 0x1f1e17160f0e0706, q12);
  ASSERT_EQUAL_128(0, 0x2120191811100908, q13);
  ASSERT_EQUAL_128(0, 0x1615141306050403, q14);
  ASSERT_EQUAL_128(0, 0x1a1918170a090807, q15);
  ASSERT_EQUAL_128(0, 0x1e1d1c1b0e0d0c0b, q16);
  ASSERT_EQUAL_128(0, 0x2221201f1211100f, q17);
  ASSERT_EQUAL_128(0, 0x1716151407060504, q30);
  ASSERT_EQUAL_128(0, 0x1b1a19180b0a0908, q31);
  ASSERT_EQUAL_128(0, 0x1f1e1d1c0f0e0d0c, q0);
  ASSERT_EQUAL_128(0, 0x2322212013121110, q1);

  ASSERT_EQUAL_64(src_base + 1, x17);
  ASSERT_EQUAL_64(src_base + 1 + 32, x18);
  ASSERT_EQUAL_64(src_base + 2 + 32, x19);
  ASSERT_EQUAL_64(src_base + 3 + 32, x20);
  ASSERT_EQUAL_64(src_base + 4 + 32, x21);

  TEARDOWN();
}


TEST(neon_ld4_q) {
  SETUP();

  uint8_t src[64 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Ld4(v2.V16B(), v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v6.V16B(), v7.V16B(), v8.V16B(), v9.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v10.V8H(), v11.V8H(), v12.V8H(), v13.V8H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v14.V4S(), v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v18.V2D(), v19.V2D(), v20.V2D(), v21.V2D(), MemOperand(x17));
  END();

  RUN();

  ASSERT_EQUAL_128(0x3c3834302c282420, 0x1c1814100c080400, q2);
  ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q3);
  ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q4);
  ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q5);
  ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q6);
  ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q7);
  ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q8);
  ASSERT_EQUAL_128(0x403c3834302c2824, 0x201c1814100c0804, q9);
  ASSERT_EQUAL_128(0x3b3a33322b2a2322, 0x1b1a13120b0a0302, q10);
  ASSERT_EQUAL_128(0x3d3c35342d2c2524, 0x1d1c15140d0c0504, q11);
  ASSERT_EQUAL_128(0x3f3e37362f2e2726, 0x1f1e17160f0e0706, q12);
  ASSERT_EQUAL_128(0x4140393831302928, 0x2120191811100908, q13);
  ASSERT_EQUAL_128(0x3635343326252423, 0x1615141306050403, q14);
  ASSERT_EQUAL_128(0x3a3938372a292827, 0x1a1918170a090807, q15);
  ASSERT_EQUAL_128(0x3e3d3c3b2e2d2c2b, 0x1e1d1c1b0e0d0c0b, q16);
  ASSERT_EQUAL_128(0x4241403f3231302f, 0x2221201f1211100f, q17);
  ASSERT_EQUAL_128(0x2b2a292827262524, 0x0b0a090807060504, q18);
  ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x131211100f0e0d0c, q19);
  ASSERT_EQUAL_128(0x3b3a393837363534, 0x1b1a191817161514, q20);
  ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x232221201f1e1d1c, q21);

  TEARDOWN();
}


TEST(neon_ld4_q_postindex) {
  SETUP();

  uint8_t src[64 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base + 1);
  __ Mov(x19, src_base + 2);
  __ Mov(x20, src_base + 3);
  __ Mov(x21, src_base + 4);
  __ Mov(x22, 1);

  __ Ld4(v2.V16B(),
         v3.V16B(),
         v4.V16B(),
         v5.V16B(),
         MemOperand(x17, x22, PostIndex));
  __ Ld4(v6.V16B(),
         v7.V16B(),
         v8.V16B(),
         v9.V16B(),
         MemOperand(x18, 64, PostIndex));
  __ Ld4(v10.V8H(),
         v11.V8H(),
         v12.V8H(),
         v13.V8H(),
         MemOperand(x19, 64, PostIndex));
  __ Ld4(v14.V4S(),
         v15.V4S(),
         v16.V4S(),
         v17.V4S(),
         MemOperand(x20, 64, PostIndex));
  __ Ld4(v30.V2D(),
         v31.V2D(),
         v0.V2D(),
         v1.V2D(),
         MemOperand(x21, 64, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_128(0x3c3834302c282420, 0x1c1814100c080400, q2);
  ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q3);
  ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q4);
  ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q5);
  ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q6);
  ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q7);
  ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q8);
  ASSERT_EQUAL_128(0x403c3834302c2824, 0x201c1814100c0804, q9);
  ASSERT_EQUAL_128(0x3b3a33322b2a2322, 0x1b1a13120b0a0302, q10);
  ASSERT_EQUAL_128(0x3d3c35342d2c2524, 0x1d1c15140d0c0504, q11);
  ASSERT_EQUAL_128(0x3f3e37362f2e2726, 0x1f1e17160f0e0706, q12);
  ASSERT_EQUAL_128(0x4140393831302928, 0x2120191811100908, q13);
  ASSERT_EQUAL_128(0x3635343326252423, 0x1615141306050403, q14);
  ASSERT_EQUAL_128(0x3a3938372a292827, 0x1a1918170a090807, q15);
  ASSERT_EQUAL_128(0x3e3d3c3b2e2d2c2b, 0x1e1d1c1b0e0d0c0b, q16);
  ASSERT_EQUAL_128(0x4241403f3231302f, 0x2221201f1211100f, q17);
  ASSERT_EQUAL_128(0x2b2a292827262524, 0x0b0a090807060504, q30);
  ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x131211100f0e0d0c, q31);
  ASSERT_EQUAL_128(0x3b3a393837363534, 0x1b1a191817161514, q0);
  ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x232221201f1e1d1c, q1);

  ASSERT_EQUAL_64(src_base + 1, x17);
  ASSERT_EQUAL_64(src_base + 1 + 64, x18);
  ASSERT_EQUAL_64(src_base + 2 + 64, x19);
  ASSERT_EQUAL_64(src_base + 3 + 64, x20);
  ASSERT_EQUAL_64(src_base + 4 + 64, x21);

  TEARDOWN();
}


TEST(neon_ld4_lane) {
  SETUP();

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();

  // Test loading whole register by element.
  __ Mov(x17, src_base);
  for (int i = 15; i >= 0; i--) {
    __ Ld4(v0.B(), v1.B(), v2.B(), v3.B(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 7; i >= 0; i--) {
    __ Ld4(v4.H(), v5.H(), v6.H(), v7.H(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 3; i >= 0; i--) {
    __ Ld4(v8.S(), v9.S(), v10.S(), v11.S(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 1; i >= 0; i--) {
    __ Ld4(v12.D(), v13.D(), v14.D(), v15.D(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  // Test loading a single element into an initialised register.
  __ Mov(x17, src_base);
  __ Mov(x4, x17);
  __ Ldr(q16, MemOperand(x4, 16, PostIndex));
  __ Ldr(q17, MemOperand(x4, 16, PostIndex));
  __ Ldr(q18, MemOperand(x4, 16, PostIndex));
  __ Ldr(q19, MemOperand(x4));
  __ Ld4(v16.B(), v17.B(), v18.B(), v19.B(), 4, MemOperand(x17));

  __ Mov(x5, x17);
  __ Ldr(q20, MemOperand(x5, 16, PostIndex));
  __ Ldr(q21, MemOperand(x5, 16, PostIndex));
  __ Ldr(q22, MemOperand(x5, 16, PostIndex));
  __ Ldr(q23, MemOperand(x5));
  __ Ld4(v20.H(), v21.H(), v22.H(), v23.H(), 3, MemOperand(x17));

  __ Mov(x6, x17);
  __ Ldr(q24, MemOperand(x6, 16, PostIndex));
  __ Ldr(q25, MemOperand(x6, 16, PostIndex));
  __ Ldr(q26, MemOperand(x6, 16, PostIndex));
  __ Ldr(q27, MemOperand(x6));
  __ Ld4(v24.S(), v25.S(), v26.S(), v27.S(), 2, MemOperand(x17));

  __ Mov(x7, x17);
  __ Ldr(q28, MemOperand(x7, 16, PostIndex));
  __ Ldr(q29, MemOperand(x7, 16, PostIndex));
  __ Ldr(q30, MemOperand(x7, 16, PostIndex));
  __ Ldr(q31, MemOperand(x7));
  __ Ld4(v28.D(), v29.D(), v30.D(), v31.D(), 1, MemOperand(x17));

  END();

  RUN();

  ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
  ASSERT_EQUAL_128(0x0102030405060708, 0x090a0b0c0d0e0f10, q1);
  ASSERT_EQUAL_128(0x0203040506070809, 0x0a0b0c0d0e0f1011, q2);
  ASSERT_EQUAL_128(0x030405060708090a, 0x0b0c0d0e0f101112, q3);
  ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q4);
  ASSERT_EQUAL_128(0x0302040305040605, 0x0706080709080a09, q5);
  ASSERT_EQUAL_128(0x0504060507060807, 0x09080a090b0a0c0b, q6);
  ASSERT_EQUAL_128(0x0706080709080a09, 0x0b0a0c0b0d0c0e0d, q7);
  ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q8);
  ASSERT_EQUAL_128(0x0706050408070605, 0x090807060a090807, q9);
  ASSERT_EQUAL_128(0x0b0a09080c0b0a09, 0x0d0c0b0a0e0d0c0b, q10);
  ASSERT_EQUAL_128(0x0f0e0d0c100f0e0d, 0x11100f0e1211100f, q11);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q12);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x100f0e0d0c0b0a09, q13);
  ASSERT_EQUAL_128(0x1716151413121110, 0x1817161514131211, q14);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x201f1e1d1c1b1a19, q15);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q16);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q17);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q18);
  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736350333323130, q19);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q20);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q21);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q22);
  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x0706353433323130, q23);
  ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q24);
  ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q25);
  ASSERT_EQUAL_128(0x2f2e2d2c0b0a0908, 0x2726252423222120, q26);
  ASSERT_EQUAL_128(0x3f3e3d3c0f0e0d0c, 0x3736353433323130, q27);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q28);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q29);
  ASSERT_EQUAL_128(0x1716151413121110, 0x2726252423222120, q30);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x3736353433323130, q31);

  TEARDOWN();
}


TEST(neon_ld4_lane_postindex) {
  SETUP();

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();

  // Test loading whole register by element.
  __ Mov(x17, src_base);
  for (int i = 15; i >= 0; i--) {
    __ Ld4(v0.B(), v1.B(), v2.B(), v3.B(), i, MemOperand(x17, 4, PostIndex));
  }

  __ Mov(x18, src_base);
  for (int i = 7; i >= 0; i--) {
    __ Ld4(v4.H(), v5.H(), v6.H(), v7.H(), i, MemOperand(x18, 8, PostIndex));
  }

  __ Mov(x19, src_base);
  for (int i = 3; i >= 0; i--) {
    __ Ld4(v8.S(), v9.S(), v10.S(), v11.S(), i, MemOperand(x19, 16, PostIndex));
  }

  __ Mov(x20, src_base);
  for (int i = 1; i >= 0; i--) {
    __ Ld4(v12.D(),
           v13.D(),
           v14.D(),
           v15.D(),
           i,
           MemOperand(x20, 32, PostIndex));
  }

  // Test loading a single element into an initialised register.
  __ Mov(x25, 1);
  __ Mov(x21, src_base);
  __ Mov(x22, src_base);
  __ Mov(x23, src_base);
  __ Mov(x24, src_base);

  __ Mov(x4, x21);
  __ Ldr(q16, MemOperand(x4, 16, PostIndex));
  __ Ldr(q17, MemOperand(x4, 16, PostIndex));
  __ Ldr(q18, MemOperand(x4, 16, PostIndex));
  __ Ldr(q19, MemOperand(x4));
  __ Ld4(v16.B(),
         v17.B(),
         v18.B(),
         v19.B(),
         4,
         MemOperand(x21, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x5, x22);
  __ Ldr(q20, MemOperand(x5, 16, PostIndex));
  __ Ldr(q21, MemOperand(x5, 16, PostIndex));
  __ Ldr(q22, MemOperand(x5, 16, PostIndex));
  __ Ldr(q23, MemOperand(x5));
  __ Ld4(v20.H(),
         v21.H(),
         v22.H(),
         v23.H(),
         3,
         MemOperand(x22, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x6, x23);
  __ Ldr(q24, MemOperand(x6, 16, PostIndex));
  __ Ldr(q25, MemOperand(x6, 16, PostIndex));
  __ Ldr(q26, MemOperand(x6, 16, PostIndex));
  __ Ldr(q27, MemOperand(x6));
  __ Ld4(v24.S(),
         v25.S(),
         v26.S(),
         v27.S(),
         2,
         MemOperand(x23, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x7, x24);
  __ Ldr(q28, MemOperand(x7, 16, PostIndex));
  __ Ldr(q29, MemOperand(x7, 16, PostIndex));
  __ Ldr(q30, MemOperand(x7, 16, PostIndex));
  __ Ldr(q31, MemOperand(x7));
  __ Ld4(v28.D(),
         v29.D(),
         v30.D(),
         v31.D(),
         1,
         MemOperand(x24, x25, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x0004080c1014181c, 0x2024282c3034383c, q0);
  ASSERT_EQUAL_128(0x0105090d1115191d, 0x2125292d3135393d, q1);
  ASSERT_EQUAL_128(0x02060a0e12161a1e, 0x22262a2e32363a3e, q2);
  ASSERT_EQUAL_128(0x03070b0f13171b1f, 0x23272b2f33373b3f, q3);
  ASSERT_EQUAL_128(0x0100090811101918, 0x2120292831303938, q4);
  ASSERT_EQUAL_128(0x03020b0a13121b1a, 0x23222b2a33323b3a, q5);
  ASSERT_EQUAL_128(0x05040d0c15141d1c, 0x25242d2c35343d3c, q6);
  ASSERT_EQUAL_128(0x07060f0e17161f1e, 0x27262f2e37363f3e, q7);
  ASSERT_EQUAL_128(0x0302010013121110, 0x2322212033323130, q8);
  ASSERT_EQUAL_128(0x0706050417161514, 0x2726252437363534, q9);
  ASSERT_EQUAL_128(0x0b0a09081b1a1918, 0x2b2a29283b3a3938, q10);
  ASSERT_EQUAL_128(0x0f0e0d0c1f1e1d1c, 0x2f2e2d2c3f3e3d3c, q11);
  ASSERT_EQUAL_128(0x0706050403020100, 0x2726252423222120, q12);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x2f2e2d2c2b2a2928, q13);
  ASSERT_EQUAL_128(0x1716151413121110, 0x3736353433323130, q14);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x3f3e3d3c3b3a3938, q15);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q16);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q17);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q18);
  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736350333323130, q19);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q20);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q21);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q22);
  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x0706353433323130, q23);
  ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q24);
  ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q25);
  ASSERT_EQUAL_128(0x2f2e2d2c0b0a0908, 0x2726252423222120, q26);
  ASSERT_EQUAL_128(0x3f3e3d3c0f0e0d0c, 0x3736353433323130, q27);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q28);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q29);
  ASSERT_EQUAL_128(0x1716151413121110, 0x2726252423222120, q30);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x3736353433323130, q31);

  ASSERT_EQUAL_64(src_base + 64, x17);
  ASSERT_EQUAL_64(src_base + 64, x18);
  ASSERT_EQUAL_64(src_base + 64, x19);
  ASSERT_EQUAL_64(src_base + 64, x20);
  ASSERT_EQUAL_64(src_base + 1, x21);
  ASSERT_EQUAL_64(src_base + 2, x22);
  ASSERT_EQUAL_64(src_base + 3, x23);
  ASSERT_EQUAL_64(src_base + 4, x24);

  TEARDOWN();
}
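

// LD4R behaves like LD3R with a fourth register: one four-element structure
// is read and each element is broadcast to every lane of its destination
// register.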


TEST(neon_ld4_alllanes) {
  SETUP();

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base + 1);
  __ Mov(x18, 1);
  __ Ld4r(v0.V8B(), v1.V8B(), v2.V8B(), v3.V8B(), MemOperand(x17));
  __ Add(x17, x17, 4);
  __ Ld4r(v4.V16B(), v5.V16B(), v6.V16B(), v7.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4r(v8.V4H(), v9.V4H(), v10.V4H(), v11.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4r(v12.V8H(), v13.V8H(), v14.V8H(), v15.V8H(), MemOperand(x17));
  __ Add(x17, x17, 8);
  __ Ld4r(v16.V2S(), v17.V2S(), v18.V2S(), v19.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4r(v20.V4S(), v21.V4S(), v22.V4S(), v23.V4S(), MemOperand(x17));
  __ Add(x17, x17, 16);
  __ Ld4r(v24.V2D(), v25.V2D(), v26.V2D(), v27.V2D(), MemOperand(x17));
  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0404040404040404, q3);
  ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
  ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
  ASSERT_EQUAL_128(0x0707070707070707, 0x0707070707070707, q6);
  ASSERT_EQUAL_128(0x0808080808080808, 0x0808080808080808, q7);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q8);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0908090809080908, q9);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0b0a0b0a0b0a0b0a, q10);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0d0c0d0c0d0c0d0c, q11);
  ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q12);
  ASSERT_EQUAL_128(0x0a090a090a090a09, 0x0a090a090a090a09, q13);
  ASSERT_EQUAL_128(0x0c0b0c0b0c0b0c0b, 0x0c0b0c0b0c0b0c0b, q14);
  ASSERT_EQUAL_128(0x0e0d0e0d0e0d0e0d, 0x0e0d0e0d0e0d0e0d, q15);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1211100f1211100f, q16);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1615141316151413, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1a1918171a191817, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1e1d1c1b1e1d1c1b, q19);
  ASSERT_EQUAL_128(0x1312111013121110, 0x1312111013121110, q20);
  ASSERT_EQUAL_128(0x1716151417161514, 0x1716151417161514, q21);
  ASSERT_EQUAL_128(0x1b1a19181b1a1918, 0x1b1a19181b1a1918, q22);
  ASSERT_EQUAL_128(0x1f1e1d1c1f1e1d1c, 0x1f1e1d1c1f1e1d1c, q23);
  ASSERT_EQUAL_128(0x2726252423222120, 0x2726252423222120, q24);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2f2e2d2c2b2a2928, q25);
  ASSERT_EQUAL_128(0x3736353433323130, 0x3736353433323130, q26);
  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3f3e3d3c3b3a3938, q27);

  TEARDOWN();
}


TEST(neon_ld4_alllanes_postindex) {
  SETUP();

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base + 1);
  __ Mov(x18, 1);
  __ Ld4r(v0.V8B(),
          v1.V8B(),
          v2.V8B(),
          v3.V8B(),
          MemOperand(x17, 4, PostIndex));
  __ Ld4r(v4.V16B(),
          v5.V16B(),
          v6.V16B(),
          v7.V16B(),
          MemOperand(x17, x18, PostIndex));
  __ Ld4r(v8.V4H(),
          v9.V4H(),
          v10.V4H(),
          v11.V4H(),
          MemOperand(x17, x18, PostIndex));
  __ Ld4r(v12.V8H(),
          v13.V8H(),
          v14.V8H(),
          v15.V8H(),
          MemOperand(x17, 8, PostIndex));
  __ Ld4r(v16.V2S(),
          v17.V2S(),
          v18.V2S(),
          v19.V2S(),
          MemOperand(x17, x18, PostIndex));
  __ Ld4r(v20.V4S(),
          v21.V4S(),
          v22.V4S(),
          v23.V4S(),
          MemOperand(x17, 16, PostIndex));
  __ Ld4r(v24.V2D(),
          v25.V2D(),
          v26.V2D(),
          v27.V2D(),
          MemOperand(x17, 32, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0404040404040404, q3);
  ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
  ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
  ASSERT_EQUAL_128(0x0707070707070707, 0x0707070707070707, q6);
  ASSERT_EQUAL_128(0x0808080808080808, 0x0808080808080808, q7);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q8);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0908090809080908, q9);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0b0a0b0a0b0a0b0a, q10);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0d0c0d0c0d0c0d0c, q11);
  ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q12);
  ASSERT_EQUAL_128(0x0a090a090a090a09, 0x0a090a090a090a09, q13);
  ASSERT_EQUAL_128(0x0c0b0c0b0c0b0c0b, 0x0c0b0c0b0c0b0c0b, q14);
  ASSERT_EQUAL_128(0x0e0d0e0d0e0d0e0d, 0x0e0d0e0d0e0d0e0d, q15);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1211100f1211100f, q16);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1615141316151413, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1a1918171a191817, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1e1d1c1b1e1d1c1b, q19);
  ASSERT_EQUAL_128(0x1312111013121110, 0x1312111013121110, q20);
  ASSERT_EQUAL_128(0x1716151417161514, 0x1716151417161514, q21);
  ASSERT_EQUAL_128(0x1b1a19181b1a1918, 0x1b1a19181b1a1918, q22);
  ASSERT_EQUAL_128(0x1f1e1d1c1f1e1d1c, 0x1f1e1d1c1f1e1d1c, q23);
  ASSERT_EQUAL_128(0x2726252423222120, 0x2726252423222120, q24);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2f2e2d2c2b2a2928, q25);
  ASSERT_EQUAL_128(0x3736353433323130, 0x3736353433323130, q26);
  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3f3e3d3c3b3a3938, q27);

  ASSERT_EQUAL_64(src_base + 64, x17);

  TEARDOWN();
}


TEST(neon_st1_lane) {
  SETUP();

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, -16);
  __ Ldr(q0, MemOperand(x17));

  for (int i = 15; i >= 0; i--) {
    __ St1(v0.B(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }
  __ Ldr(q1, MemOperand(x17, x18));

  for (int i = 7; i >= 0; i--) {
    __ St1(v0.H(), i, MemOperand(x17));
    __ Add(x17, x17, 2);
  }
  __ Ldr(q2, MemOperand(x17, x18));

  for (int i = 3; i >= 0; i--) {
    __ St1(v0.S(), i, MemOperand(x17));
    __ Add(x17, x17, 4);
  }
  __ Ldr(q3, MemOperand(x17, x18));

  for (int i = 1; i >= 0; i--) {
    __ St1(v0.D(), i, MemOperand(x17));
    __ Add(x17, x17, 8);
  }
  __ Ldr(q4, MemOperand(x17, x18));

  END();

  RUN();

  ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q1);
  ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q2);
  ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q3);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q4);

  TEARDOWN();
}
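

// ST2 with a lane index is the store-side counterpart of LD2: it gathers
// lane `i` from the two source registers and writes them out as one
// interleaved two-element structure. Roughly, for the byte form (an
// illustrative sketch only, not test infrastructure code):
//
//   mem[0] = v0.byte(i);
//   mem[1] = v1.byte(i);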


TEST(neon_st2_lane) {
  SETUP();

  // Struct size * addressing modes * element sizes * vector size.
  uint8_t dst[2 * 2 * 4 * 16];
  memset(dst, 0, sizeof(dst));
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, dst_base);
  __ Mov(x18, dst_base);
  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);

  // Test B stores with and without post index.
  for (int i = 15; i >= 0; i--) {
    __ St2(v0.B(), v1.B(), i, MemOperand(x18));
    __ Add(x18, x18, 2);
  }
  for (int i = 15; i >= 0; i--) {
    __ St2(v0.B(), v1.B(), i, MemOperand(x18, 2, PostIndex));
  }
  __ Ldr(q2, MemOperand(x17, 0 * 16));
  __ Ldr(q3, MemOperand(x17, 1 * 16));
  __ Ldr(q4, MemOperand(x17, 2 * 16));
  __ Ldr(q5, MemOperand(x17, 3 * 16));

  // Test H stores with and without post index.
  __ Mov(x0, 4);
  for (int i = 7; i >= 0; i--) {
    __ St2(v0.H(), v1.H(), i, MemOperand(x18));
    __ Add(x18, x18, 4);
  }
  for (int i = 7; i >= 0; i--) {
    __ St2(v0.H(), v1.H(), i, MemOperand(x18, x0, PostIndex));
  }
  __ Ldr(q6, MemOperand(x17, 4 * 16));
  __ Ldr(q7, MemOperand(x17, 5 * 16));
  __ Ldr(q16, MemOperand(x17, 6 * 16));
  __ Ldr(q17, MemOperand(x17, 7 * 16));

  // Test S stores with and without post index.
  for (int i = 3; i >= 0; i--) {
    __ St2(v0.S(), v1.S(), i, MemOperand(x18));
    __ Add(x18, x18, 8);
  }
  for (int i = 3; i >= 0; i--) {
    __ St2(v0.S(), v1.S(), i, MemOperand(x18, 8, PostIndex));
  }
  __ Ldr(q18, MemOperand(x17, 8 * 16));
  __ Ldr(q19, MemOperand(x17, 9 * 16));
  __ Ldr(q20, MemOperand(x17, 10 * 16));
  __ Ldr(q21, MemOperand(x17, 11 * 16));

  // Test D stores with and without post index.
  __ Mov(x0, 16);
  __ St2(v0.D(), v1.D(), 1, MemOperand(x18));
  __ Add(x18, x18, 16);
  __ St2(v0.D(), v1.D(), 0, MemOperand(x18, 16, PostIndex));
  __ St2(v0.D(), v1.D(), 1, MemOperand(x18, x0, PostIndex));
  __ St2(v0.D(), v1.D(), 0, MemOperand(x18, x0, PostIndex));
  __ Ldr(q22, MemOperand(x17, 12 * 16));
  __ Ldr(q23, MemOperand(x17, 13 * 16));
  __ Ldr(q24, MemOperand(x17, 14 * 16));
  __ Ldr(q25, MemOperand(x17, 15 * 16));
  END();

  RUN();

  ASSERT_EQUAL_128(0x1707160615051404, 0x1303120211011000, q2);
  ASSERT_EQUAL_128(0x1f0f1e0e1d0d1c0c, 0x1b0b1a0a19091808, q3);
  ASSERT_EQUAL_128(0x1707160615051404, 0x1303120211011000, q4);
  ASSERT_EQUAL_128(0x1f0f1e0e1d0d1c0c, 0x1b0b1a0a19091808, q5);

  ASSERT_EQUAL_128(0x1617060714150405, 0x1213020310110001, q6);
  ASSERT_EQUAL_128(0x1e1f0e0f1c1d0c0d, 0x1a1b0a0b18190809, q7);
  ASSERT_EQUAL_128(0x1617060714150405, 0x1213020310110001, q16);
  ASSERT_EQUAL_128(0x1e1f0e0f1c1d0c0d, 0x1a1b0a0b18190809, q17);

  ASSERT_EQUAL_128(0x1415161704050607, 0x1011121300010203, q18);
  ASSERT_EQUAL_128(0x1c1d1e1f0c0d0e0f, 0x18191a1b08090a0b, q19);
  ASSERT_EQUAL_128(0x1415161704050607, 0x1011121300010203, q20);
  ASSERT_EQUAL_128(0x1c1d1e1f0c0d0e0f, 0x18191a1b08090a0b, q21);

  ASSERT_EQUAL_128(0x1011121314151617, 0x0001020304050607, q22);
  ASSERT_EQUAL_128(0x18191a1b1c1d1e1f, 0x08090a0b0c0d0e0f, q23);
  // The post-index D stores land in the next two 16-byte slots; check the
  // registers loaded from them (q24 and q25, not q22 and q23 a second time).
  ASSERT_EQUAL_128(0x1011121314151617, 0x0001020304050607, q24);
  ASSERT_EQUAL_128(0x18191a1b1c1d1e1f, 0x08090a0b0c0d0e0f, q25);

  TEARDOWN();
}


TEST(neon_st3_lane) {
  SETUP();

  // Struct size * addressing modes * element sizes * vector size.
  uint8_t dst[3 * 2 * 4 * 16];
  memset(dst, 0, sizeof(dst));
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, dst_base);
  __ Mov(x18, dst_base);
  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);
  __ Movi(v2.V2D(), 0x2021222324252627, 0x28292a2b2c2d2e2f);

  // Test B stores with and without post index.
  for (int i = 15; i >= 0; i--) {
    __ St3(v0.B(), v1.B(), v2.B(), i, MemOperand(x18));
    __ Add(x18, x18, 3);
  }
  for (int i = 15; i >= 0; i--) {
    __ St3(v0.B(), v1.B(), v2.B(), i, MemOperand(x18, 3, PostIndex));
  }
  __ Ldr(q3, MemOperand(x17, 0 * 16));
  __ Ldr(q4, MemOperand(x17, 1 * 16));
  __ Ldr(q5, MemOperand(x17, 2 * 16));
  __ Ldr(q6, MemOperand(x17, 3 * 16));
  __ Ldr(q7, MemOperand(x17, 4 * 16));
  __ Ldr(q16, MemOperand(x17, 5 * 16));

  // Test H stores with and without post index.
  __ Mov(x0, 6);
  for (int i = 7; i >= 0; i--) {
    __ St3(v0.H(), v1.H(), v2.H(), i, MemOperand(x18));
    __ Add(x18, x18, 6);
  }
  for (int i = 7; i >= 0; i--) {
    __ St3(v0.H(), v1.H(), v2.H(), i, MemOperand(x18, x0, PostIndex));
  }
  __ Ldr(q17, MemOperand(x17, 6 * 16));
  __ Ldr(q18, MemOperand(x17, 7 * 16));
  __ Ldr(q19, MemOperand(x17, 8 * 16));
  __ Ldr(q20, MemOperand(x17, 9 * 16));
  __ Ldr(q21, MemOperand(x17, 10 * 16));
  __ Ldr(q22, MemOperand(x17, 11 * 16));

  // Test S stores with and without post index.
  for (int i = 3; i >= 0; i--) {
    __ St3(v0.S(), v1.S(), v2.S(), i, MemOperand(x18));
    __ Add(x18, x18, 12);
  }
  for (int i = 3; i >= 0; i--) {
    __ St3(v0.S(), v1.S(), v2.S(), i, MemOperand(x18, 12, PostIndex));
  }
  __ Ldr(q23, MemOperand(x17, 12 * 16));
  __ Ldr(q24, MemOperand(x17, 13 * 16));
  __ Ldr(q25, MemOperand(x17, 14 * 16));
  __ Ldr(q26, MemOperand(x17, 15 * 16));
  __ Ldr(q27, MemOperand(x17, 16 * 16));
  __ Ldr(q28, MemOperand(x17, 17 * 16));

  // Test D stores with and without post index.
  __ Mov(x0, 24);
  __ St3(v0.D(), v1.D(), v2.D(), 1, MemOperand(x18));
  __ Add(x18, x18, 24);
  __ St3(v0.D(), v1.D(), v2.D(), 0, MemOperand(x18, 24, PostIndex));
  __ St3(v0.D(), v1.D(), v2.D(), 1, MemOperand(x18, x0, PostIndex));
  __ Ldr(q29, MemOperand(x17, 18 * 16));
  __ Ldr(q30, MemOperand(x17, 19 * 16));
  __ Ldr(q31, MemOperand(x17, 20 * 16));
  END();

  RUN();

  ASSERT_EQUAL_128(0x0524140423130322, 0x1202211101201000, q3);
  ASSERT_EQUAL_128(0x1a0a291909281808, 0x2717072616062515, q4);
  ASSERT_EQUAL_128(0x2f1f0f2e1e0e2d1d, 0x0d2c1c0c2b1b0b2a, q5);
  ASSERT_EQUAL_128(0x0524140423130322, 0x1202211101201000, q6);
  ASSERT_EQUAL_128(0x1a0a291909281808, 0x2717072616062515, q7);
  ASSERT_EQUAL_128(0x2f1f0f2e1e0e2d1d, 0x0d2c1c0c2b1b0b2a, q16);

  ASSERT_EQUAL_128(0x1415040522231213, 0x0203202110110001, q17);
  ASSERT_EQUAL_128(0x0a0b282918190809, 0x2627161706072425, q18);
  ASSERT_EQUAL_128(0x2e2f1e1f0e0f2c2d, 0x1c1d0c0d2a2b1a1b, q19);
  ASSERT_EQUAL_128(0x1415040522231213, 0x0203202110110001, q20);
  ASSERT_EQUAL_128(0x0a0b282918190809, 0x2627161706072425, q21);
  ASSERT_EQUAL_128(0x2e2f1e1f0e0f2c2d, 0x1c1d0c0d2a2b1a1b, q22);

  ASSERT_EQUAL_128(0x0405060720212223, 0x1011121300010203, q23);
  ASSERT_EQUAL_128(0x18191a1b08090a0b, 0x2425262714151617, q24);
  ASSERT_EQUAL_128(0x2c2d2e2f1c1d1e1f, 0x0c0d0e0f28292a2b, q25);
  ASSERT_EQUAL_128(0x0405060720212223, 0x1011121300010203, q26);
  ASSERT_EQUAL_128(0x18191a1b08090a0b, 0x2425262714151617, q27);
  ASSERT_EQUAL_128(0x2c2d2e2f1c1d1e1f, 0x0c0d0e0f28292a2b, q28);

  // Check the D stores as well; q29-q31 were loaded from the three 16-byte
  // slots covering the lane-1, lane-0, lane-1 stores above.
  ASSERT_EQUAL_128(0x1011121314151617, 0x0001020304050607, q29);
  ASSERT_EQUAL_128(0x08090a0b0c0d0e0f, 0x2021222324252627, q30);
  ASSERT_EQUAL_128(0x28292a2b2c2d2e2f, 0x18191a1b1c1d1e1f, q31);

  TEARDOWN();
}


TEST(neon_st4_lane) {
  SETUP();

  // Struct size * element sizes * vector size.
  uint8_t dst[4 * 4 * 16];
  memset(dst, 0, sizeof(dst));
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, dst_base);
  __ Mov(x18, dst_base);
  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);
  __ Movi(v2.V2D(), 0x2021222324252627, 0x28292a2b2c2d2e2f);
  __ Movi(v3.V2D(), 0x2021222324252627, 0x28292a2b2c2d2e2f);

  // Test B stores without post index.
  for (int i = 15; i >= 0; i--) {
    __ St4(v0.B(), v1.B(), v2.B(), v3.B(), i, MemOperand(x18));
    __ Add(x18, x18, 4);
  }
  __ Ldr(q4, MemOperand(x17, 0 * 16));
  __ Ldr(q5, MemOperand(x17, 1 * 16));
  __ Ldr(q6, MemOperand(x17, 2 * 16));
  __ Ldr(q7, MemOperand(x17, 3 * 16));

  // Test H stores with post index.
  __ Mov(x0, 8);
  for (int i = 7; i >= 0; i--) {
    __ St4(v0.H(), v1.H(), v2.H(), v3.H(), i, MemOperand(x18, x0, PostIndex));
  }
  __ Ldr(q16, MemOperand(x17, 4 * 16));
  __ Ldr(q17, MemOperand(x17, 5 * 16));
  __ Ldr(q18, MemOperand(x17, 6 * 16));
  __ Ldr(q19, MemOperand(x17, 7 * 16));

  // Test S stores without post index.
  for (int i = 3; i >= 0; i--) {
    __ St4(v0.S(), v1.S(), v2.S(), v3.S(), i, MemOperand(x18));
    __ Add(x18, x18, 16);
  }
  __ Ldr(q20, MemOperand(x17, 8 * 16));
  __ Ldr(q21, MemOperand(x17, 9 * 16));
  __ Ldr(q22, MemOperand(x17, 10 * 16));
  __ Ldr(q23, MemOperand(x17, 11 * 16));

  // Test D stores with post index.
  __ Mov(x0, 32);
  __ St4(v0.D(), v1.D(), v2.D(), v3.D(), 0, MemOperand(x18, 32, PostIndex));
  __ St4(v0.D(), v1.D(), v2.D(), v3.D(), 1, MemOperand(x18, x0, PostIndex));

  __ Ldr(q24, MemOperand(x17, 12 * 16));
  __ Ldr(q25, MemOperand(x17, 13 * 16));
  __ Ldr(q26, MemOperand(x17, 14 * 16));
  __ Ldr(q27, MemOperand(x17, 15 * 16));
  END();

  RUN();

  ASSERT_EQUAL_128(0x2323130322221202, 0x2121110120201000, q4);
  ASSERT_EQUAL_128(0x2727170726261606, 0x2525150524241404, q5);
  ASSERT_EQUAL_128(0x2b2b1b0b2a2a1a0a, 0x2929190928281808, q6);
  ASSERT_EQUAL_128(0x2f2f1f0f2e2e1e0e, 0x2d2d1d0d2c2c1c0c, q7);

  ASSERT_EQUAL_128(0x2223222312130203, 0x2021202110110001, q16);
  ASSERT_EQUAL_128(0x2627262716170607, 0x2425242514150405, q17);
  ASSERT_EQUAL_128(0x2a2b2a2b1a1b0a0b, 0x2829282918190809, q18);
  ASSERT_EQUAL_128(0x2e2f2e2f1e1f0e0f, 0x2c2d2c2d1c1d0c0d, q19);

  ASSERT_EQUAL_128(0x2021222320212223, 0x1011121300010203, q20);
  ASSERT_EQUAL_128(0x2425262724252627, 0x1415161704050607, q21);
  ASSERT_EQUAL_128(0x28292a2b28292a2b, 0x18191a1b08090a0b, q22);
  ASSERT_EQUAL_128(0x2c2d2e2f2c2d2e2f, 0x1c1d1e1f0c0d0e0f, q23);

  ASSERT_EQUAL_128(0x18191a1b1c1d1e1f, 0x08090a0b0c0d0e0f, q24);
  ASSERT_EQUAL_128(0x28292a2b2c2d2e2f, 0x28292a2b2c2d2e2f, q25);
  ASSERT_EQUAL_128(0x1011121314151617, 0x0001020304050607, q26);
  ASSERT_EQUAL_128(0x2021222324252627, 0x2021222324252627, q27);

  TEARDOWN();
}


TEST(neon_ld1_lane_postindex) {
  SETUP();

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Mov(x19, src_base);
  __ Mov(x20, src_base);
  __ Mov(x21, src_base);
  __ Mov(x22, src_base);
  __ Mov(x23, src_base);
  __ Mov(x24, src_base);

  // Test loading whole register by element.
  for (int i = 15; i >= 0; i--) {
    __ Ld1(v0.B(), i, MemOperand(x17, 1, PostIndex));
  }

  for (int i = 7; i >= 0; i--) {
    __ Ld1(v1.H(), i, MemOperand(x18, 2, PostIndex));
  }

  for (int i = 3; i >= 0; i--) {
    __ Ld1(v2.S(), i, MemOperand(x19, 4, PostIndex));
  }

  for (int i = 1; i >= 0; i--) {
    __ Ld1(v3.D(), i, MemOperand(x20, 8, PostIndex));
  }

  // Test loading a single element into an initialised register.
  __ Mov(x25, 1);
  __ Ldr(q4, MemOperand(x21));
  __ Ld1(v4.B(), 4, MemOperand(x21, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Ldr(q5, MemOperand(x22));
  __ Ld1(v5.H(), 3, MemOperand(x22, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Ldr(q6, MemOperand(x23));
  __ Ld1(v6.S(), 2, MemOperand(x23, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Ldr(q7, MemOperand(x24));
  __ Ld1(v7.D(), 1, MemOperand(x24, x25, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
  ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q1);
  ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q2);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q3);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q4);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q5);
  ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q6);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q7);

  ASSERT_EQUAL_64(src_base + 16, x17);
  ASSERT_EQUAL_64(src_base + 16, x18);
  ASSERT_EQUAL_64(src_base + 16, x19);
  ASSERT_EQUAL_64(src_base + 16, x20);
  ASSERT_EQUAL_64(src_base + 1, x21);
  ASSERT_EQUAL_64(src_base + 2, x22);
  ASSERT_EQUAL_64(src_base + 3, x23);
  ASSERT_EQUAL_64(src_base + 4, x24);

  TEARDOWN();
}


TEST(neon_st1_lane_postindex) {
  SETUP();

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, -16);
  __ Ldr(q0, MemOperand(x17));

  for (int i = 15; i >= 0; i--) {
    __ St1(v0.B(), i, MemOperand(x17, 1, PostIndex));
  }
  __ Ldr(q1, MemOperand(x17, x18));

  for (int i = 7; i >= 0; i--) {
    __ St1(v0.H(), i, MemOperand(x17, 2, PostIndex));
  }
  __ Ldr(q2, MemOperand(x17, x18));

  for (int i = 3; i >= 0; i--) {
    __ St1(v0.S(), i, MemOperand(x17, 4, PostIndex));
  }
  __ Ldr(q3, MemOperand(x17, x18));

  for (int i = 1; i >= 0; i--) {
    __ St1(v0.D(), i, MemOperand(x17, 8, PostIndex));
  }
  __ Ldr(q4, MemOperand(x17, x18));

  END();

  RUN();

  ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q1);
  ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q2);
  ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q3);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q4);

  TEARDOWN();
}
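

// LD1R broadcasts a single loaded element to every lane of one destination
// register. Note that the V1D variant only fills the low 64 bits, which is
// why the corresponding expected high halves below are zero.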


TEST(neon_ld1_alllanes) {
  SETUP();

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base + 1);
  __ Ld1r(v0.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v1.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v2.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v3.V8H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v4.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v5.V4S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v6.V1D(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v7.V2D(), MemOperand(x17));
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x0101010101010101, q0);
  ASSERT_EQUAL_128(0x0202020202020202, 0x0202020202020202, q1);
  ASSERT_EQUAL_128(0, 0x0403040304030403, q2);
  ASSERT_EQUAL_128(0x0504050405040504, 0x0504050405040504, q3);
  ASSERT_EQUAL_128(0, 0x0807060508070605, q4);
  ASSERT_EQUAL_128(0x0908070609080706, 0x0908070609080706, q5);
  ASSERT_EQUAL_128(0, 0x0e0d0c0b0a090807, q6);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0f0e0d0c0b0a0908, q7);

  TEARDOWN();
}


TEST(neon_ld1_alllanes_postindex) {
  SETUP();

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base + 1);
  __ Mov(x18, 1);
  __ Ld1r(v0.V8B(), MemOperand(x17, 1, PostIndex));
  __ Ld1r(v1.V16B(), MemOperand(x17, x18, PostIndex));
  __ Ld1r(v2.V4H(), MemOperand(x17, x18, PostIndex));
  __ Ld1r(v3.V8H(), MemOperand(x17, 2, PostIndex));
  __ Ld1r(v4.V2S(), MemOperand(x17, x18, PostIndex));
  __ Ld1r(v5.V4S(), MemOperand(x17, 4, PostIndex));
  __ Ld1r(v6.V2D(), MemOperand(x17, 8, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x0101010101010101, q0);
  ASSERT_EQUAL_128(0x0202020202020202, 0x0202020202020202, q1);
  ASSERT_EQUAL_128(0, 0x0403040304030403, q2);
  ASSERT_EQUAL_128(0x0504050405040504, 0x0504050405040504, q3);
  ASSERT_EQUAL_128(0, 0x0908070609080706, q4);
  ASSERT_EQUAL_128(0x0a0908070a090807, 0x0a0908070a090807, q5);
  ASSERT_EQUAL_128(0x1211100f0e0d0c0b, 0x1211100f0e0d0c0b, q6);
  ASSERT_EQUAL_64(src_base + 19, x17);

  TEARDOWN();
}
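

// The multi-register ST1 forms below write one to four registers of
// contiguous data with no interleaving, so reading the bytes back with plain
// LDRs should reproduce the original register contents in order.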


TEST(neon_st1_d) {
  SETUP();

  uint8_t src[14 * kDRegSizeInBytes];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));
  __ Mov(x17, src_base);

  __ St1(v0.V8B(), MemOperand(x17));
  __ Ldr(d16, MemOperand(x17, 8, PostIndex));

  __ St1(v0.V8B(), v1.V8B(), MemOperand(x17));
  __ Ldr(q17, MemOperand(x17, 16, PostIndex));

  __ St1(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x17));
  __ Ldr(d18, MemOperand(x17, 8, PostIndex));
  __ Ldr(d19, MemOperand(x17, 8, PostIndex));
  __ Ldr(d20, MemOperand(x17, 8, PostIndex));

  __ St1(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(), MemOperand(x17));
  __ Ldr(q21, MemOperand(x17, 16, PostIndex));
  __ Ldr(q22, MemOperand(x17, 16, PostIndex));

  __ St1(v0.V1D(), v1.V1D(), v2.V1D(), v3.V1D(), MemOperand(x17));
  __ Ldr(q23, MemOperand(x17, 16, PostIndex));
  __ Ldr(q24, MemOperand(x17));
  END();

  RUN();

  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q0);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q1);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q2);
  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323130, q3);
  ASSERT_EQUAL_128(0, 0x0706050403020100, q16);
  ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q17);
  ASSERT_EQUAL_128(0, 0x0706050403020100, q18);
  ASSERT_EQUAL_128(0, 0x1716151413121110, q19);
  ASSERT_EQUAL_128(0, 0x2726252423222120, q20);
  ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q21);
  ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q22);
  ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q23);
  ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q24);

  TEARDOWN();
}


TEST(neon_st1_d_postindex) {
  SETUP();

  uint8_t src[64 + 14 * kDRegSizeInBytes];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, -8);
  __ Mov(x19, -16);
  __ Mov(x20, -24);
  __ Mov(x21, -32);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));
  __ Mov(x17, src_base);

  __ St1(v0.V8B(), MemOperand(x17, 8, PostIndex));
  __ Ldr(d16, MemOperand(x17, x18));

  __ St1(v0.V8B(), v1.V8B(), MemOperand(x17, 16, PostIndex));
  __ Ldr(q17, MemOperand(x17, x19));

  __ St1(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x17, 24, PostIndex));
  __ Ldr(d18, MemOperand(x17, x20));
  __ Ldr(d19, MemOperand(x17, x19));
  __ Ldr(d20, MemOperand(x17, x18));

  __ St1(v0.V2S(),
         v1.V2S(),
         v2.V2S(),
         v3.V2S(),
         MemOperand(x17, 32, PostIndex));
  __ Ldr(q21, MemOperand(x17, x21));
  __ Ldr(q22, MemOperand(x17, x19));

  __ St1(v0.V1D(),
         v1.V1D(),
         v2.V1D(),
         v3.V1D(),
         MemOperand(x17, 32, PostIndex));
  __ Ldr(q23, MemOperand(x17, x21));
  __ Ldr(q24, MemOperand(x17, x19));
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x0706050403020100, q16);
  ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q17);
  ASSERT_EQUAL_128(0, 0x0706050403020100, q18);
  ASSERT_EQUAL_128(0, 0x1716151413121110, q19);
  ASSERT_EQUAL_128(0, 0x2726252423222120, q20);
  ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q21);
  ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q22);
  ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q23);
  ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q24);

  TEARDOWN();
}


TEST(neon_st1_q) {
  SETUP();

  uint8_t src[64 + 160];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));

  __ St1(v0.V16B(), MemOperand(x17));
  __ Ldr(q16, MemOperand(x17, 16, PostIndex));

  __ St1(v0.V8H(), v1.V8H(), MemOperand(x17));
  __ Ldr(q17, MemOperand(x17, 16, PostIndex));
  __ Ldr(q18, MemOperand(x17, 16, PostIndex));

  __ St1(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x17));
  __ Ldr(q19, MemOperand(x17, 16, PostIndex));
  __ Ldr(q20, MemOperand(x17, 16, PostIndex));
  __ Ldr(q21, MemOperand(x17, 16, PostIndex));

  __ St1(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(), MemOperand(x17));
  __ Ldr(q22, MemOperand(x17, 16, PostIndex));
  __ Ldr(q23, MemOperand(x17, 16, PostIndex));
  __ Ldr(q24, MemOperand(x17, 16, PostIndex));
  __ Ldr(q25, MemOperand(x17));
  END();

  RUN();

  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q16);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q17);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q18);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q19);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q20);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q21);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q22);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q23);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q24);
  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323130, q25);

  TEARDOWN();
}


TEST(neon_st1_q_postindex) {
  SETUP();

  uint8_t src[64 + 160];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, -16);
  __ Mov(x19, -32);
  __ Mov(x20, -48);
  __ Mov(x21, -64);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));

  __ St1(v0.V16B(), MemOperand(x17, 16, PostIndex));
  __ Ldr(q16, MemOperand(x17, x18));

  __ St1(v0.V8H(), v1.V8H(), MemOperand(x17, 32, PostIndex));
  __ Ldr(q17, MemOperand(x17, x19));
  __ Ldr(q18, MemOperand(x17, x18));

  __ St1(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x17, 48, PostIndex));
  __ Ldr(q19, MemOperand(x17, x20));
  __ Ldr(q20, MemOperand(x17, x19));
  __ Ldr(q21, MemOperand(x17, x18));

  __ St1(v0.V2D(),
         v1.V2D(),
         v2.V2D(),
         v3.V2D(),
         MemOperand(x17, 64, PostIndex));
  __ Ldr(q22, MemOperand(x17, x21));
  __ Ldr(q23, MemOperand(x17, x20));
  __ Ldr(q24, MemOperand(x17, x19));
  __ Ldr(q25, MemOperand(x17, x18));

  END();

  RUN();

  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q16);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q17);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q18);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q19);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q20);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q21);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q22);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q23);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q24);
  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323130, q25);

  TEARDOWN();
}
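
// St2 is the store counterpart of Ld2: it interleaves the elements of its
// two source registers, writing v0[0], v1[0], v0[1], v1[1], ... to memory.
// For byte elements this mixes the 0x0n bytes of q0 with the 0x1n bytes of
// q1, which is what the expected values in the st2 tests below encode.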


TEST(neon_st2_d) {
  SETUP();

  uint8_t src[4 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));

  __ St2(v0.V8B(), v1.V8B(), MemOperand(x18));
  __ Add(x18, x18, 22);
  __ St2(v0.V4H(), v1.V4H(), MemOperand(x18));
  __ Add(x18, x18, 11);
  __ St2(v0.V2S(), v1.V2S(), MemOperand(x18));

  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x1707160615051404, 0x1303120211011000, q0);
  ASSERT_EQUAL_128(0x0504131203021110, 0x0100151413121110, q1);
  ASSERT_EQUAL_128(0x1615140706050413, 0x1211100302010014, q2);
  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323117, q3);

  TEARDOWN();
}


TEST(neon_st2_d_postindex) {
  SETUP();

  uint8_t src[4 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x22, 5);
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));

  __ St2(v0.V8B(), v1.V8B(), MemOperand(x18, x22, PostIndex));
  __ St2(v0.V4H(), v1.V4H(), MemOperand(x18, 16, PostIndex));
  __ St2(v0.V2S(), v1.V2S(), MemOperand(x18));


  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x1405041312030211, 0x1001000211011000, q0);
  ASSERT_EQUAL_128(0x0605041312111003, 0x0201001716070615, q1);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726251716151407, q2);

  TEARDOWN();
}


TEST(neon_st2_q) {
  SETUP();

  uint8_t src[5 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));

  __ St2(v0.V16B(), v1.V16B(), MemOperand(x18));
  __ Add(x18, x18, 8);
  __ St2(v0.V8H(), v1.V8H(), MemOperand(x18));
  __ Add(x18, x18, 22);
  __ St2(v0.V4S(), v1.V4S(), MemOperand(x18));
  __ Add(x18, x18, 2);
  __ St2(v0.V2D(), v1.V2D(), MemOperand(x18));

  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x1312030211100100, 0x1303120211011000, q0);
  ASSERT_EQUAL_128(0x01000b0a19180908, 0x1716070615140504, q1);
  ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q2);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0f0e0d0c0b0a0908, q3);

  TEARDOWN();
}


TEST(neon_st2_q_postindex) {
  SETUP();

  uint8_t src[5 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x22, 5);
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));

  __ St2(v0.V16B(), v1.V16B(), MemOperand(x18, x22, PostIndex));
  __ St2(v0.V8H(), v1.V8H(), MemOperand(x18, 32, PostIndex));
  __ St2(v0.V4S(), v1.V4S(), MemOperand(x18, x22, PostIndex));
  __ St2(v0.V2D(), v1.V2D(), MemOperand(x18));

  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
  __ Ldr(q4, MemOperand(x19, 16, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x1405041312030211, 0x1001000211011000, q0);
  ASSERT_EQUAL_128(0x1c0d0c1b1a0b0a19, 0x1809081716070615, q1);
  ASSERT_EQUAL_128(0x0504030201001003, 0x0201001f1e0f0e1d, q2);
  ASSERT_EQUAL_128(0x0d0c0b0a09081716, 0x1514131211100706, q3);
  ASSERT_EQUAL_128(0x4f4e4d4c4b4a1f1e, 0x1d1c1b1a19180f0e, q4);

  TEARDOWN();
}
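
// St3 extends the same scheme to three registers: elements are written in
// the order v0[i], v1[i], v2[i] for each index i, so a single st3 of V8B
// covers 24 bytes of memory.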


TEST(neon_st3_d) {
  SETUP();

  uint8_t src[3 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));

  __ St3(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x18));
  __ Add(x18, x18, 3);
  __ St3(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x18));
  __ Add(x18, x18, 2);
  __ St3(v0.V2S(), v1.V2S(), v2.V2S(), MemOperand(x18));


  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x2221201312111003, 0x0201000100201000, q0);
  ASSERT_EQUAL_128(0x1f1e1d2726252417, 0x1615140706050423, q1);

  TEARDOWN();
}


TEST(neon_st3_d_postindex) {
  SETUP();

  uint8_t src[4 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x22, 5);
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));

  __ St3(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x18, x22, PostIndex));
  __ St3(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x18, 24, PostIndex));
  __ St3(v0.V2S(), v1.V2S(), v2.V2S(), MemOperand(x18));


  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x2213120302212011, 0x1001001101201000, q0);
  ASSERT_EQUAL_128(0x0201002726171607, 0x0625241514050423, q1);
  ASSERT_EQUAL_128(0x1615140706050423, 0x2221201312111003, q2);
  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736352726252417, q3);

  TEARDOWN();
}


TEST(neon_st3_q) {
  SETUP();

  uint8_t src[6 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));

  __ St3(v0.V16B(), v1.V16B(), v2.V16B(), MemOperand(x18));
  __ Add(x18, x18, 5);
  __ St3(v0.V8H(), v1.V8H(), v2.V8H(), MemOperand(x18));
  __ Add(x18, x18, 12);
  __ St3(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x18));
  __ Add(x18, x18, 22);
  __ St3(v0.V2D(), v1.V2D(), v2.V2D(), MemOperand(x18));

  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
  __ Ldr(q4, MemOperand(x19, 16, PostIndex));
  __ Ldr(q5, MemOperand(x19, 16, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x2213120302212011, 0x1001001101201000, q0);
  ASSERT_EQUAL_128(0x0605042322212013, 0x1211100302010023, q1);
  ASSERT_EQUAL_128(0x1007060504030201, 0x0025241716151407, q2);
  ASSERT_EQUAL_128(0x0827262524232221, 0x2017161514131211, q3);
  ASSERT_EQUAL_128(0x281f1e1d1c1b1a19, 0x180f0e0d0c0b0a09, q4);
  ASSERT_EQUAL_128(0x5f5e5d5c5b5a5958, 0x572f2e2d2c2b2a29, q5);

  TEARDOWN();
}


TEST(neon_st3_q_postindex) {
  SETUP();

  uint8_t src[7 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x22, 5);
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));

  __ St3(v0.V16B(), v1.V16B(), v2.V16B(), MemOperand(x18, x22, PostIndex));
  __ St3(v0.V8H(), v1.V8H(), v2.V8H(), MemOperand(x18, 48, PostIndex));
  __ St3(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x18, x22, PostIndex));
  __ St3(v0.V2D(), v1.V2D(), v2.V2D(), MemOperand(x18));

  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
  __ Ldr(q4, MemOperand(x19, 16, PostIndex));
  __ Ldr(q5, MemOperand(x19, 16, PostIndex));
  __ Ldr(q6, MemOperand(x19, 16, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x2213120302212011, 0x1001001101201000, q0);
  ASSERT_EQUAL_128(0x1809082726171607, 0x0625241514050423, q1);
  ASSERT_EQUAL_128(0x0e2d2c1d1c0d0c2b, 0x2a1b1a0b0a292819, q2);
  ASSERT_EQUAL_128(0x0504030201001003, 0x0201002f2e1f1e0f, q3);
  ASSERT_EQUAL_128(0x2524232221201716, 0x1514131211100706, q4);
  ASSERT_EQUAL_128(0x1d1c1b1a19180f0e, 0x0d0c0b0a09082726, q5);
  ASSERT_EQUAL_128(0x6f6e6d6c6b6a2f2e, 0x2d2c2b2a29281f1e, q6);

  TEARDOWN();
}
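
// St4 stores four registers with four-way interleaving (v0[i], v1[i],
// v2[i], v3[i] for each index i), covering 32 bytes for the D-register
// forms and 64 bytes for the Q-register forms.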


TEST(neon_st4_d) {
  SETUP();

  uint8_t src[4 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));

  __ St4(v0.V8B(), v1.V8B(), v2.V8B(), v3.V8B(), MemOperand(x18));
  __ Add(x18, x18, 12);
  __ St4(v0.V4H(), v1.V4H(), v2.V4H(), v3.V4H(), MemOperand(x18));
  __ Add(x18, x18, 15);
  __ St4(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(), MemOperand(x18));


  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x1110010032221202, 0x3121110130201000, q0);
  ASSERT_EQUAL_128(0x1003020100322322, 0x1312030231302120, q1);
  ASSERT_EQUAL_128(0x1407060504333231, 0x3023222120131211, q2);
  ASSERT_EQUAL_128(0x3f3e3d3c3b373635, 0x3427262524171615, q3);

  TEARDOWN();
}


TEST(neon_st4_d_postindex) {
  SETUP();

  uint8_t src[5 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x22, 5);
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));

  __ St4(v0.V8B(),
         v1.V8B(),
         v2.V8B(),
         v3.V8B(),
         MemOperand(x18, x22, PostIndex));
  __ St4(v0.V4H(),
         v1.V4H(),
         v2.V4H(),
         v3.V4H(),
         MemOperand(x18, 32, PostIndex));
  __ St4(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(), MemOperand(x18));


  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
  __ Ldr(q4, MemOperand(x19, 16, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x1203023130212011, 0x1001000130201000, q0);
  ASSERT_EQUAL_128(0x1607063534252415, 0x1405043332232213, q1);
  ASSERT_EQUAL_128(0x2221201312111003, 0x0201003736272617, q2);
  ASSERT_EQUAL_128(0x2625241716151407, 0x0605043332313023, q3);
  ASSERT_EQUAL_128(0x4f4e4d4c4b4a4948, 0x4746453736353427, q4);

  TEARDOWN();
}


TEST(neon_st4_q) {
  SETUP();

  uint8_t src[7 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));

  __ St4(v0.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), MemOperand(x18));
  __ Add(x18, x18, 5);
  __ St4(v0.V8H(), v1.V8H(), v2.V8H(), v3.V8H(), MemOperand(x18));
  __ Add(x18, x18, 12);
  __ St4(v0.V4S(), v1.V4S(), v2.V4S(), v3.V4S(), MemOperand(x18));
  __ Add(x18, x18, 22);
  __ St4(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(), MemOperand(x18));
  __ Add(x18, x18, 10);

  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
  __ Ldr(q4, MemOperand(x19, 16, PostIndex));
  __ Ldr(q5, MemOperand(x19, 16, PostIndex));
  __ Ldr(q6, MemOperand(x19, 16, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x1203023130212011, 0x1001000130201000, q0);
  ASSERT_EQUAL_128(0x3231302322212013, 0x1211100302010013, q1);
  ASSERT_EQUAL_128(0x1007060504030201, 0x0015140706050433, q2);
  ASSERT_EQUAL_128(0x3027262524232221, 0x2017161514131211, q3);
  ASSERT_EQUAL_128(0x180f0e0d0c0b0a09, 0x0837363534333231, q4);
  ASSERT_EQUAL_128(0x382f2e2d2c2b2a29, 0x281f1e1d1c1b1a19, q5);
  ASSERT_EQUAL_128(0x6f6e6d6c6b6a6968, 0x673f3e3d3c3b3a39, q6);

  TEARDOWN();
}


TEST(neon_st4_q_postindex) {
  SETUP();

  uint8_t src[9 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x22, 5);
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));

  __ St4(v0.V16B(),
         v1.V16B(),
         v2.V16B(),
         v3.V16B(),
         MemOperand(x18, x22, PostIndex));
  __ St4(v0.V8H(),
         v1.V8H(),
         v2.V8H(),
         v3.V8H(),
         MemOperand(x18, 64, PostIndex));
  __ St4(v0.V4S(),
         v1.V4S(),
         v2.V4S(),
         v3.V4S(),
         MemOperand(x18, x22, PostIndex));
  __ St4(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(), MemOperand(x18));

  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
  __ Ldr(q4, MemOperand(x19, 16, PostIndex));
  __ Ldr(q5, MemOperand(x19, 16, PostIndex));
  __ Ldr(q6, MemOperand(x19, 16, PostIndex));
  __ Ldr(q7, MemOperand(x19, 16, PostIndex));
  __ Ldr(q8, MemOperand(x19, 16, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x1203023130212011, 0x1001000130201000, q0);
  ASSERT_EQUAL_128(0x1607063534252415, 0x1405043332232213, q1);
  ASSERT_EQUAL_128(0x1a0b0a3938292819, 0x1809083736272617, q2);
  ASSERT_EQUAL_128(0x1e0f0e3d3c2d2c1d, 0x1c0d0c3b3a2b2a1b, q3);
  ASSERT_EQUAL_128(0x0504030201001003, 0x0201003f3e2f2e1f, q4);
  ASSERT_EQUAL_128(0x2524232221201716, 0x1514131211100706, q5);
  ASSERT_EQUAL_128(0x0d0c0b0a09083736, 0x3534333231302726, q6);
  ASSERT_EQUAL_128(0x2d2c2b2a29281f1e, 0x1d1c1b1a19180f0e, q7);
  ASSERT_EQUAL_128(0x8f8e8d8c8b8a3f3e, 0x3d3c3b3a39382f2e, q8);

  TEARDOWN();
}
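
// The "destructive" tests below re-run each operation with the destination
// register aliasing one or both sources (the inputs are first copied into
// the destination with Mov) and expect exactly the same results as the
// non-aliased reference computation.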


TEST(neon_destructive_minmaxp) {
  SETUP();

  START();
  __ Movi(v0.V2D(), 0, 0x2222222233333333);
  __ Movi(v1.V2D(), 0, 0x0000000011111111);

  __ Sminp(v16.V2S(), v0.V2S(), v1.V2S());
  __ Mov(v17, v0);
  __ Sminp(v17.V2S(), v17.V2S(), v1.V2S());
  __ Mov(v18, v1);
  __ Sminp(v18.V2S(), v0.V2S(), v18.V2S());
  __ Mov(v19, v0);
  __ Sminp(v19.V2S(), v19.V2S(), v19.V2S());

  __ Smaxp(v20.V2S(), v0.V2S(), v1.V2S());
  __ Mov(v21, v0);
  __ Smaxp(v21.V2S(), v21.V2S(), v1.V2S());
  __ Mov(v22, v1);
  __ Smaxp(v22.V2S(), v0.V2S(), v22.V2S());
  __ Mov(v23, v0);
  __ Smaxp(v23.V2S(), v23.V2S(), v23.V2S());

  __ Uminp(v24.V2S(), v0.V2S(), v1.V2S());
  __ Mov(v25, v0);
  __ Uminp(v25.V2S(), v25.V2S(), v1.V2S());
  __ Mov(v26, v1);
  __ Uminp(v26.V2S(), v0.V2S(), v26.V2S());
  __ Mov(v27, v0);
  __ Uminp(v27.V2S(), v27.V2S(), v27.V2S());

  __ Umaxp(v28.V2S(), v0.V2S(), v1.V2S());
  __ Mov(v29, v0);
  __ Umaxp(v29.V2S(), v29.V2S(), v1.V2S());
  __ Mov(v30, v1);
  __ Umaxp(v30.V2S(), v0.V2S(), v30.V2S());
  __ Mov(v31, v0);
  __ Umaxp(v31.V2S(), v31.V2S(), v31.V2S());
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x0000000022222222, q16);
  ASSERT_EQUAL_128(0, 0x0000000022222222, q17);
  ASSERT_EQUAL_128(0, 0x0000000022222222, q18);
  ASSERT_EQUAL_128(0, 0x2222222222222222, q19);

  ASSERT_EQUAL_128(0, 0x1111111133333333, q20);
  ASSERT_EQUAL_128(0, 0x1111111133333333, q21);
  ASSERT_EQUAL_128(0, 0x1111111133333333, q22);
  ASSERT_EQUAL_128(0, 0x3333333333333333, q23);

  ASSERT_EQUAL_128(0, 0x0000000022222222, q24);
  ASSERT_EQUAL_128(0, 0x0000000022222222, q25);
  ASSERT_EQUAL_128(0, 0x0000000022222222, q26);
  ASSERT_EQUAL_128(0, 0x2222222222222222, q27);

  ASSERT_EQUAL_128(0, 0x1111111133333333, q28);
  ASSERT_EQUAL_128(0, 0x1111111133333333, q29);
  ASSERT_EQUAL_128(0, 0x1111111133333333, q30);
  ASSERT_EQUAL_128(0, 0x3333333333333333, q31);

  TEARDOWN();
}


TEST(neon_destructive_tbl) {
  SETUP();

  START();
  __ Movi(v0.V2D(), 0x0041424334353627, 0x28291a1b1c0d0e0f);
  __ Movi(v1.V2D(), 0xafaeadacabaaa9a8, 0xa7a6a5a4a3a2a1a0);
  __ Movi(v2.V2D(), 0xbfbebdbcbbbab9b8, 0xb7b6b5b4b3b2b1b0);
  __ Movi(v3.V2D(), 0xcfcecdcccbcac9c8, 0xc7c6c5c4c3c2c1c0);
  __ Movi(v4.V2D(), 0xdfdedddcdbdad9d8, 0xd7d6d5d4d3d2d1d0);

  __ Movi(v16.V2D(), 0x5555555555555555, 0x5555555555555555);
  __ Tbl(v16.V16B(), v1.V16B(), v0.V16B());
  __ Mov(v17, v0);
  __ Tbl(v17.V16B(), v1.V16B(), v17.V16B());
  __ Mov(v18, v1);
  __ Tbl(v18.V16B(), v18.V16B(), v0.V16B());
  __ Mov(v19, v0);
  __ Tbl(v19.V16B(), v19.V16B(), v19.V16B());

  __ Movi(v20.V2D(), 0x5555555555555555, 0x5555555555555555);
  __ Tbl(v20.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), v0.V16B());
  __ Mov(v21, v0);
  __ Tbl(v21.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), v21.V16B());
  __ Mov(v22, v1);
  __ Mov(v23, v2);
  __ Mov(v24, v3);
  __ Mov(v25, v4);
  __ Tbl(v22.V16B(), v22.V16B(), v23.V16B(), v24.V16B(), v25.V16B(), v0.V16B());
  __ Mov(v26, v0);
  __ Mov(v27, v1);
  __ Mov(v28, v2);
  __ Mov(v29, v3);
  __ Tbl(v26.V16B(),
         v26.V16B(),
         v27.V16B(),
         v28.V16B(),
         v29.V16B(),
         v26.V16B());
  END();

  RUN();

  ASSERT_EQUAL_128(0xa000000000000000, 0x0000000000adaeaf, q16);
  ASSERT_EQUAL_128(0xa000000000000000, 0x0000000000adaeaf, q17);
  ASSERT_EQUAL_128(0xa000000000000000, 0x0000000000adaeaf, q18);
  ASSERT_EQUAL_128(0x0f00000000000000, 0x0000000000424100, q19);

  ASSERT_EQUAL_128(0xa0000000d4d5d6c7, 0xc8c9babbbcadaeaf, q20);
  ASSERT_EQUAL_128(0xa0000000d4d5d6c7, 0xc8c9babbbcadaeaf, q21);
  ASSERT_EQUAL_128(0xa0000000d4d5d6c7, 0xc8c9babbbcadaeaf, q22);
  ASSERT_EQUAL_128(0x0f000000c4c5c6b7, 0xb8b9aaabac424100, q26);

  TEARDOWN();
}
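
// Tbx differs from Tbl only for out-of-range indices: Tbl clears the
// corresponding result byte to zero, whereas Tbx leaves the existing
// destination byte unchanged. That is why the 0x55 filler bytes survive in
// the tbx results below but become 0x00 in the tbl results above.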


TEST(neon_destructive_tbx) {
  SETUP();

  START();
  __ Movi(v0.V2D(), 0x0041424334353627, 0x28291a1b1c0d0e0f);
  __ Movi(v1.V2D(), 0xafaeadacabaaa9a8, 0xa7a6a5a4a3a2a1a0);
  __ Movi(v2.V2D(), 0xbfbebdbcbbbab9b8, 0xb7b6b5b4b3b2b1b0);
  __ Movi(v3.V2D(), 0xcfcecdcccbcac9c8, 0xc7c6c5c4c3c2c1c0);
  __ Movi(v4.V2D(), 0xdfdedddcdbdad9d8, 0xd7d6d5d4d3d2d1d0);

  __ Movi(v16.V2D(), 0x5555555555555555, 0x5555555555555555);
  __ Tbx(v16.V16B(), v1.V16B(), v0.V16B());
  __ Mov(v17, v0);
  __ Tbx(v17.V16B(), v1.V16B(), v17.V16B());
  __ Mov(v18, v1);
  __ Tbx(v18.V16B(), v18.V16B(), v0.V16B());
  __ Mov(v19, v0);
  __ Tbx(v19.V16B(), v19.V16B(), v19.V16B());

  __ Movi(v20.V2D(), 0x5555555555555555, 0x5555555555555555);
  __ Tbx(v20.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), v0.V16B());
  __ Mov(v21, v0);
  __ Tbx(v21.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), v21.V16B());
  __ Mov(v22, v1);
  __ Mov(v23, v2);
  __ Mov(v24, v3);
  __ Mov(v25, v4);
  __ Tbx(v22.V16B(), v22.V16B(), v23.V16B(), v24.V16B(), v25.V16B(), v0.V16B());
  __ Mov(v26, v0);
  __ Mov(v27, v1);
  __ Mov(v28, v2);
  __ Mov(v29, v3);
  __ Tbx(v26.V16B(),
         v26.V16B(),
         v27.V16B(),
         v28.V16B(),
         v29.V16B(),
         v26.V16B());
  END();

  RUN();

  ASSERT_EQUAL_128(0xa055555555555555, 0x5555555555adaeaf, q16);
  ASSERT_EQUAL_128(0xa041424334353627, 0x28291a1b1cadaeaf, q17);
  ASSERT_EQUAL_128(0xa0aeadacabaaa9a8, 0xa7a6a5a4a3adaeaf, q18);
  ASSERT_EQUAL_128(0x0f41424334353627, 0x28291a1b1c424100, q19);

  ASSERT_EQUAL_128(0xa0555555d4d5d6c7, 0xc8c9babbbcadaeaf, q20);
  ASSERT_EQUAL_128(0xa0414243d4d5d6c7, 0xc8c9babbbcadaeaf, q21);
  ASSERT_EQUAL_128(0xa0aeadacd4d5d6c7, 0xc8c9babbbcadaeaf, q22);
  ASSERT_EQUAL_128(0x0f414243c4c5c6b7, 0xb8b9aaabac424100, q26);

  TEARDOWN();
}


TEST(neon_destructive_fcvtl) {
  SETUP();

  START();
  __ Movi(v0.V2D(), 0x400000003f800000, 0xbf800000c0000000);
  __ Fcvtl(v16.V2D(), v0.V2S());
  __ Fcvtl2(v17.V2D(), v0.V4S());
  __ Mov(v18, v0);
  __ Mov(v19, v0);
  __ Fcvtl(v18.V2D(), v18.V2S());
  __ Fcvtl2(v19.V2D(), v19.V4S());

  __ Movi(v1.V2D(), 0x40003c003c004000, 0xc000bc00bc00c000);
  __ Fcvtl(v20.V4S(), v1.V4H());
  __ Fcvtl2(v21.V4S(), v1.V8H());
  __ Mov(v22, v1);
  __ Mov(v23, v1);
  __ Fcvtl(v22.V4S(), v22.V4H());
  __ Fcvtl2(v23.V4S(), v23.V8H());

  END();

  RUN();

  ASSERT_EQUAL_128(0xbff0000000000000, 0xc000000000000000, q16);
  ASSERT_EQUAL_128(0x4000000000000000, 0x3ff0000000000000, q17);
  ASSERT_EQUAL_128(0xbff0000000000000, 0xc000000000000000, q18);
  ASSERT_EQUAL_128(0x4000000000000000, 0x3ff0000000000000, q19);

  ASSERT_EQUAL_128(0xc0000000bf800000, 0xbf800000c0000000, q20);
  ASSERT_EQUAL_128(0x400000003f800000, 0x3f80000040000000, q21);
  ASSERT_EQUAL_128(0xc0000000bf800000, 0xbf800000c0000000, q22);
  ASSERT_EQUAL_128(0x400000003f800000, 0x3f80000040000000, q23);

  TEARDOWN();
}
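
// The remaining tests exercise Ldp/Stp (load/store pair) and related
// instructions. As a reading aid for the writeback forms used below: a
// pre-index operand such as MemOperand(x17, 4, PreIndex) first updates
// x17 += 4 and then performs the two accesses at [x17] and [x17 + size];
// a post-index operand performs the accesses first and updates the base
// afterwards.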


TEST(ldp_stp_float) {
  SETUP();

  float src[2] = {1.0, 2.0};
  float dst[3] = {0.0, 0.0, 0.0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x16, src_base);
  __ Mov(x17, dst_base);
  __ Ldp(s31, s0, MemOperand(x16, 2 * sizeof(src[0]), PostIndex));
  __ Stp(s0, s31, MemOperand(x17, sizeof(dst[1]), PreIndex));
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s31);
  ASSERT_EQUAL_FP32(2.0, s0);
  ASSERT_EQUAL_FP32(0.0, dst[0]);
  ASSERT_EQUAL_FP32(2.0, dst[1]);
  ASSERT_EQUAL_FP32(1.0, dst[2]);
  ASSERT_EQUAL_64(src_base + 2 * sizeof(src[0]), x16);
  ASSERT_EQUAL_64(dst_base + sizeof(dst[1]), x17);

  TEARDOWN();
}


TEST(ldp_stp_double) {
  SETUP();

  double src[2] = {1.0, 2.0};
  double dst[3] = {0.0, 0.0, 0.0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x16, src_base);
  __ Mov(x17, dst_base);
  __ Ldp(d31, d0, MemOperand(x16, 2 * sizeof(src[0]), PostIndex));
  __ Stp(d0, d31, MemOperand(x17, sizeof(dst[1]), PreIndex));
  END();

  RUN();

  ASSERT_EQUAL_FP64(1.0, d31);
  ASSERT_EQUAL_FP64(2.0, d0);
  ASSERT_EQUAL_FP64(0.0, dst[0]);
  ASSERT_EQUAL_FP64(2.0, dst[1]);
  ASSERT_EQUAL_FP64(1.0, dst[2]);
  ASSERT_EQUAL_64(src_base + 2 * sizeof(src[0]), x16);
  ASSERT_EQUAL_64(dst_base + sizeof(dst[1]), x17);

  TEARDOWN();
}


TEST(ldp_stp_quad) {
  SETUP();

  uint64_t src[4] = {0x0123456789abcdef,
                     0xaaaaaaaa55555555,
                     0xfedcba9876543210,
                     0x55555555aaaaaaaa};
  uint64_t dst[6] = {0, 0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x16, src_base);
  __ Mov(x17, dst_base);
  __ Ldp(q31, q0, MemOperand(x16, 4 * sizeof(src[0]), PostIndex));
  __ Stp(q0, q31, MemOperand(x17, 2 * sizeof(dst[1]), PreIndex));
  END();

  RUN();

  ASSERT_EQUAL_128(0xaaaaaaaa55555555, 0x0123456789abcdef, q31);
  ASSERT_EQUAL_128(0x55555555aaaaaaaa, 0xfedcba9876543210, q0);
  ASSERT_EQUAL_64(0, dst[0]);
  ASSERT_EQUAL_64(0, dst[1]);
  ASSERT_EQUAL_64(0xfedcba9876543210, dst[2]);
  ASSERT_EQUAL_64(0x55555555aaaaaaaa, dst[3]);
  ASSERT_EQUAL_64(0x0123456789abcdef, dst[4]);
  ASSERT_EQUAL_64(0xaaaaaaaa55555555, dst[5]);
  ASSERT_EQUAL_64(src_base + 4 * sizeof(src[0]), x16);
  ASSERT_EQUAL_64(dst_base + 2 * sizeof(dst[1]), x17);

  TEARDOWN();
}


TEST(ldp_stp_offset) {
  SETUP();

  uint64_t src[3] = {0x0011223344556677,
                     0x8899aabbccddeeff,
                     0xffeeddccbbaa9988};
  uint64_t dst[7] = {0, 0, 0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x16, src_base);
  __ Mov(x17, dst_base);
  __ Mov(x18, src_base + 24);
  __ Mov(x19, dst_base + 56);
  __ Ldp(w0, w1, MemOperand(x16));
  __ Ldp(w2, w3, MemOperand(x16, 4));
  __ Ldp(x4, x5, MemOperand(x16, 8));
  __ Ldp(w6, w7, MemOperand(x18, -12));
  __ Ldp(x8, x9, MemOperand(x18, -16));
  __ Stp(w0, w1, MemOperand(x17));
  __ Stp(w2, w3, MemOperand(x17, 8));
  __ Stp(x4, x5, MemOperand(x17, 16));
  __ Stp(w6, w7, MemOperand(x19, -24));
  __ Stp(x8, x9, MemOperand(x19, -16));
  END();

  RUN();

  ASSERT_EQUAL_64(0x44556677, x0);
  ASSERT_EQUAL_64(0x00112233, x1);
  ASSERT_EQUAL_64(0x0011223344556677, dst[0]);
  ASSERT_EQUAL_64(0x00112233, x2);
  ASSERT_EQUAL_64(0xccddeeff, x3);
  ASSERT_EQUAL_64(0xccddeeff00112233, dst[1]);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, x4);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[2]);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, x5);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[3]);
  ASSERT_EQUAL_64(0x8899aabb, x6);
  ASSERT_EQUAL_64(0xbbaa9988, x7);
  ASSERT_EQUAL_64(0xbbaa99888899aabb, dst[4]);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, x8);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[5]);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, x9);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[6]);
  ASSERT_EQUAL_64(src_base, x16);
  ASSERT_EQUAL_64(dst_base, x17);
  ASSERT_EQUAL_64(src_base + 24, x18);
  ASSERT_EQUAL_64(dst_base + 56, x19);

  TEARDOWN();
}
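
// The "_wide" variants below use a 1024-byte offset, which does not fit in
// the scaled seven-bit immediate of the ldp/stp encodings, so the
// MacroAssembler must compute the address with additional instructions;
// these tests check that that expansion is correct.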


TEST(ldp_stp_offset_wide) {
  SETUP();

  uint64_t src[3] = {0x0011223344556677,
                     0x8899aabbccddeeff,
                     0xffeeddccbbaa9988};
  uint64_t dst[7] = {0, 0, 0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
  // Move base too far from the array to force multiple instructions
  // to be emitted.
  const int64_t base_offset = 1024;

  START();
  __ Mov(x20, src_base - base_offset);
  __ Mov(x21, dst_base - base_offset);
  __ Mov(x18, src_base + base_offset + 24);
  __ Mov(x19, dst_base + base_offset + 56);
  __ Ldp(w0, w1, MemOperand(x20, base_offset));
  __ Ldp(w2, w3, MemOperand(x20, base_offset + 4));
  __ Ldp(x4, x5, MemOperand(x20, base_offset + 8));
  __ Ldp(w6, w7, MemOperand(x18, -12 - base_offset));
  __ Ldp(x8, x9, MemOperand(x18, -16 - base_offset));
  __ Stp(w0, w1, MemOperand(x21, base_offset));
  __ Stp(w2, w3, MemOperand(x21, base_offset + 8));
  __ Stp(x4, x5, MemOperand(x21, base_offset + 16));
  __ Stp(w6, w7, MemOperand(x19, -24 - base_offset));
  __ Stp(x8, x9, MemOperand(x19, -16 - base_offset));
  END();

  RUN();

  ASSERT_EQUAL_64(0x44556677, x0);
  ASSERT_EQUAL_64(0x00112233, x1);
  ASSERT_EQUAL_64(0x0011223344556677, dst[0]);
  ASSERT_EQUAL_64(0x00112233, x2);
  ASSERT_EQUAL_64(0xccddeeff, x3);
  ASSERT_EQUAL_64(0xccddeeff00112233, dst[1]);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, x4);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[2]);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, x5);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[3]);
  ASSERT_EQUAL_64(0x8899aabb, x6);
  ASSERT_EQUAL_64(0xbbaa9988, x7);
  ASSERT_EQUAL_64(0xbbaa99888899aabb, dst[4]);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, x8);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[5]);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, x9);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[6]);
  ASSERT_EQUAL_64(src_base - base_offset, x20);
  ASSERT_EQUAL_64(dst_base - base_offset, x21);
  ASSERT_EQUAL_64(src_base + base_offset + 24, x18);
  ASSERT_EQUAL_64(dst_base + base_offset + 56, x19);

  TEARDOWN();
}
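
// Ldnp/Stnp are the non-temporal variants of Ldp/Stp: they hint that the
// data is unlikely to be reused soon, so the CPU may avoid polluting the
// caches. The hint also weakens the usual address-dependency ordering,
// which is why the tests below place a Dmb between the address set-up and
// the non-temporal accesses.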


TEST(ldnp_stnp_offset) {
  SETUP();

  uint64_t src[4] = {0x0011223344556677,
                     0x8899aabbccddeeff,
                     0xffeeddccbbaa9988,
                     0x7766554433221100};
  uint64_t dst[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x16, src_base);
  __ Mov(x17, dst_base);
  __ Mov(x18, src_base + 24);
  __ Mov(x19, dst_base + 64);
  __ Mov(x20, src_base + 32);

  // Ensure address set up has happened before executing non-temporal ops.
  __ Dmb(InnerShareable, BarrierAll);

  __ Ldnp(w0, w1, MemOperand(x16));
  __ Ldnp(w2, w3, MemOperand(x16, 4));
  __ Ldnp(x4, x5, MemOperand(x16, 8));
  __ Ldnp(w6, w7, MemOperand(x18, -12));
  __ Ldnp(x8, x9, MemOperand(x18, -16));
  __ Ldnp(q16, q17, MemOperand(x16));
  __ Ldnp(q19, q18, MemOperand(x20, -32));
  __ Stnp(w0, w1, MemOperand(x17));
  __ Stnp(w2, w3, MemOperand(x17, 8));
  __ Stnp(x4, x5, MemOperand(x17, 16));
  __ Stnp(w6, w7, MemOperand(x19, -32));
  __ Stnp(x8, x9, MemOperand(x19, -24));
  __ Stnp(q17, q16, MemOperand(x19));
  __ Stnp(q18, q19, MemOperand(x19, 32));
  END();

  RUN();

  ASSERT_EQUAL_64(0x44556677, x0);
  ASSERT_EQUAL_64(0x00112233, x1);
  ASSERT_EQUAL_64(0x0011223344556677, dst[0]);
  ASSERT_EQUAL_64(0x00112233, x2);
  ASSERT_EQUAL_64(0xccddeeff, x3);
  ASSERT_EQUAL_64(0xccddeeff00112233, dst[1]);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, x4);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[2]);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, x5);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[3]);
  ASSERT_EQUAL_64(0x8899aabb, x6);
  ASSERT_EQUAL_64(0xbbaa9988, x7);
  ASSERT_EQUAL_64(0xbbaa99888899aabb, dst[4]);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, x8);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[5]);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, x9);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[6]);
  ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x0011223344556677, q16);
  ASSERT_EQUAL_128(0x7766554433221100, 0xffeeddccbbaa9988, q17);
  ASSERT_EQUAL_128(0x7766554433221100, 0xffeeddccbbaa9988, q18);
  ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x0011223344556677, q19);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[8]);
  ASSERT_EQUAL_64(0x7766554433221100, dst[9]);
  ASSERT_EQUAL_64(0x0011223344556677, dst[10]);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[11]);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[12]);
  ASSERT_EQUAL_64(0x7766554433221100, dst[13]);
  ASSERT_EQUAL_64(0x0011223344556677, dst[14]);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[15]);
  ASSERT_EQUAL_64(src_base, x16);
  ASSERT_EQUAL_64(dst_base, x17);
  ASSERT_EQUAL_64(src_base + 24, x18);
  ASSERT_EQUAL_64(dst_base + 64, x19);
  ASSERT_EQUAL_64(src_base + 32, x20);

  TEARDOWN();
}


TEST(ldnp_stnp_offset_float) {
  SETUP();

  float src[3] = {1.2, 2.3, 3.4};
  float dst[6] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x16, src_base);
  __ Mov(x17, dst_base);
  __ Mov(x18, src_base + 12);
  __ Mov(x19, dst_base + 24);

  // Ensure address set up has happened before executing non-temporal ops.
  __ Dmb(InnerShareable, BarrierAll);

  __ Ldnp(s0, s1, MemOperand(x16));
  __ Ldnp(s2, s3, MemOperand(x16, 4));
  __ Ldnp(s5, s4, MemOperand(x18, -8));
  __ Stnp(s1, s0, MemOperand(x17));
  __ Stnp(s3, s2, MemOperand(x17, 8));
  __ Stnp(s4, s5, MemOperand(x19, -8));
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.2, s0);
  ASSERT_EQUAL_FP32(2.3, s1);
  ASSERT_EQUAL_FP32(2.3, dst[0]);
  ASSERT_EQUAL_FP32(1.2, dst[1]);
  ASSERT_EQUAL_FP32(2.3, s2);
  ASSERT_EQUAL_FP32(3.4, s3);
  ASSERT_EQUAL_FP32(3.4, dst[2]);
  ASSERT_EQUAL_FP32(2.3, dst[3]);
  ASSERT_EQUAL_FP32(3.4, s4);
  ASSERT_EQUAL_FP32(2.3, s5);
  ASSERT_EQUAL_FP32(3.4, dst[4]);
  ASSERT_EQUAL_FP32(2.3, dst[5]);
  ASSERT_EQUAL_64(src_base, x16);
  ASSERT_EQUAL_64(dst_base, x17);
  ASSERT_EQUAL_64(src_base + 12, x18);
  ASSERT_EQUAL_64(dst_base + 24, x19);

  TEARDOWN();
}


TEST(ldnp_stnp_offset_double) {
  SETUP();

  double src[3] = {1.2, 2.3, 3.4};
  double dst[6] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x16, src_base);
  __ Mov(x17, dst_base);
  __ Mov(x18, src_base + 24);
  __ Mov(x19, dst_base + 48);

  // Ensure address set up has happened before executing non-temporal ops.
  __ Dmb(InnerShareable, BarrierAll);

  __ Ldnp(d0, d1, MemOperand(x16));
  __ Ldnp(d2, d3, MemOperand(x16, 8));
  __ Ldnp(d5, d4, MemOperand(x18, -16));
  __ Stnp(d1, d0, MemOperand(x17));
  __ Stnp(d3, d2, MemOperand(x17, 16));
  __ Stnp(d4, d5, MemOperand(x19, -16));
  END();

  RUN();

  ASSERT_EQUAL_FP64(1.2, d0);
  ASSERT_EQUAL_FP64(2.3, d1);
  ASSERT_EQUAL_FP64(2.3, dst[0]);
  ASSERT_EQUAL_FP64(1.2, dst[1]);
  ASSERT_EQUAL_FP64(2.3, d2);
  ASSERT_EQUAL_FP64(3.4, d3);
  ASSERT_EQUAL_FP64(3.4, dst[2]);
  ASSERT_EQUAL_FP64(2.3, dst[3]);
  ASSERT_EQUAL_FP64(3.4, d4);
  ASSERT_EQUAL_FP64(2.3, d5);
  ASSERT_EQUAL_FP64(3.4, dst[4]);
  ASSERT_EQUAL_FP64(2.3, dst[5]);
  ASSERT_EQUAL_64(src_base, x16);
  ASSERT_EQUAL_64(dst_base, x17);
  ASSERT_EQUAL_64(src_base + 24, x18);
  ASSERT_EQUAL_64(dst_base + 48, x19);

  TEARDOWN();
}


TEST(ldp_stp_preindex) {
  SETUP();

  uint64_t src[3] = {0x0011223344556677,
                     0x8899aabbccddeeff,
                     0xffeeddccbbaa9988};
  uint64_t dst[5] = {0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x16, src_base);
  __ Mov(x17, dst_base);
  __ Mov(x18, dst_base + 16);
  __ Ldp(w0, w1, MemOperand(x16, 4, PreIndex));
  __ Mov(x19, x16);
  __ Ldp(w2, w3, MemOperand(x16, -4, PreIndex));
  __ Stp(w2, w3, MemOperand(x17, 4, PreIndex));
  __ Mov(x20, x17);
  __ Stp(w0, w1, MemOperand(x17, -4, PreIndex));
  __ Ldp(x4, x5, MemOperand(x16, 8, PreIndex));
  __ Mov(x21, x16);
  __ Ldp(x6, x7, MemOperand(x16, -8, PreIndex));
  __ Stp(x7, x6, MemOperand(x18, 8, PreIndex));
  __ Mov(x22, x18);
  __ Stp(x5, x4, MemOperand(x18, -8, PreIndex));
  END();

  RUN();

  ASSERT_EQUAL_64(0x00112233, x0);
  ASSERT_EQUAL_64(0xccddeeff, x1);
  ASSERT_EQUAL_64(0x44556677, x2);
  ASSERT_EQUAL_64(0x00112233, x3);
  ASSERT_EQUAL_64(0xccddeeff00112233, dst[0]);
  ASSERT_EQUAL_64(0x0000000000112233, dst[1]);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, x4);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, x5);
  ASSERT_EQUAL_64(0x0011223344556677, x6);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, x7);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[2]);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[3]);
  ASSERT_EQUAL_64(0x0011223344556677, dst[4]);
  ASSERT_EQUAL_64(src_base, x16);
  ASSERT_EQUAL_64(dst_base, x17);
  ASSERT_EQUAL_64(dst_base + 16, x18);
  ASSERT_EQUAL_64(src_base + 4, x19);
  ASSERT_EQUAL_64(dst_base + 4, x20);
  ASSERT_EQUAL_64(src_base + 8, x21);
  ASSERT_EQUAL_64(dst_base + 24, x22);

  TEARDOWN();
}


TEST(ldp_stp_preindex_wide) {
  SETUP();

  uint64_t src[3] = {0x0011223344556677,
                     0x8899aabbccddeeff,
                     0xffeeddccbbaa9988};
  uint64_t dst[5] = {0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
  // Move base too far from the array to force multiple instructions
  // to be emitted.
  const int64_t base_offset = 1024;

  START();
  __ Mov(x24, src_base - base_offset);
  __ Mov(x25, dst_base + base_offset);
  __ Mov(x18, dst_base + base_offset + 16);
  __ Ldp(w0, w1, MemOperand(x24, base_offset + 4, PreIndex));
  __ Mov(x19, x24);
  __ Mov(x24, src_base - base_offset + 4);
  __ Ldp(w2, w3, MemOperand(x24, base_offset - 4, PreIndex));
  __ Stp(w2, w3, MemOperand(x25, 4 - base_offset, PreIndex));
  __ Mov(x20, x25);
  __ Mov(x25, dst_base + base_offset + 4);
  __ Mov(x24, src_base - base_offset);
  __ Stp(w0, w1, MemOperand(x25, -4 - base_offset, PreIndex));
  __ Ldp(x4, x5, MemOperand(x24, base_offset + 8, PreIndex));
  __ Mov(x21, x24);
  __ Mov(x24, src_base - base_offset + 8);
  __ Ldp(x6, x7, MemOperand(x24, base_offset - 8, PreIndex));
  __ Stp(x7, x6, MemOperand(x18, 8 - base_offset, PreIndex));
  __ Mov(x22, x18);
  __ Mov(x18, dst_base + base_offset + 16 + 8);
  __ Stp(x5, x4, MemOperand(x18, -8 - base_offset, PreIndex));
  END();

  RUN();

  ASSERT_EQUAL_64(0x00112233, x0);
  ASSERT_EQUAL_64(0xccddeeff, x1);
  ASSERT_EQUAL_64(0x44556677, x2);
  ASSERT_EQUAL_64(0x00112233, x3);
  ASSERT_EQUAL_64(0xccddeeff00112233, dst[0]);
  ASSERT_EQUAL_64(0x0000000000112233, dst[1]);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, x4);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, x5);
  ASSERT_EQUAL_64(0x0011223344556677, x6);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, x7);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[2]);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[3]);
  ASSERT_EQUAL_64(0x0011223344556677, dst[4]);
  ASSERT_EQUAL_64(src_base, x24);
  ASSERT_EQUAL_64(dst_base, x25);
  ASSERT_EQUAL_64(dst_base + 16, x18);
  ASSERT_EQUAL_64(src_base + 4, x19);
  ASSERT_EQUAL_64(dst_base + 4, x20);
  ASSERT_EQUAL_64(src_base + 8, x21);
  ASSERT_EQUAL_64(dst_base + 24, x22);

  TEARDOWN();
}


TEST(ldp_stp_postindex) {
  SETUP();

  uint64_t src[4] = {0x0011223344556677,
                     0x8899aabbccddeeff,
                     0xffeeddccbbaa9988,
                     0x7766554433221100};
  uint64_t dst[5] = {0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x16, src_base);
  __ Mov(x17, dst_base);
  __ Mov(x18, dst_base + 16);
  __ Ldp(w0, w1, MemOperand(x16, 4, PostIndex));
  __ Mov(x19, x16);
  __ Ldp(w2, w3, MemOperand(x16, -4, PostIndex));
  __ Stp(w2, w3, MemOperand(x17, 4, PostIndex));
  __ Mov(x20, x17);
  __ Stp(w0, w1, MemOperand(x17, -4, PostIndex));
  __ Ldp(x4, x5, MemOperand(x16, 8, PostIndex));
  __ Mov(x21, x16);
  __ Ldp(x6, x7, MemOperand(x16, -8, PostIndex));
  __ Stp(x7, x6, MemOperand(x18, 8, PostIndex));
  __ Mov(x22, x18);
  __ Stp(x5, x4, MemOperand(x18, -8, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_64(0x44556677, x0);
  ASSERT_EQUAL_64(0x00112233, x1);
  ASSERT_EQUAL_64(0x00112233, x2);
  ASSERT_EQUAL_64(0xccddeeff, x3);
  ASSERT_EQUAL_64(0x4455667700112233, dst[0]);
  ASSERT_EQUAL_64(0x0000000000112233, dst[1]);
  ASSERT_EQUAL_64(0x0011223344556677, x4);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, x5);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, x6);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, x7);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[2]);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[3]);
  ASSERT_EQUAL_64(0x0011223344556677, dst[4]);
  ASSERT_EQUAL_64(src_base, x16);
  ASSERT_EQUAL_64(dst_base, x17);
  ASSERT_EQUAL_64(dst_base + 16, x18);
  ASSERT_EQUAL_64(src_base + 4, x19);
  ASSERT_EQUAL_64(dst_base + 4, x20);
  ASSERT_EQUAL_64(src_base + 8, x21);
  ASSERT_EQUAL_64(dst_base + 24, x22);

  TEARDOWN();
}


TEST(ldp_stp_postindex_wide) {
  SETUP();

  uint64_t src[4] = {0x0011223344556677,
                     0x8899aabbccddeeff,
                     0xffeeddccbbaa9988,
                     0x7766554433221100};
  uint64_t dst[5] = {0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
  // Move base too far from the array to force multiple instructions
  // to be emitted.
  const int64_t base_offset = 1024;

  START();
  __ Mov(x24, src_base);
  __ Mov(x25, dst_base);
  __ Mov(x18, dst_base + 16);
  __ Ldp(w0, w1, MemOperand(x24, base_offset + 4, PostIndex));
  __ Mov(x19, x24);
  __ Sub(x24, x24, base_offset);
  __ Ldp(w2, w3, MemOperand(x24, base_offset - 4, PostIndex));
  __ Stp(w2, w3, MemOperand(x25, 4 - base_offset, PostIndex));
  __ Mov(x20, x25);
  __ Sub(x24, x24, base_offset);
  __ Add(x25, x25, base_offset);
  __ Stp(w0, w1, MemOperand(x25, -4 - base_offset, PostIndex));
  __ Ldp(x4, x5, MemOperand(x24, base_offset + 8, PostIndex));
  __ Mov(x21, x24);
  __ Sub(x24, x24, base_offset);
  __ Ldp(x6, x7, MemOperand(x24, base_offset - 8, PostIndex));
  __ Stp(x7, x6, MemOperand(x18, 8 - base_offset, PostIndex));
  __ Mov(x22, x18);
  __ Add(x18, x18, base_offset);
  __ Stp(x5, x4, MemOperand(x18, -8 - base_offset, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_64(0x44556677, x0);
  ASSERT_EQUAL_64(0x00112233, x1);
  ASSERT_EQUAL_64(0x00112233, x2);
  ASSERT_EQUAL_64(0xccddeeff, x3);
  ASSERT_EQUAL_64(0x4455667700112233, dst[0]);
  ASSERT_EQUAL_64(0x0000000000112233, dst[1]);
  ASSERT_EQUAL_64(0x0011223344556677, x4);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, x5);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, x6);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, x7);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[2]);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[3]);
  ASSERT_EQUAL_64(0x0011223344556677, dst[4]);
  ASSERT_EQUAL_64(src_base + base_offset, x24);
  ASSERT_EQUAL_64(dst_base - base_offset, x25);
  ASSERT_EQUAL_64(dst_base - base_offset + 16, x18);
  ASSERT_EQUAL_64(src_base + base_offset + 4, x19);
  ASSERT_EQUAL_64(dst_base - base_offset + 4, x20);
  ASSERT_EQUAL_64(src_base + base_offset + 8, x21);
  ASSERT_EQUAL_64(dst_base - base_offset + 24, x22);

  TEARDOWN();
}


TEST(ldp_sign_extend) {
  SETUP();

  uint32_t src[2] = {0x80000000, 0x7fffffff};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x24, src_base);
  __ Ldpsw(x0, x1, MemOperand(x24));
  END();

  RUN();

  ASSERT_EQUAL_64(0xffffffff80000000, x0);
  ASSERT_EQUAL_64(0x000000007fffffff, x1);

  TEARDOWN();
}
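
// In the ldur/stur tests below, immediates such as 1, 3 or -9 are not
// multiples of the access size, so they cannot be encoded as scaled
// offsets; the assembler falls back to the unscaled-offset instructions
// (ldur, stur and friends), which accept any signed nine-bit byte offset.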


TEST(ldur_stur) {
  SETUP();

  int64_t src[2] = {0x0123456789abcdef, 0x0123456789abcdef};
  int64_t dst[5] = {0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, dst_base);
  __ Mov(x19, src_base + 16);
  __ Mov(x20, dst_base + 32);
  __ Mov(x21, dst_base + 40);
  __ Ldr(w0, MemOperand(x17, 1));
  __ Str(w0, MemOperand(x18, 2));
  __ Ldr(x1, MemOperand(x17, 3));
  __ Str(x1, MemOperand(x18, 9));
  __ Ldr(w2, MemOperand(x19, -9));
  __ Str(w2, MemOperand(x20, -5));
  __ Ldrb(w3, MemOperand(x19, -1));
  __ Strb(w3, MemOperand(x21, -1));
  END();

  RUN();

  ASSERT_EQUAL_64(0x6789abcd, x0);
  ASSERT_EQUAL_64(0x00006789abcd0000, dst[0]);
  ASSERT_EQUAL_64(0xabcdef0123456789, x1);
  ASSERT_EQUAL_64(0xcdef012345678900, dst[1]);
  ASSERT_EQUAL_64(0x000000ab, dst[2]);
  ASSERT_EQUAL_64(0xabcdef01, x2);
  ASSERT_EQUAL_64(0x00abcdef01000000, dst[3]);
  ASSERT_EQUAL_64(0x00000001, x3);
  ASSERT_EQUAL_64(0x0100000000000000, dst[4]);
  ASSERT_EQUAL_64(src_base, x17);
  ASSERT_EQUAL_64(dst_base, x18);
  ASSERT_EQUAL_64(src_base + 16, x19);
  ASSERT_EQUAL_64(dst_base + 32, x20);

  TEARDOWN();
}


TEST(ldur_stur_fp) {
  SETUP();

  int64_t src[3] = {0x0123456789abcdef, 0x0123456789abcdef, 0x0123456789abcdef};
  int64_t dst[5] = {0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, dst_base);
  __ Ldr(b0, MemOperand(x17));
  __ Str(b0, MemOperand(x18));
  __ Ldr(h1, MemOperand(x17, 1));
  __ Str(h1, MemOperand(x18, 1));
  __ Ldr(s2, MemOperand(x17, 2));
  __ Str(s2, MemOperand(x18, 3));
  __ Ldr(d3, MemOperand(x17, 3));
  __ Str(d3, MemOperand(x18, 7));
  __ Ldr(q4, MemOperand(x17, 4));
  __ Str(q4, MemOperand(x18, 15));
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0xef, q0);
  ASSERT_EQUAL_128(0, 0xabcd, q1);
  ASSERT_EQUAL_128(0, 0x456789ab, q2);
  ASSERT_EQUAL_128(0, 0xabcdef0123456789, q3);
  ASSERT_EQUAL_128(0x89abcdef01234567, 0x89abcdef01234567, q4);
  ASSERT_EQUAL_64(0x89456789ababcdef, dst[0]);
  ASSERT_EQUAL_64(0x67abcdef01234567, dst[1]);
  ASSERT_EQUAL_64(0x6789abcdef012345, dst[2]);
  ASSERT_EQUAL_64(0x0089abcdef012345, dst[3]);

  TEARDOWN();
}


TEST(ldr_literal) {
  SETUP();

  START();
  __ Ldr(x2, 0x1234567890abcdef);
  __ Ldr(w3, 0xfedcba09);
  __ Ldrsw(x4, 0x7fffffff);
  __ Ldrsw(x5, 0x80000000);
  __ Ldr(q11, 0x1234000056780000, 0xabcd0000ef000000);
  __ Ldr(d13, 1.234);
  __ Ldr(s25, 2.5);
  END();

  RUN();

  ASSERT_EQUAL_64(0x1234567890abcdef, x2);
  ASSERT_EQUAL_64(0xfedcba09, x3);
  ASSERT_EQUAL_64(0x7fffffff, x4);
  ASSERT_EQUAL_64(0xffffffff80000000, x5);
  ASSERT_EQUAL_128(0x1234000056780000, 0xabcd0000ef000000, q11);
  ASSERT_EQUAL_FP64(1.234, d13);
  ASSERT_EQUAL_FP32(2.5, s25);

  TEARDOWN();
}
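
// Ldr (literal) loads from a pc-relative literal pool that the
// MacroAssembler emits automatically. The range test below pads with nops
// until the first pool would fall out of range of its loads, checks that
// the pool was flushed, and then builds up a second pool.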


TEST(ldr_literal_range) {
  SETUP();

  START();
  // Make sure the pool is empty.
  masm.EmitLiteralPool(LiteralPool::kBranchRequired);
  ASSERT_LITERAL_POOL_SIZE(0);

  // Create some literal pool entries.
  __ Ldr(x0, 0x1234567890abcdef);
  __ Ldr(w1, 0xfedcba09);
  __ Ldrsw(x2, 0x7fffffff);
  __ Ldrsw(x3, 0x80000000);
  __ Ldr(q2, 0x1234000056780000, 0xabcd0000ef000000);
  __ Ldr(d0, 1.234);
  __ Ldr(s1, 2.5);
  ASSERT_LITERAL_POOL_SIZE(48);

  // Emit more code than the maximum literal load range to ensure the pool
  // is emitted.
  const ptrdiff_t end = masm.GetCursorOffset() + 2 * kMaxLoadLiteralRange;
  while (masm.GetCursorOffset() < end) {
    __ Nop();
  }

  // The pool should have been emitted.
  ASSERT_LITERAL_POOL_SIZE(0);

  // These loads should be after the pool (and will require a new one).
  __ Ldr(x4, 0x34567890abcdef12);
  __ Ldr(w5, 0xdcba09fe);
  __ Ldrsw(x6, 0x7fffffff);
  __ Ldrsw(x7, 0x80000000);
  __ Ldr(q6, 0x1234000056780000, 0xabcd0000ef000000);
  __ Ldr(d4, 123.4);
  __ Ldr(s5, 250.0);
  ASSERT_LITERAL_POOL_SIZE(48);
  END();

  RUN();

  // Check that the literals loaded correctly.
  ASSERT_EQUAL_64(0x1234567890abcdef, x0);
  ASSERT_EQUAL_64(0xfedcba09, x1);
  ASSERT_EQUAL_64(0x7fffffff, x2);
  ASSERT_EQUAL_64(0xffffffff80000000, x3);
  ASSERT_EQUAL_128(0x1234000056780000, 0xabcd0000ef000000, q2);
  ASSERT_EQUAL_FP64(1.234, d0);
  ASSERT_EQUAL_FP32(2.5, s1);
  ASSERT_EQUAL_64(0x34567890abcdef12, x4);
  ASSERT_EQUAL_64(0xdcba09fe, x5);
  ASSERT_EQUAL_64(0x7fffffff, x6);
  ASSERT_EQUAL_64(0xffffffff80000000, x7);
  ASSERT_EQUAL_128(0x1234000056780000, 0xabcd0000ef000000, q6);
  ASSERT_EQUAL_FP64(123.4, d4);
  ASSERT_EQUAL_FP32(250.0, s5);

  TEARDOWN();
}


TEST(ldr_literal_values_q) {
  SETUP();

  static const uint64_t kHalfValues[] = {0x8000000000000000,
                                         0x7fffffffffffffff,
                                         0x0000000000000000,
                                         0xffffffffffffffff,
                                         0x00ff00ff00ff00ff,
                                         0x1234567890abcdef};
  const int card = sizeof(kHalfValues) / sizeof(kHalfValues[0]);
  const Register& ref_low64 = x1;
  const Register& ref_high64 = x2;
  const Register& loaded_low64 = x3;
  const Register& loaded_high64 = x4;
  const VRegister& tgt = q0;

  START();
  __ Mov(x0, 0);

  for (int i = 0; i < card; i++) {
    __ Mov(ref_low64, kHalfValues[i]);
    for (int j = 0; j < card; j++) {
      __ Mov(ref_high64, kHalfValues[j]);
      __ Ldr(tgt, kHalfValues[j], kHalfValues[i]);
      __ Mov(loaded_low64, tgt.V2D(), 0);
      __ Mov(loaded_high64, tgt.V2D(), 1);
      __ Cmp(loaded_low64, ref_low64);
      __ Ccmp(loaded_high64, ref_high64, NoFlag, eq);
      __ Cset(x0, ne);
    }
  }
  END();

  RUN();

  // If one of the values differs, the trace can be used to identify which one.
  ASSERT_EQUAL_64(0, x0);

  TEARDOWN();
}


template <typename T>
void LoadIntValueHelper(T values[], int card) {
  SETUP();

  const bool is_32bit = (sizeof(T) == 4);
  Register tgt1 = is_32bit ? Register(w1) : Register(x1);
  Register tgt2 = is_32bit ? Register(w2) : Register(x2);

  START();
  __ Mov(x0, 0);

  // If one of the values differs, x0 will be set to one.
  for (int i = 0; i < card; ++i) {
    __ Mov(tgt1, values[i]);
    __ Ldr(tgt2, values[i]);
    __ Cmp(tgt1, tgt2);
    __ Cset(x0, ne);
  }
  END();

  RUN();

  // If one of the values differs, the trace can be used to identify which one.
  ASSERT_EQUAL_64(0, x0);

  TEARDOWN();
}


TEST(ldr_literal_values_x) {
  static const uint64_t kValues[] = {0x8000000000000000,
                                     0x7fffffffffffffff,
                                     0x0000000000000000,
                                     0xffffffffffffffff,
                                     0x00ff00ff00ff00ff,
                                     0x1234567890abcdef};

  LoadIntValueHelper(kValues, sizeof(kValues) / sizeof(kValues[0]));
}


TEST(ldr_literal_values_w) {
  static const uint32_t kValues[] = {0x80000000,
                                     0x7fffffff,
                                     0x00000000,
                                     0xffffffff,
                                     0x00ff00ff,
                                     0x12345678,
                                     0x90abcdef};

  LoadIntValueHelper(kValues, sizeof(kValues) / sizeof(kValues[0]));
}


template <typename T>
void LoadFPValueHelper(T values[], int card) {
  SETUP();

  const bool is_32bits = (sizeof(T) == 4);
  const FPRegister& fp_tgt = is_32bits ? s2 : d2;
  const Register& tgt1 = is_32bits ? Register(w1) : Register(x1);
  const Register& tgt2 = is_32bits ? Register(w2) : Register(x2);

  START();
  __ Mov(x0, 0);

  // If one of the values differs, x0 will be set to one.
  for (int i = 0; i < card; ++i) {
    __ Mov(tgt1,
           is_32bits ? FloatToRawbits(values[i]) : DoubleToRawbits(values[i]));
    __ Ldr(fp_tgt, values[i]);
    __ Fmov(tgt2, fp_tgt);
    __ Cmp(tgt1, tgt2);
    __ Cset(x0, ne);
  }
  END();

  RUN();

  // If one of the values differs, the trace can be used to identify which one.
  ASSERT_EQUAL_64(0, x0);

  TEARDOWN();
}

TEST(ldr_literal_values_d) {
  static const double kValues[] = {-0.0, 0.0, -1.0, 1.0, -1e10, 1e10};

  LoadFPValueHelper(kValues, sizeof(kValues) / sizeof(kValues[0]));
}


TEST(ldr_literal_values_s) {
  static const float kValues[] = {-0.0, 0.0, -1.0, 1.0, -1e10, 1e10};

  LoadFPValueHelper(kValues, sizeof(kValues) / sizeof(kValues[0]));
}
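
// The "custom" tests below build literal pools by hand: place() emits a
// Literal's value at the current cursor position and binds any loads to it,
// while ExactAssemblyScope guarantees that the enclosed code occupies
// exactly the stated number of bytes and keeps the MacroAssembler from
// inserting anything of its own (such as an automatic pool) inside the
// scope.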
7365 __ B(&end_of_pool_after); 7366 { 7367 ExactAssemblyScope scope(&masm, kSizeOfPoolInBytes); 7368 __ place(&after_x); 7369 __ place(&after_w); 7370 __ place(&after_sx); 7371 __ place(&after_q); 7372 __ place(&after_d); 7373 __ place(&after_s); 7374 } 7375 __ Bind(&end_of_pool_after); 7376 7377 END(); 7378 7379 RUN(); 7380 7381 ASSERT_EQUAL_64(0x1234567890abcdef, x2); 7382 ASSERT_EQUAL_64(0xfedcba09, x3); 7383 ASSERT_EQUAL_64(0xffffffff80000000, x5); 7384 ASSERT_EQUAL_128(0x1234000056780000, 0xabcd0000ef000000, q11); 7385 ASSERT_EQUAL_FP64(1.234, d13); 7386 ASSERT_EQUAL_FP32(2.5, s25); 7387 7388 ASSERT_EQUAL_64(0x1234567890abcdef, x6); 7389 ASSERT_EQUAL_64(0xfedcba09, x7); 7390 ASSERT_EQUAL_64(0xffffffff80000000, x8); 7391 ASSERT_EQUAL_128(0x1234000056780000, 0xabcd0000ef000000, q18); 7392 ASSERT_EQUAL_FP64(1.234, d14); 7393 ASSERT_EQUAL_FP32(2.5, s26); 7394 7395 TEARDOWN(); 7396 } 7397 7398 7399 TEST(ldr_literal_custom_shared) { 7400 SETUP(); 7401 7402 Label end_of_pool_before; 7403 Label end_of_pool_after; 7404 7405 const size_t kSizeOfPoolInBytes = 40; 7406 7407 Literal<uint64_t> before_x(0x1234567890abcdef); 7408 Literal<uint32_t> before_w(0xfedcba09); 7409 Literal<uint64_t> before_q(0x1234000056780000, 0xabcd0000ef000000); 7410 Literal<double> before_d(1.234); 7411 Literal<float> before_s(2.5); 7412 7413 Literal<uint64_t> after_x(0x1234567890abcdef); 7414 Literal<uint32_t> after_w(0xfedcba09); 7415 Literal<uint64_t> after_q(0x1234000056780000, 0xabcd0000ef000000); 7416 Literal<double> after_d(1.234); 7417 Literal<float> after_s(2.5); 7418 7419 START(); 7420 7421 // Manually generate a pool. 7422 __ B(&end_of_pool_before); 7423 { 7424 ExactAssemblyScope scope(&masm, kSizeOfPoolInBytes); 7425 __ place(&before_x); 7426 __ place(&before_w); 7427 __ place(&before_q); 7428 __ place(&before_d); 7429 __ place(&before_s); 7430 } 7431 __ Bind(&end_of_pool_before); 7432 7433 // Load the entries several times to test that literals can be shared. 7434 for (int i = 0; i < 50; i++) { 7435 ExactAssemblyScope scope(&masm, 12 * kInstructionSize); 7436 __ ldr(x2, &before_x); 7437 __ ldr(w3, &before_w); 7438 __ ldrsw(x5, &before_w); // Re-use before_w. 7439 __ ldr(q11, &before_q); 7440 __ ldr(d13, &before_d); 7441 __ ldr(s25, &before_s); 7442 7443 __ ldr(x6, &after_x); 7444 __ ldr(w7, &after_w); 7445 __ ldrsw(x8, &after_w); // Re-use after_w. 7446 __ ldr(q18, &after_q); 7447 __ ldr(d14, &after_d); 7448 __ ldr(s26, &after_s); 7449 } 7450 7451 // Manually generate a pool. 7452 __ B(&end_of_pool_after); 7453 { 7454 ExactAssemblyScope scope(&masm, kSizeOfPoolInBytes); 7455 __ place(&after_x); 7456 __ place(&after_w); 7457 __ place(&after_q); 7458 __ place(&after_d); 7459 __ place(&after_s); 7460 } 7461 __ Bind(&end_of_pool_after); 7462 7463 END(); 7464 7465 RUN(); 7466 7467 ASSERT_EQUAL_64(0x1234567890abcdef, x2); 7468 ASSERT_EQUAL_64(0xfedcba09, x3); 7469 ASSERT_EQUAL_64(0xfffffffffedcba09, x5); 7470 ASSERT_EQUAL_128(0x1234000056780000, 0xabcd0000ef000000, q11); 7471 ASSERT_EQUAL_FP64(1.234, d13); 7472 ASSERT_EQUAL_FP32(2.5, s25); 7473 7474 ASSERT_EQUAL_64(0x1234567890abcdef, x6); 7475 ASSERT_EQUAL_64(0xfedcba09, x7); 7476 ASSERT_EQUAL_64(0xfffffffffedcba09, x8); 7477 ASSERT_EQUAL_128(0x1234000056780000, 0xabcd0000ef000000, q18); 7478 ASSERT_EQUAL_FP64(1.234, d14); 7479 ASSERT_EQUAL_FP32(2.5, s26); 7480 7481 TEARDOWN(); 7482 } 7483 7484 7485 TEST(prfm_offset) { 7486 SETUP(); 7487 7488 START(); 7489 // The address used in prfm doesn't have to be valid. 
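// Architecturally, a prefetch is only a hint: it cannot generate a
// synchronous fault and does not change architectural state, which is why an
// arbitrary bit pattern is acceptable as the base address. For the same
// reason these prfm tests make no value assertions; they only check that
// every operation and addressing form can be generated and executed.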
7490 __ Mov(x0, 0x0123456789abcdef); 7491 7492 for (int i = 0; i < (1 << ImmPrefetchOperation_width); i++) { 7493 // Unallocated prefetch operations are ignored, so test all of them. 7494 PrefetchOperation op = static_cast<PrefetchOperation>(i); 7495 7496 __ Prfm(op, MemOperand(x0)); 7497 __ Prfm(op, MemOperand(x0, 8)); 7498 __ Prfm(op, MemOperand(x0, 32760)); 7499 __ Prfm(op, MemOperand(x0, 32768)); 7500 7501 __ Prfm(op, MemOperand(x0, 1)); 7502 __ Prfm(op, MemOperand(x0, 9)); 7503 __ Prfm(op, MemOperand(x0, 255)); 7504 __ Prfm(op, MemOperand(x0, 257)); 7505 __ Prfm(op, MemOperand(x0, -1)); 7506 __ Prfm(op, MemOperand(x0, -9)); 7507 __ Prfm(op, MemOperand(x0, -255)); 7508 __ Prfm(op, MemOperand(x0, -257)); 7509 7510 __ Prfm(op, MemOperand(x0, 0xfedcba9876543210)); 7511 } 7512 7513 END(); 7514 RUN(); 7515 TEARDOWN(); 7516 } 7517 7518 7519 TEST(prfm_regoffset) { 7520 SETUP(); 7521 7522 START(); 7523 // The address used in prfm doesn't have to be valid. 7524 __ Mov(x0, 0x0123456789abcdef); 7525 7526 CPURegList inputs(CPURegister::kRegister, kXRegSize, 10, 18); 7527 __ Mov(x10, 0); 7528 __ Mov(x11, 1); 7529 __ Mov(x12, 8); 7530 __ Mov(x13, 255); 7531 __ Mov(x14, -0); 7532 __ Mov(x15, -1); 7533 __ Mov(x16, -8); 7534 __ Mov(x17, -255); 7535 __ Mov(x18, 0xfedcba9876543210); 7536 7537 for (int i = 0; i < (1 << ImmPrefetchOperation_width); i++) { 7538 // Unallocated prefetch operations are ignored, so test all of them. 7539 PrefetchOperation op = static_cast<PrefetchOperation>(i); 7540 7541 CPURegList loop = inputs; 7542 while (!loop.IsEmpty()) { 7543 Register input(loop.PopLowestIndex()); 7544 __ Prfm(op, MemOperand(x0, input)); 7545 __ Prfm(op, MemOperand(x0, input, UXTW)); 7546 __ Prfm(op, MemOperand(x0, input, UXTW, 3)); 7547 __ Prfm(op, MemOperand(x0, input, LSL)); 7548 __ Prfm(op, MemOperand(x0, input, LSL, 3)); 7549 __ Prfm(op, MemOperand(x0, input, SXTW)); 7550 __ Prfm(op, MemOperand(x0, input, SXTW, 3)); 7551 __ Prfm(op, MemOperand(x0, input, SXTX)); 7552 __ Prfm(op, MemOperand(x0, input, SXTX, 3)); 7553 } 7554 } 7555 7556 END(); 7557 RUN(); 7558 TEARDOWN(); 7559 } 7560 7561 7562 TEST(prfm_literal_imm19) { 7563 SETUP(); 7564 START(); 7565 7566 for (int i = 0; i < (1 << ImmPrefetchOperation_width); i++) { 7567 // Unallocated prefetch operations are ignored, so test all of them. 7568 PrefetchOperation op = static_cast<PrefetchOperation>(i); 7569 7570 ExactAssemblyScope scope(&masm, 7 * kInstructionSize); 7571 // The address used in prfm doesn't have to be valid. 7572 __ prfm(op, INT64_C(0)); 7573 __ prfm(op, 1); 7574 __ prfm(op, -1); 7575 __ prfm(op, 1000); 7576 __ prfm(op, -1000); 7577 __ prfm(op, 0x3ffff); 7578 __ prfm(op, -0x40000); 7579 } 7580 7581 END(); 7582 RUN(); 7583 TEARDOWN(); 7584 } 7585 7586 7587 TEST(prfm_literal) { 7588 SETUP(); 7589 7590 Label end_of_pool_before; 7591 Label end_of_pool_after; 7592 Literal<uint64_t> before(0); 7593 Literal<uint64_t> after(0); 7594 7595 START(); 7596 7597 // Manually generate a pool. 7598 __ B(&end_of_pool_before); 7599 { 7600 ExactAssemblyScope scope(&masm, before.GetSize()); 7601 __ place(&before); 7602 } 7603 __ Bind(&end_of_pool_before); 7604 7605 for (int i = 0; i < (1 << ImmPrefetchOperation_width); i++) { 7606 // Unallocated prefetch operations are ignored, so test all of them. 7607 PrefetchOperation op = static_cast<PrefetchOperation>(i); 7608 7609 ExactAssemblyScope guard(&masm, 2 * kInstructionSize); 7610 __ prfm(op, &before); 7611 __ prfm(op, &after); 7612 } 7613 7614 // Manually generate a pool. 
7615 __ B(&end_of_pool_after); 7616 { 7617 ExactAssemblyScope scope(&masm, after.GetSize()); 7618 __ place(&after); 7619 } 7620 __ Bind(&end_of_pool_after); 7621 7622 END(); 7623 RUN(); 7624 TEARDOWN(); 7625 } 7626 7627 7628 TEST(prfm_wide) { 7629 SETUP(); 7630 7631 START(); 7632 // The address used in prfm doesn't have to be valid. 7633 __ Mov(x0, 0x0123456789abcdef); 7634 7635 for (int i = 0; i < (1 << ImmPrefetchOperation_width); i++) { 7636 // Unallocated prefetch operations are ignored, so test all of them. 7637 PrefetchOperation op = static_cast<PrefetchOperation>(i); 7638 7639 __ Prfm(op, MemOperand(x0, 0x40000)); 7640 __ Prfm(op, MemOperand(x0, -0x40001)); 7641 __ Prfm(op, MemOperand(x0, UINT64_C(0x5555555555555555))); 7642 __ Prfm(op, MemOperand(x0, UINT64_C(0xfedcba9876543210))); 7643 } 7644 7645 END(); 7646 RUN(); 7647 TEARDOWN(); 7648 } 7649 7650 7651 TEST(load_prfm_literal) { 7652 // Test literals shared between both prfm and ldr. 7653 SETUP(); 7654 7655 Label end_of_pool_before; 7656 Label end_of_pool_after; 7657 7658 const size_t kSizeOfPoolInBytes = 28; 7659 7660 Literal<uint64_t> before_x(0x1234567890abcdef); 7661 Literal<uint32_t> before_w(0xfedcba09); 7662 Literal<uint32_t> before_sx(0x80000000); 7663 Literal<double> before_d(1.234); 7664 Literal<float> before_s(2.5); 7665 Literal<uint64_t> after_x(0x1234567890abcdef); 7666 Literal<uint32_t> after_w(0xfedcba09); 7667 Literal<uint32_t> after_sx(0x80000000); 7668 Literal<double> after_d(1.234); 7669 Literal<float> after_s(2.5); 7670 7671 START(); 7672 7673 // Manually generate a pool. 7674 __ B(&end_of_pool_before); 7675 { 7676 ExactAssemblyScope scope(&masm, kSizeOfPoolInBytes); 7677 __ place(&before_x); 7678 __ place(&before_w); 7679 __ place(&before_sx); 7680 __ place(&before_d); 7681 __ place(&before_s); 7682 } 7683 __ Bind(&end_of_pool_before); 7684 7685 for (int i = 0; i < (1 << ImmPrefetchOperation_width); i++) { 7686 // Unallocated prefetch operations are ignored, so test all of them. 7687 PrefetchOperation op = static_cast<PrefetchOperation>(i); 7688 ExactAssemblyScope scope(&masm, 10 * kInstructionSize); 7689 7690 __ prfm(op, &before_x); 7691 __ prfm(op, &before_w); 7692 __ prfm(op, &before_sx); 7693 __ prfm(op, &before_d); 7694 __ prfm(op, &before_s); 7695 7696 __ prfm(op, &after_x); 7697 __ prfm(op, &after_w); 7698 __ prfm(op, &after_sx); 7699 __ prfm(op, &after_d); 7700 __ prfm(op, &after_s); 7701 } 7702 7703 { 7704 ExactAssemblyScope scope(&masm, 10 * kInstructionSize); 7705 __ ldr(x2, &before_x); 7706 __ ldr(w3, &before_w); 7707 __ ldrsw(x5, &before_sx); 7708 __ ldr(d13, &before_d); 7709 __ ldr(s25, &before_s); 7710 7711 __ ldr(x6, &after_x); 7712 __ ldr(w7, &after_w); 7713 __ ldrsw(x8, &after_sx); 7714 __ ldr(d14, &after_d); 7715 __ ldr(s26, &after_s); 7716 } 7717 7718 // Manually generate a pool. 
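// Note: prfm (literal) and ldr (literal) share the same PC-relative imm19
// addressing form (a signed 19-bit word offset, roughly +/-1MB of range),
// which is what lets the prefetches and loads above reference the same pool
// entries.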
7719 __ B(&end_of_pool_after); 7720 { 7721 ExactAssemblyScope scope(&masm, kSizeOfPoolInBytes); 7722 __ place(&after_x); 7723 __ place(&after_w); 7724 __ place(&after_sx); 7725 __ place(&after_d); 7726 __ place(&after_s); 7727 } 7728 __ Bind(&end_of_pool_after); 7729 7730 END(); 7731 7732 RUN(); 7733 7734 ASSERT_EQUAL_64(0x1234567890abcdef, x2); 7735 ASSERT_EQUAL_64(0xfedcba09, x3); 7736 ASSERT_EQUAL_64(0xffffffff80000000, x5); 7737 ASSERT_EQUAL_FP64(1.234, d13); 7738 ASSERT_EQUAL_FP32(2.5, s25); 7739 7740 ASSERT_EQUAL_64(0x1234567890abcdef, x6); 7741 ASSERT_EQUAL_64(0xfedcba09, x7); 7742 ASSERT_EQUAL_64(0xffffffff80000000, x8); 7743 ASSERT_EQUAL_FP64(1.234, d14); 7744 ASSERT_EQUAL_FP32(2.5, s26); 7745 7746 TEARDOWN(); 7747 } 7748 7749 7750 TEST(add_sub_imm) { 7751 SETUP(); 7752 7753 START(); 7754 __ Mov(x0, 0x0); 7755 __ Mov(x1, 0x1111); 7756 __ Mov(x2, 0xffffffffffffffff); 7757 __ Mov(x3, 0x8000000000000000); 7758 7759 __ Add(x10, x0, Operand(0x123)); 7760 __ Add(x11, x1, Operand(0x122000)); 7761 __ Add(x12, x0, Operand(0xabc << 12)); 7762 __ Add(x13, x2, Operand(1)); 7763 7764 __ Add(w14, w0, Operand(0x123)); 7765 __ Add(w15, w1, Operand(0x122000)); 7766 __ Add(w16, w0, Operand(0xabc << 12)); 7767 __ Add(w17, w2, Operand(1)); 7768 7769 __ Sub(x20, x0, Operand(0x1)); 7770 __ Sub(x21, x1, Operand(0x111)); 7771 __ Sub(x22, x1, Operand(0x1 << 12)); 7772 __ Sub(x23, x3, Operand(1)); 7773 7774 __ Sub(w24, w0, Operand(0x1)); 7775 __ Sub(w25, w1, Operand(0x111)); 7776 __ Sub(w26, w1, Operand(0x1 << 12)); 7777 __ Sub(w27, w3, Operand(1)); 7778 END(); 7779 7780 RUN(); 7781 7782 ASSERT_EQUAL_64(0x123, x10); 7783 ASSERT_EQUAL_64(0x123111, x11); 7784 ASSERT_EQUAL_64(0xabc000, x12); 7785 ASSERT_EQUAL_64(0x0, x13); 7786 7787 ASSERT_EQUAL_32(0x123, w14); 7788 ASSERT_EQUAL_32(0x123111, w15); 7789 ASSERT_EQUAL_32(0xabc000, w16); 7790 ASSERT_EQUAL_32(0x0, w17); 7791 7792 ASSERT_EQUAL_64(0xffffffffffffffff, x20); 7793 ASSERT_EQUAL_64(0x1000, x21); 7794 ASSERT_EQUAL_64(0x111, x22); 7795 ASSERT_EQUAL_64(0x7fffffffffffffff, x23); 7796 7797 ASSERT_EQUAL_32(0xffffffff, w24); 7798 ASSERT_EQUAL_32(0x1000, w25); 7799 ASSERT_EQUAL_32(0x111, w26); 7800 ASSERT_EQUAL_32(0xffffffff, w27); 7801 7802 TEARDOWN(); 7803 } 7804 7805 7806 TEST(add_sub_wide_imm) { 7807 SETUP(); 7808 7809 START(); 7810 __ Mov(x0, 0x0); 7811 __ Mov(x1, 0x1); 7812 7813 __ Add(x10, x0, Operand(0x1234567890abcdef)); 7814 __ Add(x11, x1, Operand(0xffffffff)); 7815 7816 __ Add(w12, w0, Operand(0x12345678)); 7817 __ Add(w13, w1, Operand(0xffffffff)); 7818 7819 __ Add(w18, w0, Operand(kWMinInt)); 7820 __ Sub(w19, w0, Operand(kWMinInt)); 7821 7822 __ Sub(x20, x0, Operand(0x1234567890abcdef)); 7823 __ Sub(w21, w0, Operand(0x12345678)); 7824 7825 END(); 7826 7827 RUN(); 7828 7829 ASSERT_EQUAL_64(0x1234567890abcdef, x10); 7830 ASSERT_EQUAL_64(0x100000000, x11); 7831 7832 ASSERT_EQUAL_32(0x12345678, w12); 7833 ASSERT_EQUAL_64(0x0, x13); 7834 7835 ASSERT_EQUAL_32(kWMinInt, w18); 7836 ASSERT_EQUAL_32(kWMinInt, w19); 7837 7838 ASSERT_EQUAL_64(-0x1234567890abcdef, x20); 7839 ASSERT_EQUAL_32(-0x12345678, w21); 7840 7841 TEARDOWN(); 7842 } 7843 7844 7845 TEST(add_sub_shifted) { 7846 SETUP(); 7847 7848 START(); 7849 __ Mov(x0, 0); 7850 __ Mov(x1, 0x0123456789abcdef); 7851 __ Mov(x2, 0xfedcba9876543210); 7852 __ Mov(x3, 0xffffffffffffffff); 7853 7854 __ Add(x10, x1, Operand(x2)); 7855 __ Add(x11, x0, Operand(x1, LSL, 8)); 7856 __ Add(x12, x0, Operand(x1, LSR, 8)); 7857 __ Add(x13, x0, Operand(x1, ASR, 8)); 7858 __ Add(x14, x0, Operand(x2, ASR, 8)); 7859 __ Add(w15, 
w0, Operand(w1, ASR, 8)); 7860 __ Add(w18, w3, Operand(w1, ROR, 8)); 7861 __ Add(x19, x3, Operand(x1, ROR, 8)); 7862 7863 __ Sub(x20, x3, Operand(x2)); 7864 __ Sub(x21, x3, Operand(x1, LSL, 8)); 7865 __ Sub(x22, x3, Operand(x1, LSR, 8)); 7866 __ Sub(x23, x3, Operand(x1, ASR, 8)); 7867 __ Sub(x24, x3, Operand(x2, ASR, 8)); 7868 __ Sub(w25, w3, Operand(w1, ASR, 8)); 7869 __ Sub(w26, w3, Operand(w1, ROR, 8)); 7870 __ Sub(x27, x3, Operand(x1, ROR, 8)); 7871 END(); 7872 7873 RUN(); 7874 7875 ASSERT_EQUAL_64(0xffffffffffffffff, x10); 7876 ASSERT_EQUAL_64(0x23456789abcdef00, x11); 7877 ASSERT_EQUAL_64(0x000123456789abcd, x12); 7878 ASSERT_EQUAL_64(0x000123456789abcd, x13); 7879 ASSERT_EQUAL_64(0xfffedcba98765432, x14); 7880 ASSERT_EQUAL_64(0xff89abcd, x15); 7881 ASSERT_EQUAL_64(0xef89abcc, x18); 7882 ASSERT_EQUAL_64(0xef0123456789abcc, x19); 7883 7884 ASSERT_EQUAL_64(0x0123456789abcdef, x20); 7885 ASSERT_EQUAL_64(0xdcba9876543210ff, x21); 7886 ASSERT_EQUAL_64(0xfffedcba98765432, x22); 7887 ASSERT_EQUAL_64(0xfffedcba98765432, x23); 7888 ASSERT_EQUAL_64(0x000123456789abcd, x24); 7889 ASSERT_EQUAL_64(0x00765432, x25); 7890 ASSERT_EQUAL_64(0x10765432, x26); 7891 ASSERT_EQUAL_64(0x10fedcba98765432, x27); 7892 7893 TEARDOWN(); 7894 } 7895 7896 7897 TEST(add_sub_extended) { 7898 SETUP(); 7899 7900 START(); 7901 __ Mov(x0, 0); 7902 __ Mov(x1, 0x0123456789abcdef); 7903 __ Mov(x2, 0xfedcba9876543210); 7904 __ Mov(w3, 0x80); 7905 7906 __ Add(x10, x0, Operand(x1, UXTB, 0)); 7907 __ Add(x11, x0, Operand(x1, UXTB, 1)); 7908 __ Add(x12, x0, Operand(x1, UXTH, 2)); 7909 __ Add(x13, x0, Operand(x1, UXTW, 4)); 7910 7911 __ Add(x14, x0, Operand(x1, SXTB, 0)); 7912 __ Add(x15, x0, Operand(x1, SXTB, 1)); 7913 __ Add(x16, x0, Operand(x1, SXTH, 2)); 7914 __ Add(x17, x0, Operand(x1, SXTW, 3)); 7915 __ Add(x18, x0, Operand(x2, SXTB, 0)); 7916 __ Add(x19, x0, Operand(x2, SXTB, 1)); 7917 __ Add(x20, x0, Operand(x2, SXTH, 2)); 7918 __ Add(x21, x0, Operand(x2, SXTW, 3)); 7919 7920 __ Add(x22, x1, Operand(x2, SXTB, 1)); 7921 __ Sub(x23, x1, Operand(x2, SXTB, 1)); 7922 7923 __ Add(w24, w1, Operand(w2, UXTB, 2)); 7924 __ Add(w25, w0, Operand(w1, SXTB, 0)); 7925 __ Add(w26, w0, Operand(w1, SXTB, 1)); 7926 __ Add(w27, w2, Operand(w1, SXTW, 3)); 7927 7928 __ Add(w28, w0, Operand(w1, SXTW, 3)); 7929 __ Add(x29, x0, Operand(w1, SXTW, 3)); 7930 7931 __ Sub(x30, x0, Operand(w3, SXTB, 1)); 7932 END(); 7933 7934 RUN(); 7935 7936 ASSERT_EQUAL_64(0xef, x10); 7937 ASSERT_EQUAL_64(0x1de, x11); 7938 ASSERT_EQUAL_64(0x337bc, x12); 7939 ASSERT_EQUAL_64(0x89abcdef0, x13); 7940 7941 ASSERT_EQUAL_64(0xffffffffffffffef, x14); 7942 ASSERT_EQUAL_64(0xffffffffffffffde, x15); 7943 ASSERT_EQUAL_64(0xffffffffffff37bc, x16); 7944 ASSERT_EQUAL_64(0xfffffffc4d5e6f78, x17); 7945 ASSERT_EQUAL_64(0x10, x18); 7946 ASSERT_EQUAL_64(0x20, x19); 7947 ASSERT_EQUAL_64(0xc840, x20); 7948 ASSERT_EQUAL_64(0x3b2a19080, x21); 7949 7950 ASSERT_EQUAL_64(0x0123456789abce0f, x22); 7951 ASSERT_EQUAL_64(0x0123456789abcdcf, x23); 7952 7953 ASSERT_EQUAL_32(0x89abce2f, w24); 7954 ASSERT_EQUAL_32(0xffffffef, w25); 7955 ASSERT_EQUAL_32(0xffffffde, w26); 7956 ASSERT_EQUAL_32(0xc3b2a188, w27); 7957 7958 ASSERT_EQUAL_32(0x4d5e6f78, w28); 7959 ASSERT_EQUAL_64(0xfffffffc4d5e6f78, x29); 7960 7961 ASSERT_EQUAL_64(256, x30); 7962 7963 TEARDOWN(); 7964 } 7965 7966 7967 TEST(add_sub_negative) { 7968 SETUP(); 7969 7970 START(); 7971 __ Mov(x0, 0); 7972 __ Mov(x1, 4687); 7973 __ Mov(x2, 0x1122334455667788); 7974 __ Mov(w3, 0x11223344); 7975 __ Mov(w4, 400000); 7976 7977 __ Add(x10, x0, -42); 
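// The ADD (immediate) encoding only accepts a 12-bit unsigned immediate,
// optionally shifted left by 12, so negative operands like these cannot be
// encoded directly. The MacroAssembler is expected to flip the operation
// instead; Add(x10, x0, -42), for example, as "sub x10, x0, #42", and
// likewise a Sub with a negative immediate becomes an add.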
7978 __ Add(x11, x1, -687); 7979 __ Add(x12, x2, -0x88); 7980 7981 __ Sub(x13, x0, -600); 7982 __ Sub(x14, x1, -313); 7983 __ Sub(x15, x2, -0x555); 7984 7985 __ Add(w19, w3, -0x344); 7986 __ Add(w20, w4, -2000); 7987 7988 __ Sub(w21, w3, -0xbc); 7989 __ Sub(w22, w4, -2000); 7990 END(); 7991 7992 RUN(); 7993 7994 ASSERT_EQUAL_64(-42, x10); 7995 ASSERT_EQUAL_64(4000, x11); 7996 ASSERT_EQUAL_64(0x1122334455667700, x12); 7997 7998 ASSERT_EQUAL_64(600, x13); 7999 ASSERT_EQUAL_64(5000, x14); 8000 ASSERT_EQUAL_64(0x1122334455667cdd, x15); 8001 8002 ASSERT_EQUAL_32(0x11223000, w19); 8003 ASSERT_EQUAL_32(398000, w20); 8004 8005 ASSERT_EQUAL_32(0x11223400, w21); 8006 ASSERT_EQUAL_32(402000, w22); 8007 8008 TEARDOWN(); 8009 } 8010 8011 8012 TEST(add_sub_zero) { 8013 SETUP(); 8014 8015 START(); 8016 __ Mov(x0, 0); 8017 __ Mov(x1, 0); 8018 __ Mov(x2, 0); 8019 8020 Label blob1; 8021 __ Bind(&blob1); 8022 __ Add(x0, x0, 0); 8023 __ Sub(x1, x1, 0); 8024 __ Sub(x2, x2, xzr); 8025 VIXL_CHECK(__ GetSizeOfCodeGeneratedSince(&blob1) == 0); 8026 8027 Label blob2; 8028 __ Bind(&blob2); 8029 __ Add(w3, w3, 0); 8030 VIXL_CHECK(__ GetSizeOfCodeGeneratedSince(&blob2) != 0); 8031 8032 Label blob3; 8033 __ Bind(&blob3); 8034 __ Sub(w3, w3, wzr); 8035 VIXL_CHECK(__ GetSizeOfCodeGeneratedSince(&blob3) != 0); 8036 8037 END(); 8038 8039 RUN(); 8040 8041 ASSERT_EQUAL_64(0, x0); 8042 ASSERT_EQUAL_64(0, x1); 8043 ASSERT_EQUAL_64(0, x2); 8044 8045 TEARDOWN(); 8046 } 8047 8048 8049 TEST(claim_drop_zero) { 8050 SETUP(); 8051 8052 START(); 8053 8054 Label start; 8055 __ Bind(&start); 8056 __ Claim(Operand(0)); 8057 __ Drop(Operand(0)); 8058 __ Claim(Operand(xzr)); 8059 __ Drop(Operand(xzr)); 8060 VIXL_CHECK(__ GetSizeOfCodeGeneratedSince(&start) == 0); 8061 8062 END(); 8063 8064 RUN(); 8065 8066 TEARDOWN(); 8067 } 8068 8069 8070 TEST(neg) { 8071 SETUP(); 8072 8073 START(); 8074 __ Mov(x0, 0xf123456789abcdef); 8075 8076 // Immediate. 8077 __ Neg(x1, 0x123); 8078 __ Neg(w2, 0x123); 8079 8080 // Shifted. 8081 __ Neg(x3, Operand(x0, LSL, 1)); 8082 __ Neg(w4, Operand(w0, LSL, 2)); 8083 __ Neg(x5, Operand(x0, LSR, 3)); 8084 __ Neg(w6, Operand(w0, LSR, 4)); 8085 __ Neg(x7, Operand(x0, ASR, 5)); 8086 __ Neg(w8, Operand(w0, ASR, 6)); 8087 8088 // Extended. 
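// The extended-register forms extract part of the source, zero- or
// sign-extend it and apply the optional shift before negating. Worked
// example, given x0 = 0xf123456789abcdef (so w0 = 0x89abcdef):
//   Neg(w9, Operand(w0, UXTB)) computes 0 - 0xef = 0xffffff11,
// which is the value the x9 assertion below expects.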
8089 __ Neg(w9, Operand(w0, UXTB)); 8090 __ Neg(x10, Operand(x0, SXTB, 1)); 8091 __ Neg(w11, Operand(w0, UXTH, 2)); 8092 __ Neg(x12, Operand(x0, SXTH, 3)); 8093 __ Neg(w13, Operand(w0, UXTW, 4)); 8094 __ Neg(x14, Operand(x0, SXTW, 4)); 8095 END(); 8096 8097 RUN(); 8098 8099 ASSERT_EQUAL_64(0xfffffffffffffedd, x1); 8100 ASSERT_EQUAL_64(0xfffffedd, x2); 8101 ASSERT_EQUAL_64(0x1db97530eca86422, x3); 8102 ASSERT_EQUAL_64(0xd950c844, x4); 8103 ASSERT_EQUAL_64(0xe1db97530eca8643, x5); 8104 ASSERT_EQUAL_64(0xf7654322, x6); 8105 ASSERT_EQUAL_64(0x0076e5d4c3b2a191, x7); 8106 ASSERT_EQUAL_64(0x01d950c9, x8); 8107 ASSERT_EQUAL_64(0xffffff11, x9); 8108 ASSERT_EQUAL_64(0x0000000000000022, x10); 8109 ASSERT_EQUAL_64(0xfffcc844, x11); 8110 ASSERT_EQUAL_64(0x0000000000019088, x12); 8111 ASSERT_EQUAL_64(0x65432110, x13); 8112 ASSERT_EQUAL_64(0x0000000765432110, x14); 8113 8114 TEARDOWN(); 8115 } 8116 8117 8118 template <typename T, typename Op> 8119 static void AdcsSbcsHelper( 8120 Op op, T left, T right, int carry, T expected, StatusFlags expected_flags) { 8121 int reg_size = sizeof(T) * 8; 8122 Register left_reg(0, reg_size); 8123 Register right_reg(1, reg_size); 8124 Register result_reg(2, reg_size); 8125 8126 SETUP(); 8127 START(); 8128 8129 __ Mov(left_reg, left); 8130 __ Mov(right_reg, right); 8131 __ Mov(x10, (carry ? CFlag : NoFlag)); 8132 8133 __ Msr(NZCV, x10); 8134 (masm.*op)(result_reg, left_reg, right_reg); 8135 8136 END(); 8137 RUN(); 8138 8139 ASSERT_EQUAL_64(left, left_reg.X()); 8140 ASSERT_EQUAL_64(right, right_reg.X()); 8141 ASSERT_EQUAL_64(expected, result_reg.X()); 8142 ASSERT_EQUAL_NZCV(expected_flags); 8143 8144 TEARDOWN(); 8145 } 8146 8147 8148 TEST(adcs_sbcs_x) { 8149 uint64_t inputs[] = { 8150 0x0000000000000000, 8151 0x0000000000000001, 8152 0x7ffffffffffffffe, 8153 0x7fffffffffffffff, 8154 0x8000000000000000, 8155 0x8000000000000001, 8156 0xfffffffffffffffe, 8157 0xffffffffffffffff, 8158 }; 8159 static const size_t input_count = sizeof(inputs) / sizeof(inputs[0]); 8160 8161 struct Expected { 8162 uint64_t carry0_result; 8163 StatusFlags carry0_flags; 8164 uint64_t carry1_result; 8165 StatusFlags carry1_flags; 8166 }; 8167 8168 static const Expected expected_adcs_x[input_count][input_count] = 8169 {{{0x0000000000000000, ZFlag, 0x0000000000000001, NoFlag}, 8170 {0x0000000000000001, NoFlag, 0x0000000000000002, NoFlag}, 8171 {0x7ffffffffffffffe, NoFlag, 0x7fffffffffffffff, NoFlag}, 8172 {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag}, 8173 {0x8000000000000000, NFlag, 0x8000000000000001, NFlag}, 8174 {0x8000000000000001, NFlag, 0x8000000000000002, NFlag}, 8175 {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag}, 8176 {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}}, 8177 {{0x0000000000000001, NoFlag, 0x0000000000000002, NoFlag}, 8178 {0x0000000000000002, NoFlag, 0x0000000000000003, NoFlag}, 8179 {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag}, 8180 {0x8000000000000000, NVFlag, 0x8000000000000001, NVFlag}, 8181 {0x8000000000000001, NFlag, 0x8000000000000002, NFlag}, 8182 {0x8000000000000002, NFlag, 0x8000000000000003, NFlag}, 8183 {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, 8184 {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag}}, 8185 {{0x7ffffffffffffffe, NoFlag, 0x7fffffffffffffff, NoFlag}, 8186 {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag}, 8187 {0xfffffffffffffffc, NVFlag, 0xfffffffffffffffd, NVFlag}, 8188 {0xfffffffffffffffd, NVFlag, 0xfffffffffffffffe, NVFlag}, 8189 {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag}, 
8190 {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, 8191 {0x7ffffffffffffffc, CFlag, 0x7ffffffffffffffd, CFlag}, 8192 {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag}}, 8193 {{0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag}, 8194 {0x8000000000000000, NVFlag, 0x8000000000000001, NVFlag}, 8195 {0xfffffffffffffffd, NVFlag, 0xfffffffffffffffe, NVFlag}, 8196 {0xfffffffffffffffe, NVFlag, 0xffffffffffffffff, NVFlag}, 8197 {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, 8198 {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag}, 8199 {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag}, 8200 {0x7ffffffffffffffe, CFlag, 0x7fffffffffffffff, CFlag}}, 8201 {{0x8000000000000000, NFlag, 0x8000000000000001, NFlag}, 8202 {0x8000000000000001, NFlag, 0x8000000000000002, NFlag}, 8203 {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag}, 8204 {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, 8205 {0x0000000000000000, ZCVFlag, 0x0000000000000001, CVFlag}, 8206 {0x0000000000000001, CVFlag, 0x0000000000000002, CVFlag}, 8207 {0x7ffffffffffffffe, CVFlag, 0x7fffffffffffffff, CVFlag}, 8208 {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag}}, 8209 {{0x8000000000000001, NFlag, 0x8000000000000002, NFlag}, 8210 {0x8000000000000002, NFlag, 0x8000000000000003, NFlag}, 8211 {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, 8212 {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag}, 8213 {0x0000000000000001, CVFlag, 0x0000000000000002, CVFlag}, 8214 {0x0000000000000002, CVFlag, 0x0000000000000003, CVFlag}, 8215 {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag}, 8216 {0x8000000000000000, NCFlag, 0x8000000000000001, NCFlag}}, 8217 {{0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag}, 8218 {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, 8219 {0x7ffffffffffffffc, CFlag, 0x7ffffffffffffffd, CFlag}, 8220 {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag}, 8221 {0x7ffffffffffffffe, CVFlag, 0x7fffffffffffffff, CVFlag}, 8222 {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag}, 8223 {0xfffffffffffffffc, NCFlag, 0xfffffffffffffffd, NCFlag}, 8224 {0xfffffffffffffffd, NCFlag, 0xfffffffffffffffe, NCFlag}}, 8225 {{0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, 8226 {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag}, 8227 {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag}, 8228 {0x7ffffffffffffffe, CFlag, 0x7fffffffffffffff, CFlag}, 8229 {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag}, 8230 {0x8000000000000000, NCFlag, 0x8000000000000001, NCFlag}, 8231 {0xfffffffffffffffd, NCFlag, 0xfffffffffffffffe, NCFlag}, 8232 {0xfffffffffffffffe, NCFlag, 0xffffffffffffffff, NCFlag}}}; 8233 8234 static const Expected expected_sbcs_x[input_count][input_count] = 8235 {{{0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, 8236 {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag}, 8237 {0x8000000000000001, NFlag, 0x8000000000000002, NFlag}, 8238 {0x8000000000000000, NFlag, 0x8000000000000001, NFlag}, 8239 {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag}, 8240 {0x7ffffffffffffffe, NoFlag, 0x7fffffffffffffff, NoFlag}, 8241 {0x0000000000000001, NoFlag, 0x0000000000000002, NoFlag}, 8242 {0x0000000000000000, ZFlag, 0x0000000000000001, NoFlag}}, 8243 {{0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag}, 8244 {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, 8245 {0x8000000000000002, NFlag, 0x8000000000000003, NFlag}, 8246 {0x8000000000000001, NFlag, 0x8000000000000002, NFlag}, 8247 {0x8000000000000000, NVFlag, 
0x8000000000000001, NVFlag}, 8248 {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag}, 8249 {0x0000000000000002, NoFlag, 0x0000000000000003, NoFlag}, 8250 {0x0000000000000001, NoFlag, 0x0000000000000002, NoFlag}}, 8251 {{0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag}, 8252 {0x7ffffffffffffffc, CFlag, 0x7ffffffffffffffd, CFlag}, 8253 {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, 8254 {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag}, 8255 {0xfffffffffffffffd, NVFlag, 0xfffffffffffffffe, NVFlag}, 8256 {0xfffffffffffffffc, NVFlag, 0xfffffffffffffffd, NVFlag}, 8257 {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag}, 8258 {0x7ffffffffffffffe, NoFlag, 0x7fffffffffffffff, NoFlag}}, 8259 {{0x7ffffffffffffffe, CFlag, 0x7fffffffffffffff, CFlag}, 8260 {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag}, 8261 {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag}, 8262 {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, 8263 {0xfffffffffffffffe, NVFlag, 0xffffffffffffffff, NVFlag}, 8264 {0xfffffffffffffffd, NVFlag, 0xfffffffffffffffe, NVFlag}, 8265 {0x8000000000000000, NVFlag, 0x8000000000000001, NVFlag}, 8266 {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag}}, 8267 {{0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag}, 8268 {0x7ffffffffffffffe, CVFlag, 0x7fffffffffffffff, CVFlag}, 8269 {0x0000000000000001, CVFlag, 0x0000000000000002, CVFlag}, 8270 {0x0000000000000000, ZCVFlag, 0x0000000000000001, CVFlag}, 8271 {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, 8272 {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag}, 8273 {0x8000000000000001, NFlag, 0x8000000000000002, NFlag}, 8274 {0x8000000000000000, NFlag, 0x8000000000000001, NFlag}}, 8275 {{0x8000000000000000, NCFlag, 0x8000000000000001, NCFlag}, 8276 {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag}, 8277 {0x0000000000000002, CVFlag, 0x0000000000000003, CVFlag}, 8278 {0x0000000000000001, CVFlag, 0x0000000000000002, CVFlag}, 8279 {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag}, 8280 {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, 8281 {0x8000000000000002, NFlag, 0x8000000000000003, NFlag}, 8282 {0x8000000000000001, NFlag, 0x8000000000000002, NFlag}}, 8283 {{0xfffffffffffffffd, NCFlag, 0xfffffffffffffffe, NCFlag}, 8284 {0xfffffffffffffffc, NCFlag, 0xfffffffffffffffd, NCFlag}, 8285 {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag}, 8286 {0x7ffffffffffffffe, CVFlag, 0x7fffffffffffffff, CVFlag}, 8287 {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag}, 8288 {0x7ffffffffffffffc, CFlag, 0x7ffffffffffffffd, CFlag}, 8289 {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, 8290 {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag}}, 8291 {{0xfffffffffffffffe, NCFlag, 0xffffffffffffffff, NCFlag}, 8292 {0xfffffffffffffffd, NCFlag, 0xfffffffffffffffe, NCFlag}, 8293 {0x8000000000000000, NCFlag, 0x8000000000000001, NCFlag}, 8294 {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag}, 8295 {0x7ffffffffffffffe, CFlag, 0x7fffffffffffffff, CFlag}, 8296 {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag}, 8297 {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag}, 8298 {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}}}; 8299 8300 for (size_t left = 0; left < input_count; left++) { 8301 for (size_t right = 0; right < input_count; right++) { 8302 const Expected& expected = expected_adcs_x[left][right]; 8303 AdcsSbcsHelper(&MacroAssembler::Adcs, 8304 inputs[left], 8305 inputs[right], 8306 0, 8307 expected.carry0_result, 8308 expected.carry0_flags); 8309 
AdcsSbcsHelper(&MacroAssembler::Adcs, 8310 inputs[left], 8311 inputs[right], 8312 1, 8313 expected.carry1_result, 8314 expected.carry1_flags); 8315 } 8316 } 8317 8318 for (size_t left = 0; left < input_count; left++) { 8319 for (size_t right = 0; right < input_count; right++) { 8320 const Expected& expected = expected_sbcs_x[left][right]; 8321 AdcsSbcsHelper(&MacroAssembler::Sbcs, 8322 inputs[left], 8323 inputs[right], 8324 0, 8325 expected.carry0_result, 8326 expected.carry0_flags); 8327 AdcsSbcsHelper(&MacroAssembler::Sbcs, 8328 inputs[left], 8329 inputs[right], 8330 1, 8331 expected.carry1_result, 8332 expected.carry1_flags); 8333 } 8334 } 8335 } 8336 8337 8338 TEST(adcs_sbcs_w) { 8339 uint32_t inputs[] = { 8340 0x00000000, 8341 0x00000001, 8342 0x7ffffffe, 8343 0x7fffffff, 8344 0x80000000, 8345 0x80000001, 8346 0xfffffffe, 8347 0xffffffff, 8348 }; 8349 static const size_t input_count = sizeof(inputs) / sizeof(inputs[0]); 8350 8351 struct Expected { 8352 uint32_t carry0_result; 8353 StatusFlags carry0_flags; 8354 uint32_t carry1_result; 8355 StatusFlags carry1_flags; 8356 }; 8357 8358 static const Expected expected_adcs_w[input_count][input_count] = 8359 {{{0x00000000, ZFlag, 0x00000001, NoFlag}, 8360 {0x00000001, NoFlag, 0x00000002, NoFlag}, 8361 {0x7ffffffe, NoFlag, 0x7fffffff, NoFlag}, 8362 {0x7fffffff, NoFlag, 0x80000000, NVFlag}, 8363 {0x80000000, NFlag, 0x80000001, NFlag}, 8364 {0x80000001, NFlag, 0x80000002, NFlag}, 8365 {0xfffffffe, NFlag, 0xffffffff, NFlag}, 8366 {0xffffffff, NFlag, 0x00000000, ZCFlag}}, 8367 {{0x00000001, NoFlag, 0x00000002, NoFlag}, 8368 {0x00000002, NoFlag, 0x00000003, NoFlag}, 8369 {0x7fffffff, NoFlag, 0x80000000, NVFlag}, 8370 {0x80000000, NVFlag, 0x80000001, NVFlag}, 8371 {0x80000001, NFlag, 0x80000002, NFlag}, 8372 {0x80000002, NFlag, 0x80000003, NFlag}, 8373 {0xffffffff, NFlag, 0x00000000, ZCFlag}, 8374 {0x00000000, ZCFlag, 0x00000001, CFlag}}, 8375 {{0x7ffffffe, NoFlag, 0x7fffffff, NoFlag}, 8376 {0x7fffffff, NoFlag, 0x80000000, NVFlag}, 8377 {0xfffffffc, NVFlag, 0xfffffffd, NVFlag}, 8378 {0xfffffffd, NVFlag, 0xfffffffe, NVFlag}, 8379 {0xfffffffe, NFlag, 0xffffffff, NFlag}, 8380 {0xffffffff, NFlag, 0x00000000, ZCFlag}, 8381 {0x7ffffffc, CFlag, 0x7ffffffd, CFlag}, 8382 {0x7ffffffd, CFlag, 0x7ffffffe, CFlag}}, 8383 {{0x7fffffff, NoFlag, 0x80000000, NVFlag}, 8384 {0x80000000, NVFlag, 0x80000001, NVFlag}, 8385 {0xfffffffd, NVFlag, 0xfffffffe, NVFlag}, 8386 {0xfffffffe, NVFlag, 0xffffffff, NVFlag}, 8387 {0xffffffff, NFlag, 0x00000000, ZCFlag}, 8388 {0x00000000, ZCFlag, 0x00000001, CFlag}, 8389 {0x7ffffffd, CFlag, 0x7ffffffe, CFlag}, 8390 {0x7ffffffe, CFlag, 0x7fffffff, CFlag}}, 8391 {{0x80000000, NFlag, 0x80000001, NFlag}, 8392 {0x80000001, NFlag, 0x80000002, NFlag}, 8393 {0xfffffffe, NFlag, 0xffffffff, NFlag}, 8394 {0xffffffff, NFlag, 0x00000000, ZCFlag}, 8395 {0x00000000, ZCVFlag, 0x00000001, CVFlag}, 8396 {0x00000001, CVFlag, 0x00000002, CVFlag}, 8397 {0x7ffffffe, CVFlag, 0x7fffffff, CVFlag}, 8398 {0x7fffffff, CVFlag, 0x80000000, NCFlag}}, 8399 {{0x80000001, NFlag, 0x80000002, NFlag}, 8400 {0x80000002, NFlag, 0x80000003, NFlag}, 8401 {0xffffffff, NFlag, 0x00000000, ZCFlag}, 8402 {0x00000000, ZCFlag, 0x00000001, CFlag}, 8403 {0x00000001, CVFlag, 0x00000002, CVFlag}, 8404 {0x00000002, CVFlag, 0x00000003, CVFlag}, 8405 {0x7fffffff, CVFlag, 0x80000000, NCFlag}, 8406 {0x80000000, NCFlag, 0x80000001, NCFlag}}, 8407 {{0xfffffffe, NFlag, 0xffffffff, NFlag}, 8408 {0xffffffff, NFlag, 0x00000000, ZCFlag}, 8409 {0x7ffffffc, CFlag, 0x7ffffffd, CFlag}, 8410 
{0x7ffffffd, CFlag, 0x7ffffffe, CFlag}, 8411 {0x7ffffffe, CVFlag, 0x7fffffff, CVFlag}, 8412 {0x7fffffff, CVFlag, 0x80000000, NCFlag}, 8413 {0xfffffffc, NCFlag, 0xfffffffd, NCFlag}, 8414 {0xfffffffd, NCFlag, 0xfffffffe, NCFlag}}, 8415 {{0xffffffff, NFlag, 0x00000000, ZCFlag}, 8416 {0x00000000, ZCFlag, 0x00000001, CFlag}, 8417 {0x7ffffffd, CFlag, 0x7ffffffe, CFlag}, 8418 {0x7ffffffe, CFlag, 0x7fffffff, CFlag}, 8419 {0x7fffffff, CVFlag, 0x80000000, NCFlag}, 8420 {0x80000000, NCFlag, 0x80000001, NCFlag}, 8421 {0xfffffffd, NCFlag, 0xfffffffe, NCFlag}, 8422 {0xfffffffe, NCFlag, 0xffffffff, NCFlag}}}; 8423 8424 static const Expected expected_sbcs_w[input_count][input_count] = 8425 {{{0xffffffff, NFlag, 0x00000000, ZCFlag}, 8426 {0xfffffffe, NFlag, 0xffffffff, NFlag}, 8427 {0x80000001, NFlag, 0x80000002, NFlag}, 8428 {0x80000000, NFlag, 0x80000001, NFlag}, 8429 {0x7fffffff, NoFlag, 0x80000000, NVFlag}, 8430 {0x7ffffffe, NoFlag, 0x7fffffff, NoFlag}, 8431 {0x00000001, NoFlag, 0x00000002, NoFlag}, 8432 {0x00000000, ZFlag, 0x00000001, NoFlag}}, 8433 {{0x00000000, ZCFlag, 0x00000001, CFlag}, 8434 {0xffffffff, NFlag, 0x00000000, ZCFlag}, 8435 {0x80000002, NFlag, 0x80000003, NFlag}, 8436 {0x80000001, NFlag, 0x80000002, NFlag}, 8437 {0x80000000, NVFlag, 0x80000001, NVFlag}, 8438 {0x7fffffff, NoFlag, 0x80000000, NVFlag}, 8439 {0x00000002, NoFlag, 0x00000003, NoFlag}, 8440 {0x00000001, NoFlag, 0x00000002, NoFlag}}, 8441 {{0x7ffffffd, CFlag, 0x7ffffffe, CFlag}, 8442 {0x7ffffffc, CFlag, 0x7ffffffd, CFlag}, 8443 {0xffffffff, NFlag, 0x00000000, ZCFlag}, 8444 {0xfffffffe, NFlag, 0xffffffff, NFlag}, 8445 {0xfffffffd, NVFlag, 0xfffffffe, NVFlag}, 8446 {0xfffffffc, NVFlag, 0xfffffffd, NVFlag}, 8447 {0x7fffffff, NoFlag, 0x80000000, NVFlag}, 8448 {0x7ffffffe, NoFlag, 0x7fffffff, NoFlag}}, 8449 {{0x7ffffffe, CFlag, 0x7fffffff, CFlag}, 8450 {0x7ffffffd, CFlag, 0x7ffffffe, CFlag}, 8451 {0x00000000, ZCFlag, 0x00000001, CFlag}, 8452 {0xffffffff, NFlag, 0x00000000, ZCFlag}, 8453 {0xfffffffe, NVFlag, 0xffffffff, NVFlag}, 8454 {0xfffffffd, NVFlag, 0xfffffffe, NVFlag}, 8455 {0x80000000, NVFlag, 0x80000001, NVFlag}, 8456 {0x7fffffff, NoFlag, 0x80000000, NVFlag}}, 8457 {{0x7fffffff, CVFlag, 0x80000000, NCFlag}, 8458 {0x7ffffffe, CVFlag, 0x7fffffff, CVFlag}, 8459 {0x00000001, CVFlag, 0x00000002, CVFlag}, 8460 {0x00000000, ZCVFlag, 0x00000001, CVFlag}, 8461 {0xffffffff, NFlag, 0x00000000, ZCFlag}, 8462 {0xfffffffe, NFlag, 0xffffffff, NFlag}, 8463 {0x80000001, NFlag, 0x80000002, NFlag}, 8464 {0x80000000, NFlag, 0x80000001, NFlag}}, 8465 {{0x80000000, NCFlag, 0x80000001, NCFlag}, 8466 {0x7fffffff, CVFlag, 0x80000000, NCFlag}, 8467 {0x00000002, CVFlag, 0x00000003, CVFlag}, 8468 {0x00000001, CVFlag, 0x00000002, CVFlag}, 8469 {0x00000000, ZCFlag, 0x00000001, CFlag}, 8470 {0xffffffff, NFlag, 0x00000000, ZCFlag}, 8471 {0x80000002, NFlag, 0x80000003, NFlag}, 8472 {0x80000001, NFlag, 0x80000002, NFlag}}, 8473 {{0xfffffffd, NCFlag, 0xfffffffe, NCFlag}, 8474 {0xfffffffc, NCFlag, 0xfffffffd, NCFlag}, 8475 {0x7fffffff, CVFlag, 0x80000000, NCFlag}, 8476 {0x7ffffffe, CVFlag, 0x7fffffff, CVFlag}, 8477 {0x7ffffffd, CFlag, 0x7ffffffe, CFlag}, 8478 {0x7ffffffc, CFlag, 0x7ffffffd, CFlag}, 8479 {0xffffffff, NFlag, 0x00000000, ZCFlag}, 8480 {0xfffffffe, NFlag, 0xffffffff, NFlag}}, 8481 {{0xfffffffe, NCFlag, 0xffffffff, NCFlag}, 8482 {0xfffffffd, NCFlag, 0xfffffffe, NCFlag}, 8483 {0x80000000, NCFlag, 0x80000001, NCFlag}, 8484 {0x7fffffff, CVFlag, 0x80000000, NCFlag}, 8485 {0x7ffffffe, CFlag, 0x7fffffff, CFlag}, 8486 {0x7ffffffd, CFlag, 0x7ffffffe, 
CFlag}, 8487 {0x00000000, ZCFlag, 0x00000001, CFlag}, 8488 {0xffffffff, NFlag, 0x00000000, ZCFlag}}}; 8489 8490 for (size_t left = 0; left < input_count; left++) { 8491 for (size_t right = 0; right < input_count; right++) { 8492 const Expected& expected = expected_adcs_w[left][right]; 8493 AdcsSbcsHelper(&MacroAssembler::Adcs, 8494 inputs[left], 8495 inputs[right], 8496 0, 8497 expected.carry0_result, 8498 expected.carry0_flags); 8499 AdcsSbcsHelper(&MacroAssembler::Adcs, 8500 inputs[left], 8501 inputs[right], 8502 1, 8503 expected.carry1_result, 8504 expected.carry1_flags); 8505 } 8506 } 8507 8508 for (size_t left = 0; left < input_count; left++) { 8509 for (size_t right = 0; right < input_count; right++) { 8510 const Expected& expected = expected_sbcs_w[left][right]; 8511 AdcsSbcsHelper(&MacroAssembler::Sbcs, 8512 inputs[left], 8513 inputs[right], 8514 0, 8515 expected.carry0_result, 8516 expected.carry0_flags); 8517 AdcsSbcsHelper(&MacroAssembler::Sbcs, 8518 inputs[left], 8519 inputs[right], 8520 1, 8521 expected.carry1_result, 8522 expected.carry1_flags); 8523 } 8524 } 8525 } 8526 8527 8528 TEST(adc_sbc_shift) { 8529 SETUP(); 8530 8531 START(); 8532 __ Mov(x0, 0); 8533 __ Mov(x1, 1); 8534 __ Mov(x2, 0x0123456789abcdef); 8535 __ Mov(x3, 0xfedcba9876543210); 8536 __ Mov(x4, 0xffffffffffffffff); 8537 8538 // Clear the C flag. 8539 __ Adds(x0, x0, Operand(0)); 8540 8541 __ Adc(x5, x2, Operand(x3)); 8542 __ Adc(x6, x0, Operand(x1, LSL, 60)); 8543 __ Sbc(x7, x4, Operand(x3, LSR, 4)); 8544 __ Adc(x8, x2, Operand(x3, ASR, 4)); 8545 __ Adc(x9, x2, Operand(x3, ROR, 8)); 8546 8547 __ Adc(w10, w2, Operand(w3)); 8548 __ Adc(w11, w0, Operand(w1, LSL, 30)); 8549 __ Sbc(w12, w4, Operand(w3, LSR, 4)); 8550 __ Adc(w13, w2, Operand(w3, ASR, 4)); 8551 __ Adc(w14, w2, Operand(w3, ROR, 8)); 8552 8553 // Set the C flag. 8554 __ Cmp(w0, Operand(w0)); 8555 8556 __ Adc(x18, x2, Operand(x3)); 8557 __ Adc(x19, x0, Operand(x1, LSL, 60)); 8558 __ Sbc(x20, x4, Operand(x3, LSR, 4)); 8559 __ Adc(x21, x2, Operand(x3, ASR, 4)); 8560 __ Adc(x22, x2, Operand(x3, ROR, 8)); 8561 8562 __ Adc(w23, w2, Operand(w3)); 8563 __ Adc(w24, w0, Operand(w1, LSL, 30)); 8564 __ Sbc(w25, w4, Operand(w3, LSR, 4)); 8565 __ Adc(w26, w2, Operand(w3, ASR, 4)); 8566 __ Adc(w27, w2, Operand(w3, ROR, 8)); 8567 END(); 8568 8569 RUN(); 8570 8571 ASSERT_EQUAL_64(0xffffffffffffffff, x5); 8572 ASSERT_EQUAL_64(INT64_C(1) << 60, x6); 8573 ASSERT_EQUAL_64(0xf0123456789abcdd, x7); 8574 ASSERT_EQUAL_64(0x0111111111111110, x8); 8575 ASSERT_EQUAL_64(0x1222222222222221, x9); 8576 8577 ASSERT_EQUAL_32(0xffffffff, w10); 8578 ASSERT_EQUAL_32(INT32_C(1) << 30, w11); 8579 ASSERT_EQUAL_32(0xf89abcdd, w12); 8580 ASSERT_EQUAL_32(0x91111110, w13); 8581 ASSERT_EQUAL_32(0x9a222221, w14); 8582 8583 ASSERT_EQUAL_64(0xffffffffffffffff + 1, x18); 8584 ASSERT_EQUAL_64((INT64_C(1) << 60) + 1, x19); 8585 ASSERT_EQUAL_64(0xf0123456789abcdd + 1, x20); 8586 ASSERT_EQUAL_64(0x0111111111111110 + 1, x21); 8587 ASSERT_EQUAL_64(0x1222222222222221 + 1, x22); 8588 8589 ASSERT_EQUAL_32(0xffffffff + 1, w23); 8590 ASSERT_EQUAL_32((INT32_C(1) << 30) + 1, w24); 8591 ASSERT_EQUAL_32(0xf89abcdd + 1, w25); 8592 ASSERT_EQUAL_32(0x91111110 + 1, w26); 8593 ASSERT_EQUAL_32(0x9a222221 + 1, w27); 8594 8595 TEARDOWN(); 8596 } 8597 8598 8599 TEST(adc_sbc_extend) { 8600 SETUP(); 8601 8602 START(); 8603 // Clear the C flag. 
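// Adding an immediate of zero can never produce a carry-out, so Adds is a
// convenient way of forcing C to 0 before the Adc/Sbc forms below.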
8604 __ Adds(x0, x0, Operand(0)); 8605 8606 __ Mov(x0, 0); 8607 __ Mov(x1, 1); 8608 __ Mov(x2, 0x0123456789abcdef); 8609 8610 __ Adc(x10, x1, Operand(w2, UXTB, 1)); 8611 __ Adc(x11, x1, Operand(x2, SXTH, 2)); 8612 __ Sbc(x12, x1, Operand(w2, UXTW, 4)); 8613 __ Adc(x13, x1, Operand(x2, UXTX, 4)); 8614 8615 __ Adc(w14, w1, Operand(w2, UXTB, 1)); 8616 __ Adc(w15, w1, Operand(w2, SXTH, 2)); 8617 __ Adc(w9, w1, Operand(w2, UXTW, 4)); 8618 8619 // Set the C flag. 8620 __ Cmp(w0, Operand(w0)); 8621 8622 __ Adc(x20, x1, Operand(w2, UXTB, 1)); 8623 __ Adc(x21, x1, Operand(x2, SXTH, 2)); 8624 __ Sbc(x22, x1, Operand(w2, UXTW, 4)); 8625 __ Adc(x23, x1, Operand(x2, UXTX, 4)); 8626 8627 __ Adc(w24, w1, Operand(w2, UXTB, 1)); 8628 __ Adc(w25, w1, Operand(w2, SXTH, 2)); 8629 __ Adc(w26, w1, Operand(w2, UXTW, 4)); 8630 END(); 8631 8632 RUN(); 8633 8634 ASSERT_EQUAL_64(0x1df, x10); 8635 ASSERT_EQUAL_64(0xffffffffffff37bd, x11); 8636 ASSERT_EQUAL_64(0xfffffff765432110, x12); 8637 ASSERT_EQUAL_64(0x123456789abcdef1, x13); 8638 8639 ASSERT_EQUAL_32(0x1df, w14); 8640 ASSERT_EQUAL_32(0xffff37bd, w15); 8641 ASSERT_EQUAL_32(0x9abcdef1, w9); 8642 8643 ASSERT_EQUAL_64(0x1df + 1, x20); 8644 ASSERT_EQUAL_64(0xffffffffffff37bd + 1, x21); 8645 ASSERT_EQUAL_64(0xfffffff765432110 + 1, x22); 8646 ASSERT_EQUAL_64(0x123456789abcdef1 + 1, x23); 8647 8648 ASSERT_EQUAL_32(0x1df + 1, w24); 8649 ASSERT_EQUAL_32(0xffff37bd + 1, w25); 8650 ASSERT_EQUAL_32(0x9abcdef1 + 1, w26); 8651 8652 // Check that adc correctly sets the condition flags. 8653 START(); 8654 __ Mov(x0, 0xff); 8655 __ Mov(x1, 0xffffffffffffffff); 8656 // Clear the C flag. 8657 __ Adds(x0, x0, Operand(0)); 8658 __ Adcs(x10, x0, Operand(x1, SXTX, 1)); 8659 END(); 8660 8661 RUN(); 8662 8663 ASSERT_EQUAL_NZCV(CFlag); 8664 8665 START(); 8666 __ Mov(x0, 0x7fffffffffffffff); 8667 __ Mov(x1, 1); 8668 // Clear the C flag. 8669 __ Adds(x0, x0, Operand(0)); 8670 __ Adcs(x10, x0, Operand(x1, UXTB, 2)); 8671 END(); 8672 8673 RUN(); 8674 8675 ASSERT_EQUAL_NZCV(NVFlag); 8676 8677 START(); 8678 __ Mov(x0, 0x7fffffffffffffff); 8679 // Clear the C flag. 8680 __ Adds(x0, x0, Operand(0)); 8681 __ Adcs(x10, x0, Operand(1)); 8682 END(); 8683 8684 RUN(); 8685 8686 ASSERT_EQUAL_NZCV(NVFlag); 8687 8688 TEARDOWN(); 8689 } 8690 8691 8692 TEST(adc_sbc_wide_imm) { 8693 SETUP(); 8694 8695 START(); 8696 __ Mov(x0, 0); 8697 8698 // Clear the C flag. 8699 __ Adds(x0, x0, Operand(0)); 8700 8701 __ Adc(x7, x0, Operand(0x1234567890abcdef)); 8702 __ Adc(w8, w0, Operand(0xffffffff)); 8703 __ Sbc(x9, x0, Operand(0x1234567890abcdef)); 8704 __ Sbc(w10, w0, Operand(0xffffffff)); 8705 __ Ngc(x11, Operand(0xffffffff00000000)); 8706 __ Ngc(w12, Operand(0xffff0000)); 8707 8708 // Set the C flag. 
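// Comparing a register with itself computes w0 - w0 = 0, and a subtraction
// that does not borrow sets C on AArch64, so this reliably leaves C (and Z)
// set.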
8709 __ Cmp(w0, Operand(w0)); 8710 8711 __ Adc(x18, x0, Operand(0x1234567890abcdef)); 8712 __ Adc(w19, w0, Operand(0xffffffff)); 8713 __ Sbc(x20, x0, Operand(0x1234567890abcdef)); 8714 __ Sbc(w21, w0, Operand(0xffffffff)); 8715 __ Ngc(x22, Operand(0xffffffff00000000)); 8716 __ Ngc(w23, Operand(0xffff0000)); 8717 END(); 8718 8719 RUN(); 8720 8721 ASSERT_EQUAL_64(0x1234567890abcdef, x7); 8722 ASSERT_EQUAL_64(0xffffffff, x8); 8723 ASSERT_EQUAL_64(0xedcba9876f543210, x9); 8724 ASSERT_EQUAL_64(0, x10); 8725 ASSERT_EQUAL_64(0xffffffff, x11); 8726 ASSERT_EQUAL_64(0xffff, x12); 8727 8728 ASSERT_EQUAL_64(0x1234567890abcdef + 1, x18); 8729 ASSERT_EQUAL_64(0, x19); 8730 ASSERT_EQUAL_64(0xedcba9876f543211, x20); 8731 ASSERT_EQUAL_64(1, x21); 8732 ASSERT_EQUAL_64(0x0000000100000000, x22); 8733 ASSERT_EQUAL_64(0x0000000000010000, x23); 8734 8735 TEARDOWN(); 8736 } 8737 8738 TEST(flags) { 8739 SETUP(); 8740 8741 START(); 8742 __ Mov(x0, 0); 8743 __ Mov(x1, 0x1111111111111111); 8744 __ Neg(x10, Operand(x0)); 8745 __ Neg(x11, Operand(x1)); 8746 __ Neg(w12, Operand(w1)); 8747 // Clear the C flag. 8748 __ Adds(x0, x0, Operand(0)); 8749 __ Ngc(x13, Operand(x0)); 8750 // Set the C flag. 8751 __ Cmp(x0, Operand(x0)); 8752 __ Ngc(w14, Operand(w0)); 8753 END(); 8754 8755 RUN(); 8756 8757 ASSERT_EQUAL_64(0, x10); 8758 ASSERT_EQUAL_64(-0x1111111111111111, x11); 8759 ASSERT_EQUAL_32(-0x11111111, w12); 8760 ASSERT_EQUAL_64(-1, x13); 8761 ASSERT_EQUAL_32(0, w14); 8762 8763 START(); 8764 __ Mov(x0, 0); 8765 __ Cmp(x0, Operand(x0)); 8766 END(); 8767 8768 RUN(); 8769 8770 ASSERT_EQUAL_NZCV(ZCFlag); 8771 8772 START(); 8773 __ Mov(w0, 0); 8774 __ Cmp(w0, Operand(w0)); 8775 END(); 8776 8777 RUN(); 8778 8779 ASSERT_EQUAL_NZCV(ZCFlag); 8780 8781 START(); 8782 __ Mov(x0, 0); 8783 __ Mov(x1, 0x1111111111111111); 8784 __ Cmp(x0, Operand(x1)); 8785 END(); 8786 8787 RUN(); 8788 8789 ASSERT_EQUAL_NZCV(NFlag); 8790 8791 START(); 8792 __ Mov(w0, 0); 8793 __ Mov(w1, 0x11111111); 8794 __ Cmp(w0, Operand(w1)); 8795 END(); 8796 8797 RUN(); 8798 8799 ASSERT_EQUAL_NZCV(NFlag); 8800 8801 START(); 8802 __ Mov(x1, 0x1111111111111111); 8803 __ Cmp(x1, Operand(0)); 8804 END(); 8805 8806 RUN(); 8807 8808 ASSERT_EQUAL_NZCV(CFlag); 8809 8810 START(); 8811 __ Mov(w1, 0x11111111); 8812 __ Cmp(w1, Operand(0)); 8813 END(); 8814 8815 RUN(); 8816 8817 ASSERT_EQUAL_NZCV(CFlag); 8818 8819 START(); 8820 __ Mov(x0, 1); 8821 __ Mov(x1, 0x7fffffffffffffff); 8822 __ Cmn(x1, Operand(x0)); 8823 END(); 8824 8825 RUN(); 8826 8827 ASSERT_EQUAL_NZCV(NVFlag); 8828 8829 START(); 8830 __ Mov(w0, 1); 8831 __ Mov(w1, 0x7fffffff); 8832 __ Cmn(w1, Operand(w0)); 8833 END(); 8834 8835 RUN(); 8836 8837 ASSERT_EQUAL_NZCV(NVFlag); 8838 8839 START(); 8840 __ Mov(x0, 1); 8841 __ Mov(x1, 0xffffffffffffffff); 8842 __ Cmn(x1, Operand(x0)); 8843 END(); 8844 8845 RUN(); 8846 8847 ASSERT_EQUAL_NZCV(ZCFlag); 8848 8849 START(); 8850 __ Mov(w0, 1); 8851 __ Mov(w1, 0xffffffff); 8852 __ Cmn(w1, Operand(w0)); 8853 END(); 8854 8855 RUN(); 8856 8857 ASSERT_EQUAL_NZCV(ZCFlag); 8858 8859 START(); 8860 __ Mov(w0, 0); 8861 __ Mov(w1, 1); 8862 // Clear the C flag. 8863 __ Adds(w0, w0, Operand(0)); 8864 __ Ngcs(w0, Operand(w1)); 8865 END(); 8866 8867 RUN(); 8868 8869 ASSERT_EQUAL_NZCV(NFlag); 8870 8871 START(); 8872 __ Mov(w0, 0); 8873 __ Mov(w1, 0); 8874 // Set the C flag. 
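// With C set, Ngcs(w0, w1) below computes 0 + ~0 + 1, which wraps to zero
// with a carry out, so ZCFlag is the expected result.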
8875 __ Cmp(w0, Operand(w0)); 8876 __ Ngcs(w0, Operand(w1)); 8877 END(); 8878 8879 RUN(); 8880 8881 ASSERT_EQUAL_NZCV(ZCFlag); 8882 8883 TEARDOWN(); 8884 } 8885 8886 8887 TEST(cmp_shift) { 8888 SETUP(); 8889 8890 START(); 8891 __ Mov(x18, 0xf0000000); 8892 __ Mov(x19, 0xf000000010000000); 8893 __ Mov(x20, 0xf0000000f0000000); 8894 __ Mov(x21, 0x7800000078000000); 8895 __ Mov(x22, 0x3c0000003c000000); 8896 __ Mov(x23, 0x8000000780000000); 8897 __ Mov(x24, 0x0000000f00000000); 8898 __ Mov(x25, 0x00000003c0000000); 8899 __ Mov(x26, 0x8000000780000000); 8900 __ Mov(x27, 0xc0000003); 8901 8902 __ Cmp(w20, Operand(w21, LSL, 1)); 8903 __ Mrs(x0, NZCV); 8904 8905 __ Cmp(x20, Operand(x22, LSL, 2)); 8906 __ Mrs(x1, NZCV); 8907 8908 __ Cmp(w19, Operand(w23, LSR, 3)); 8909 __ Mrs(x2, NZCV); 8910 8911 __ Cmp(x18, Operand(x24, LSR, 4)); 8912 __ Mrs(x3, NZCV); 8913 8914 __ Cmp(w20, Operand(w25, ASR, 2)); 8915 __ Mrs(x4, NZCV); 8916 8917 __ Cmp(x20, Operand(x26, ASR, 3)); 8918 __ Mrs(x5, NZCV); 8919 8920 __ Cmp(w27, Operand(w22, ROR, 28)); 8921 __ Mrs(x6, NZCV); 8922 8923 __ Cmp(x20, Operand(x21, ROR, 31)); 8924 __ Mrs(x7, NZCV); 8925 END(); 8926 8927 RUN(); 8928 8929 ASSERT_EQUAL_32(ZCFlag, w0); 8930 ASSERT_EQUAL_32(ZCFlag, w1); 8931 ASSERT_EQUAL_32(ZCFlag, w2); 8932 ASSERT_EQUAL_32(ZCFlag, w3); 8933 ASSERT_EQUAL_32(ZCFlag, w4); 8934 ASSERT_EQUAL_32(ZCFlag, w5); 8935 ASSERT_EQUAL_32(ZCFlag, w6); 8936 ASSERT_EQUAL_32(ZCFlag, w7); 8937 8938 TEARDOWN(); 8939 } 8940 8941 8942 TEST(cmp_extend) { 8943 SETUP(); 8944 8945 START(); 8946 __ Mov(w20, 0x2); 8947 __ Mov(w21, 0x1); 8948 __ Mov(x22, 0xffffffffffffffff); 8949 __ Mov(x23, 0xff); 8950 __ Mov(x24, 0xfffffffffffffffe); 8951 __ Mov(x25, 0xffff); 8952 __ Mov(x26, 0xffffffff); 8953 8954 __ Cmp(w20, Operand(w21, LSL, 1)); 8955 __ Mrs(x0, NZCV); 8956 8957 __ Cmp(x22, Operand(x23, SXTB, 0)); 8958 __ Mrs(x1, NZCV); 8959 8960 __ Cmp(x24, Operand(x23, SXTB, 1)); 8961 __ Mrs(x2, NZCV); 8962 8963 __ Cmp(x24, Operand(x23, UXTB, 1)); 8964 __ Mrs(x3, NZCV); 8965 8966 __ Cmp(w22, Operand(w25, UXTH)); 8967 __ Mrs(x4, NZCV); 8968 8969 __ Cmp(x22, Operand(x25, SXTH)); 8970 __ Mrs(x5, NZCV); 8971 8972 __ Cmp(x22, Operand(x26, UXTW)); 8973 __ Mrs(x6, NZCV); 8974 8975 __ Cmp(x24, Operand(x26, SXTW, 1)); 8976 __ Mrs(x7, NZCV); 8977 END(); 8978 8979 RUN(); 8980 8981 ASSERT_EQUAL_32(ZCFlag, w0); 8982 ASSERT_EQUAL_32(ZCFlag, w1); 8983 ASSERT_EQUAL_32(ZCFlag, w2); 8984 ASSERT_EQUAL_32(NCFlag, w3); 8985 ASSERT_EQUAL_32(NCFlag, w4); 8986 ASSERT_EQUAL_32(ZCFlag, w5); 8987 ASSERT_EQUAL_32(NCFlag, w6); 8988 ASSERT_EQUAL_32(ZCFlag, w7); 8989 8990 TEARDOWN(); 8991 } 8992 8993 8994 TEST(ccmp) { 8995 SETUP(); 8996 8997 START(); 8998 __ Mov(w16, 0); 8999 __ Mov(w17, 1); 9000 __ Cmp(w16, w16); 9001 __ Ccmp(w16, w17, NCFlag, eq); 9002 __ Mrs(x0, NZCV); 9003 9004 __ Cmp(w16, w16); 9005 __ Ccmp(w16, w17, NCFlag, ne); 9006 __ Mrs(x1, NZCV); 9007 9008 __ Cmp(x16, x16); 9009 __ Ccmn(x16, 2, NZCVFlag, eq); 9010 __ Mrs(x2, NZCV); 9011 9012 __ Cmp(x16, x16); 9013 __ Ccmn(x16, 2, NZCVFlag, ne); 9014 __ Mrs(x3, NZCV); 9015 9016 // The MacroAssembler does not allow al as a condition. 9017 { 9018 ExactAssemblyScope scope(&masm, kInstructionSize); 9019 __ ccmp(x16, x16, NZCVFlag, al); 9020 } 9021 __ Mrs(x4, NZCV); 9022 9023 // The MacroAssembler does not allow nv as a condition. 
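// In A64 the condition code 0b1111 (nv) does not mean "never": like al it
// always passes, so this ccmp performs the comparison unconditionally and
// x5 is expected to hold ZCFlag below.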
9024 { 9025 ExactAssemblyScope scope(&masm, kInstructionSize); 9026 __ ccmp(x16, x16, NZCVFlag, nv); 9027 } 9028 __ Mrs(x5, NZCV); 9029 9030 END(); 9031 9032 RUN(); 9033 9034 ASSERT_EQUAL_32(NFlag, w0); 9035 ASSERT_EQUAL_32(NCFlag, w1); 9036 ASSERT_EQUAL_32(NoFlag, w2); 9037 ASSERT_EQUAL_32(NZCVFlag, w3); 9038 ASSERT_EQUAL_32(ZCFlag, w4); 9039 ASSERT_EQUAL_32(ZCFlag, w5); 9040 9041 TEARDOWN(); 9042 } 9043 9044 9045 TEST(ccmp_wide_imm) { 9046 SETUP(); 9047 9048 START(); 9049 __ Mov(w20, 0); 9050 9051 __ Cmp(w20, Operand(w20)); 9052 __ Ccmp(w20, Operand(0x12345678), NZCVFlag, eq); 9053 __ Mrs(x0, NZCV); 9054 9055 __ Cmp(w20, Operand(w20)); 9056 __ Ccmp(x20, Operand(0xffffffffffffffff), NZCVFlag, eq); 9057 __ Mrs(x1, NZCV); 9058 END(); 9059 9060 RUN(); 9061 9062 ASSERT_EQUAL_32(NFlag, w0); 9063 ASSERT_EQUAL_32(NoFlag, w1); 9064 9065 TEARDOWN(); 9066 } 9067 9068 9069 TEST(ccmp_shift_extend) { 9070 SETUP(); 9071 9072 START(); 9073 __ Mov(w20, 0x2); 9074 __ Mov(w21, 0x1); 9075 __ Mov(x22, 0xffffffffffffffff); 9076 __ Mov(x23, 0xff); 9077 __ Mov(x24, 0xfffffffffffffffe); 9078 9079 __ Cmp(w20, Operand(w20)); 9080 __ Ccmp(w20, Operand(w21, LSL, 1), NZCVFlag, eq); 9081 __ Mrs(x0, NZCV); 9082 9083 __ Cmp(w20, Operand(w20)); 9084 __ Ccmp(x22, Operand(x23, SXTB, 0), NZCVFlag, eq); 9085 __ Mrs(x1, NZCV); 9086 9087 __ Cmp(w20, Operand(w20)); 9088 __ Ccmp(x24, Operand(x23, SXTB, 1), NZCVFlag, eq); 9089 __ Mrs(x2, NZCV); 9090 9091 __ Cmp(w20, Operand(w20)); 9092 __ Ccmp(x24, Operand(x23, UXTB, 1), NZCVFlag, eq); 9093 __ Mrs(x3, NZCV); 9094 9095 __ Cmp(w20, Operand(w20)); 9096 __ Ccmp(x24, Operand(x23, UXTB, 1), NZCVFlag, ne); 9097 __ Mrs(x4, NZCV); 9098 END(); 9099 9100 RUN(); 9101 9102 ASSERT_EQUAL_32(ZCFlag, w0); 9103 ASSERT_EQUAL_32(ZCFlag, w1); 9104 ASSERT_EQUAL_32(ZCFlag, w2); 9105 ASSERT_EQUAL_32(NCFlag, w3); 9106 ASSERT_EQUAL_32(NZCVFlag, w4); 9107 9108 TEARDOWN(); 9109 } 9110 9111 9112 TEST(csel_reg) { 9113 SETUP(); 9114 9115 START(); 9116 __ Mov(x16, 0); 9117 __ Mov(x24, 0x0000000f0000000f); 9118 __ Mov(x25, 0x0000001f0000001f); 9119 9120 __ Cmp(w16, Operand(0)); 9121 __ Csel(w0, w24, w25, eq); 9122 __ Csel(w1, w24, w25, ne); 9123 __ Csinc(w2, w24, w25, mi); 9124 __ Csinc(w3, w24, w25, pl); 9125 9126 // The MacroAssembler does not allow al or nv as a condition. 9127 { 9128 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 9129 __ csel(w13, w24, w25, al); 9130 __ csel(x14, x24, x25, nv); 9131 } 9132 9133 __ Cmp(x16, Operand(1)); 9134 __ Csinv(x4, x24, x25, gt); 9135 __ Csinv(x5, x24, x25, le); 9136 __ Csneg(x6, x24, x25, hs); 9137 __ Csneg(x7, x24, x25, lo); 9138 9139 __ Cset(w8, ne); 9140 __ Csetm(w9, ne); 9141 __ Cinc(x10, x25, ne); 9142 __ Cinv(x11, x24, ne); 9143 __ Cneg(x12, x24, ne); 9144 9145 // The MacroAssembler does not allow al or nv as a condition. 
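// Because al and nv always pass, these csel instructions simply select the
// first source register: w15 receives w24 and x17 receives x24, as the
// assertions below expect.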
9146 { 9147 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 9148 __ csel(w15, w24, w25, al); 9149 __ csel(x17, x24, x25, nv); 9150 } 9151 9152 END(); 9153 9154 RUN(); 9155 9156 ASSERT_EQUAL_64(0x0000000f, x0); 9157 ASSERT_EQUAL_64(0x0000001f, x1); 9158 ASSERT_EQUAL_64(0x00000020, x2); 9159 ASSERT_EQUAL_64(0x0000000f, x3); 9160 ASSERT_EQUAL_64(0xffffffe0ffffffe0, x4); 9161 ASSERT_EQUAL_64(0x0000000f0000000f, x5); 9162 ASSERT_EQUAL_64(0xffffffe0ffffffe1, x6); 9163 ASSERT_EQUAL_64(0x0000000f0000000f, x7); 9164 ASSERT_EQUAL_64(0x00000001, x8); 9165 ASSERT_EQUAL_64(0xffffffff, x9); 9166 ASSERT_EQUAL_64(0x0000001f00000020, x10); 9167 ASSERT_EQUAL_64(0xfffffff0fffffff0, x11); 9168 ASSERT_EQUAL_64(0xfffffff0fffffff1, x12); 9169 ASSERT_EQUAL_64(0x0000000f, x13); 9170 ASSERT_EQUAL_64(0x0000000f0000000f, x14); 9171 ASSERT_EQUAL_64(0x0000000f, x15); 9172 ASSERT_EQUAL_64(0x0000000f0000000f, x17); 9173 9174 TEARDOWN(); 9175 } 9176 9177 9178 TEST(csel_imm) { 9179 SETUP(); 9180 9181 int values[] = {-123, -2, -1, 0, 1, 2, 123}; 9182 int n_values = sizeof(values) / sizeof(values[0]); 9183 9184 for (int i = 0; i < n_values; i++) { 9185 for (int j = 0; j < n_values; j++) { 9186 int left = values[i]; 9187 int right = values[j]; 9188 9189 START(); 9190 __ Mov(x10, 0); 9191 __ Cmp(x10, 0); 9192 __ Csel(w0, left, right, eq); 9193 __ Csel(w1, left, right, ne); 9194 __ Csel(x2, left, right, eq); 9195 __ Csel(x3, left, right, ne); 9196 9197 END(); 9198 9199 RUN(); 9200 9201 ASSERT_EQUAL_32(left, w0); 9202 ASSERT_EQUAL_32(right, w1); 9203 ASSERT_EQUAL_64(left, x2); 9204 ASSERT_EQUAL_64(right, x3); 9205 } 9206 } 9207 9208 TEARDOWN(); 9209 } 9210 9211 9212 TEST(csel_mixed) { 9213 SETUP(); 9214 9215 START(); 9216 __ Mov(x18, 0); 9217 __ Mov(x19, 0x80000000); 9218 __ Mov(x20, 0x8000000000000000); 9219 9220 __ Cmp(x18, Operand(0)); 9221 __ Csel(w0, w19, -2, ne); 9222 __ Csel(w1, w19, -1, ne); 9223 __ Csel(w2, w19, 0, ne); 9224 __ Csel(w3, w19, 1, ne); 9225 __ Csel(w4, w19, 2, ne); 9226 __ Csel(w5, w19, Operand(w19, ASR, 31), ne); 9227 __ Csel(w6, w19, Operand(w19, ROR, 1), ne); 9228 __ Csel(w7, w19, 3, eq); 9229 9230 __ Csel(x8, x20, -2, ne); 9231 __ Csel(x9, x20, -1, ne); 9232 __ Csel(x10, x20, 0, ne); 9233 __ Csel(x11, x20, 1, ne); 9234 __ Csel(x12, x20, 2, ne); 9235 __ Csel(x13, x20, Operand(x20, ASR, 63), ne); 9236 __ Csel(x14, x20, Operand(x20, ROR, 1), ne); 9237 __ Csel(x15, x20, 3, eq); 9238 9239 END(); 9240 9241 RUN(); 9242 9243 ASSERT_EQUAL_32(-2, w0); 9244 ASSERT_EQUAL_32(-1, w1); 9245 ASSERT_EQUAL_32(0, w2); 9246 ASSERT_EQUAL_32(1, w3); 9247 ASSERT_EQUAL_32(2, w4); 9248 ASSERT_EQUAL_32(-1, w5); 9249 ASSERT_EQUAL_32(0x40000000, w6); 9250 ASSERT_EQUAL_32(0x80000000, w7); 9251 9252 ASSERT_EQUAL_64(-2, x8); 9253 ASSERT_EQUAL_64(-1, x9); 9254 ASSERT_EQUAL_64(0, x10); 9255 ASSERT_EQUAL_64(1, x11); 9256 ASSERT_EQUAL_64(2, x12); 9257 ASSERT_EQUAL_64(-1, x13); 9258 ASSERT_EQUAL_64(0x4000000000000000, x14); 9259 ASSERT_EQUAL_64(0x8000000000000000, x15); 9260 9261 TEARDOWN(); 9262 } 9263 9264 9265 TEST(lslv) { 9266 SETUP(); 9267 9268 uint64_t value = 0x0123456789abcdef; 9269 int shift[] = {1, 3, 5, 9, 17, 33}; 9270 9271 START(); 9272 __ Mov(x0, value); 9273 __ Mov(w1, shift[0]); 9274 __ Mov(w2, shift[1]); 9275 __ Mov(w3, shift[2]); 9276 __ Mov(w4, shift[3]); 9277 __ Mov(w5, shift[4]); 9278 __ Mov(w6, shift[5]); 9279 9280 // The MacroAssembler does not allow zr as an argument. 
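// A shift by xzr is a shift by zero: variable shifts use the shift
// register's value modulo the lane size, and xzr reads as zero, so x0 must
// come out unchanged (the first assertion below checks this). The same
// applies to the lsrv, asrv and rorv variants in the following tests.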
9281 { 9282 ExactAssemblyScope scope(&masm, kInstructionSize); 9283 __ lslv(x0, x0, xzr); 9284 } 9285 9286 __ Lsl(x16, x0, x1); 9287 __ Lsl(x17, x0, x2); 9288 __ Lsl(x18, x0, x3); 9289 __ Lsl(x19, x0, x4); 9290 __ Lsl(x20, x0, x5); 9291 __ Lsl(x21, x0, x6); 9292 9293 __ Lsl(w22, w0, w1); 9294 __ Lsl(w23, w0, w2); 9295 __ Lsl(w24, w0, w3); 9296 __ Lsl(w25, w0, w4); 9297 __ Lsl(w26, w0, w5); 9298 __ Lsl(w27, w0, w6); 9299 END(); 9300 9301 RUN(); 9302 9303 ASSERT_EQUAL_64(value, x0); 9304 ASSERT_EQUAL_64(value << (shift[0] & 63), x16); 9305 ASSERT_EQUAL_64(value << (shift[1] & 63), x17); 9306 ASSERT_EQUAL_64(value << (shift[2] & 63), x18); 9307 ASSERT_EQUAL_64(value << (shift[3] & 63), x19); 9308 ASSERT_EQUAL_64(value << (shift[4] & 63), x20); 9309 ASSERT_EQUAL_64(value << (shift[5] & 63), x21); 9310 ASSERT_EQUAL_32(value << (shift[0] & 31), w22); 9311 ASSERT_EQUAL_32(value << (shift[1] & 31), w23); 9312 ASSERT_EQUAL_32(value << (shift[2] & 31), w24); 9313 ASSERT_EQUAL_32(value << (shift[3] & 31), w25); 9314 ASSERT_EQUAL_32(value << (shift[4] & 31), w26); 9315 ASSERT_EQUAL_32(value << (shift[5] & 31), w27); 9316 9317 TEARDOWN(); 9318 } 9319 9320 9321 TEST(lsrv) { 9322 SETUP(); 9323 9324 uint64_t value = 0x0123456789abcdef; 9325 int shift[] = {1, 3, 5, 9, 17, 33}; 9326 9327 START(); 9328 __ Mov(x0, value); 9329 __ Mov(w1, shift[0]); 9330 __ Mov(w2, shift[1]); 9331 __ Mov(w3, shift[2]); 9332 __ Mov(w4, shift[3]); 9333 __ Mov(w5, shift[4]); 9334 __ Mov(w6, shift[5]); 9335 9336 // The MacroAssembler does not allow zr as an argument. 9337 { 9338 ExactAssemblyScope scope(&masm, kInstructionSize); 9339 __ lsrv(x0, x0, xzr); 9340 } 9341 9342 __ Lsr(x16, x0, x1); 9343 __ Lsr(x17, x0, x2); 9344 __ Lsr(x18, x0, x3); 9345 __ Lsr(x19, x0, x4); 9346 __ Lsr(x20, x0, x5); 9347 __ Lsr(x21, x0, x6); 9348 9349 __ Lsr(w22, w0, w1); 9350 __ Lsr(w23, w0, w2); 9351 __ Lsr(w24, w0, w3); 9352 __ Lsr(w25, w0, w4); 9353 __ Lsr(w26, w0, w5); 9354 __ Lsr(w27, w0, w6); 9355 END(); 9356 9357 RUN(); 9358 9359 ASSERT_EQUAL_64(value, x0); 9360 ASSERT_EQUAL_64(value >> (shift[0] & 63), x16); 9361 ASSERT_EQUAL_64(value >> (shift[1] & 63), x17); 9362 ASSERT_EQUAL_64(value >> (shift[2] & 63), x18); 9363 ASSERT_EQUAL_64(value >> (shift[3] & 63), x19); 9364 ASSERT_EQUAL_64(value >> (shift[4] & 63), x20); 9365 ASSERT_EQUAL_64(value >> (shift[5] & 63), x21); 9366 9367 value &= 0xffffffff; 9368 ASSERT_EQUAL_32(value >> (shift[0] & 31), w22); 9369 ASSERT_EQUAL_32(value >> (shift[1] & 31), w23); 9370 ASSERT_EQUAL_32(value >> (shift[2] & 31), w24); 9371 ASSERT_EQUAL_32(value >> (shift[3] & 31), w25); 9372 ASSERT_EQUAL_32(value >> (shift[4] & 31), w26); 9373 ASSERT_EQUAL_32(value >> (shift[5] & 31), w27); 9374 9375 TEARDOWN(); 9376 } 9377 9378 9379 TEST(asrv) { 9380 SETUP(); 9381 9382 int64_t value = 0xfedcba98fedcba98; 9383 int shift[] = {1, 3, 5, 9, 17, 33}; 9384 9385 START(); 9386 __ Mov(x0, value); 9387 __ Mov(w1, shift[0]); 9388 __ Mov(w2, shift[1]); 9389 __ Mov(w3, shift[2]); 9390 __ Mov(w4, shift[3]); 9391 __ Mov(w5, shift[4]); 9392 __ Mov(w6, shift[5]); 9393 9394 // The MacroAssembler does not allow zr as an argument. 
  {
    ExactAssemblyScope scope(&masm, kInstructionSize);
    __ asrv(x0, x0, xzr);
  }

  __ Asr(x16, x0, x1);
  __ Asr(x17, x0, x2);
  __ Asr(x18, x0, x3);
  __ Asr(x19, x0, x4);
  __ Asr(x20, x0, x5);
  __ Asr(x21, x0, x6);

  __ Asr(w22, w0, w1);
  __ Asr(w23, w0, w2);
  __ Asr(w24, w0, w3);
  __ Asr(w25, w0, w4);
  __ Asr(w26, w0, w5);
  __ Asr(w27, w0, w6);
  END();

  RUN();

  ASSERT_EQUAL_64(value, x0);
  ASSERT_EQUAL_64(value >> (shift[0] & 63), x16);
  ASSERT_EQUAL_64(value >> (shift[1] & 63), x17);
  ASSERT_EQUAL_64(value >> (shift[2] & 63), x18);
  ASSERT_EQUAL_64(value >> (shift[3] & 63), x19);
  ASSERT_EQUAL_64(value >> (shift[4] & 63), x20);
  ASSERT_EQUAL_64(value >> (shift[5] & 63), x21);

  int32_t value32 = static_cast<int32_t>(value & 0xffffffff);
  ASSERT_EQUAL_32(value32 >> (shift[0] & 31), w22);
  ASSERT_EQUAL_32(value32 >> (shift[1] & 31), w23);
  ASSERT_EQUAL_32(value32 >> (shift[2] & 31), w24);
  ASSERT_EQUAL_32(value32 >> (shift[3] & 31), w25);
  ASSERT_EQUAL_32(value32 >> (shift[4] & 31), w26);
  ASSERT_EQUAL_32(value32 >> (shift[5] & 31), w27);

  TEARDOWN();
}


TEST(rorv) {
  SETUP();

  uint64_t value = 0x0123456789abcdef;
  int shift[] = {4, 8, 12, 16, 24, 36};

  START();
  __ Mov(x0, value);
  __ Mov(w1, shift[0]);
  __ Mov(w2, shift[1]);
  __ Mov(w3, shift[2]);
  __ Mov(w4, shift[3]);
  __ Mov(w5, shift[4]);
  __ Mov(w6, shift[5]);

  // The MacroAssembler does not allow zr as an argument.
  {
    ExactAssemblyScope scope(&masm, kInstructionSize);
    __ rorv(x0, x0, xzr);
  }

  __ Ror(x16, x0, x1);
  __ Ror(x17, x0, x2);
  __ Ror(x18, x0, x3);
  __ Ror(x19, x0, x4);
  __ Ror(x20, x0, x5);
  __ Ror(x21, x0, x6);

  __ Ror(w22, w0, w1);
  __ Ror(w23, w0, w2);
  __ Ror(w24, w0, w3);
  __ Ror(w25, w0, w4);
  __ Ror(w26, w0, w5);
  __ Ror(w27, w0, w6);
  END();

  RUN();

  ASSERT_EQUAL_64(value, x0);
  ASSERT_EQUAL_64(0xf0123456789abcde, x16);
  ASSERT_EQUAL_64(0xef0123456789abcd, x17);
  ASSERT_EQUAL_64(0xdef0123456789abc, x18);
  ASSERT_EQUAL_64(0xcdef0123456789ab, x19);
  ASSERT_EQUAL_64(0xabcdef0123456789, x20);
  ASSERT_EQUAL_64(0x789abcdef0123456, x21);
  ASSERT_EQUAL_32(0xf89abcde, w22);
  ASSERT_EQUAL_32(0xef89abcd, w23);
  ASSERT_EQUAL_32(0xdef89abc, w24);
  ASSERT_EQUAL_32(0xcdef89ab, w25);
  ASSERT_EQUAL_32(0xabcdef89, w26);
  ASSERT_EQUAL_32(0xf89abcde, w27);

  TEARDOWN();
}


TEST(bfm) {
  SETUP();

  START();
  __ Mov(x1, 0x0123456789abcdef);

  __ Mov(x10, 0x8888888888888888);
  __ Mov(x11, 0x8888888888888888);
  __ Mov(x12, 0x8888888888888888);
  __ Mov(x13, 0x8888888888888888);
  __ Mov(w20, 0x88888888);
  __ Mov(w21, 0x88888888);

  __ Bfm(x10, x1, 16, 31);
  __ Bfm(x11, x1, 32, 15);

  __ Bfm(w20, w1, 16, 23);
  __ Bfm(w21, w1, 24, 15);

  // Aliases.
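  // (Bfi inserts a low-order bitfield at a given bit position, while Bfxil
  // extracts a bitfield into the low-order bits; both leave the remaining
  // destination bits intact. For example, Bfi(x12, x1, 16, 8) moves x1[7:0],
  // 0xef, into x12[23:16], and Bfxil(x13, x1, 16, 8) moves x1[23:16], 0xab,
  // into x13[7:0], as the assertions below show.)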
  __ Bfi(x12, x1, 16, 8);
  __ Bfxil(x13, x1, 16, 8);
  END();

  RUN();

  ASSERT_EQUAL_64(0x88888888888889ab, x10);
  ASSERT_EQUAL_64(0x8888cdef88888888, x11);

  ASSERT_EQUAL_32(0x888888ab, w20);
  ASSERT_EQUAL_32(0x88cdef88, w21);

  ASSERT_EQUAL_64(0x8888888888ef8888, x12);
  ASSERT_EQUAL_64(0x88888888888888ab, x13);

  TEARDOWN();
}


TEST(sbfm) {
  SETUP();

  START();
  __ Mov(x1, 0x0123456789abcdef);
  __ Mov(x2, 0xfedcba9876543210);

  __ Sbfm(x10, x1, 16, 31);
  __ Sbfm(x11, x1, 32, 15);
  __ Sbfm(x12, x1, 32, 47);
  __ Sbfm(x13, x1, 48, 35);

  __ Sbfm(w14, w1, 16, 23);
  __ Sbfm(w15, w1, 24, 15);
  __ Sbfm(w16, w2, 16, 23);
  __ Sbfm(w17, w2, 24, 15);

  // Aliases.
  __ Asr(x18, x1, 32);
  __ Asr(x19, x2, 32);
  __ Sbfiz(x20, x1, 8, 16);
  __ Sbfiz(x21, x2, 8, 16);
  __ Sbfx(x22, x1, 8, 16);
  __ Sbfx(x23, x2, 8, 16);
  __ Sxtb(x24, w1);
  __ Sxtb(x25, x2);
  __ Sxth(x26, w1);
  __ Sxth(x27, x2);
  __ Sxtw(x28, w1);
  __ Sxtw(x29, x2);
  END();

  RUN();

  ASSERT_EQUAL_64(0xffffffffffff89ab, x10);
  ASSERT_EQUAL_64(0xffffcdef00000000, x11);
  ASSERT_EQUAL_64(0x0000000000004567, x12);
  ASSERT_EQUAL_64(0x000789abcdef0000, x13);

  ASSERT_EQUAL_32(0xffffffab, w14);
  ASSERT_EQUAL_32(0xffcdef00, w15);
  ASSERT_EQUAL_32(0x00000054, w16);
  ASSERT_EQUAL_32(0x00321000, w17);

  ASSERT_EQUAL_64(0x0000000001234567, x18);
  ASSERT_EQUAL_64(0xfffffffffedcba98, x19);
  ASSERT_EQUAL_64(0xffffffffffcdef00, x20);
  ASSERT_EQUAL_64(0x0000000000321000, x21);
  ASSERT_EQUAL_64(0xffffffffffffabcd, x22);
  ASSERT_EQUAL_64(0x0000000000005432, x23);
  ASSERT_EQUAL_64(0xffffffffffffffef, x24);
  ASSERT_EQUAL_64(0x0000000000000010, x25);
  ASSERT_EQUAL_64(0xffffffffffffcdef, x26);
  ASSERT_EQUAL_64(0x0000000000003210, x27);
  ASSERT_EQUAL_64(0xffffffff89abcdef, x28);
  ASSERT_EQUAL_64(0x0000000076543210, x29);

  TEARDOWN();
}


TEST(ubfm) {
  SETUP();

  START();
  __ Mov(x1, 0x0123456789abcdef);
  __ Mov(x2, 0xfedcba9876543210);

  __ Mov(x10, 0x8888888888888888);
  __ Mov(x11, 0x8888888888888888);

  __ Ubfm(x10, x1, 16, 31);
  __ Ubfm(x11, x1, 32, 15);
  __ Ubfm(x12, x1, 32, 47);
  __ Ubfm(x13, x1, 48, 35);

  __ Ubfm(w25, w1, 16, 23);
  __ Ubfm(w26, w1, 24, 15);
  __ Ubfm(w27, w2, 16, 23);
  __ Ubfm(w28, w2, 24, 15);

  // Aliases.
  __ Lsl(x15, x1, 63);
  __ Lsl(x16, x1, 0);
  __ Lsr(x17, x1, 32);
  __ Ubfiz(x18, x1, 8, 16);
  __ Ubfx(x19, x1, 8, 16);
  __ Uxtb(x20, x1);
  __ Uxth(x21, x1);
  __ Uxtw(x22, x1);
  END();

  RUN();

  ASSERT_EQUAL_64(0x00000000000089ab, x10);
  ASSERT_EQUAL_64(0x0000cdef00000000, x11);
  ASSERT_EQUAL_64(0x0000000000004567, x12);
  ASSERT_EQUAL_64(0x000789abcdef0000, x13);

  ASSERT_EQUAL_32(0x000000ab, w25);
  ASSERT_EQUAL_32(0x00cdef00, w26);
  ASSERT_EQUAL_32(0x00000054, w27);
  ASSERT_EQUAL_32(0x00321000, w28);

  ASSERT_EQUAL_64(0x8000000000000000, x15);
  ASSERT_EQUAL_64(0x0123456789abcdef, x16);
  ASSERT_EQUAL_64(0x0000000001234567, x17);
  ASSERT_EQUAL_64(0x0000000000cdef00, x18);
  ASSERT_EQUAL_64(0x000000000000abcd, x19);
  ASSERT_EQUAL_64(0x00000000000000ef, x20);
  ASSERT_EQUAL_64(0x000000000000cdef, x21);
  ASSERT_EQUAL_64(0x0000000089abcdef, x22);

  TEARDOWN();
}


TEST(extr) {
  SETUP();

  START();
  __ Mov(x1, 0x0123456789abcdef);
  __ Mov(x2, 0xfedcba9876543210);

  __ Extr(w10, w1, w2, 0);
  __ Extr(w11, w1, w2, 1);
  __ Extr(x12, x2, x1, 2);

  __ Ror(w13, w1, 0);
  __ Ror(w14, w2, 17);
  __ Ror(w15, w1, 31);
  __ Ror(x18, x2, 0);
  __ Ror(x19, x2, 1);
  __ Ror(x20, x1, 63);
  END();

  RUN();

  ASSERT_EQUAL_64(0x76543210, x10);
  ASSERT_EQUAL_64(0xbb2a1908, x11);
  ASSERT_EQUAL_64(0x0048d159e26af37b, x12);
  ASSERT_EQUAL_64(0x89abcdef, x13);
  ASSERT_EQUAL_64(0x19083b2a, x14);
  ASSERT_EQUAL_64(0x13579bdf, x15);
  ASSERT_EQUAL_64(0xfedcba9876543210, x18);
  ASSERT_EQUAL_64(0x7f6e5d4c3b2a1908, x19);
  ASSERT_EQUAL_64(0x02468acf13579bde, x20);

  TEARDOWN();
}


TEST(fmov_imm) {
  SETUP();

  START();
  __ Fmov(s11, 1.0);
  __ Fmov(d22, -13.0);
  __ Fmov(s1, 255.0);
  __ Fmov(d2, 12.34567);
  __ Fmov(s3, 0.0);
  __ Fmov(d4, 0.0);
  __ Fmov(s5, kFP32PositiveInfinity);
  __ Fmov(d6, kFP64NegativeInfinity);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s11);
  ASSERT_EQUAL_FP64(-13.0, d22);
  ASSERT_EQUAL_FP32(255.0, s1);
  ASSERT_EQUAL_FP64(12.34567, d2);
  ASSERT_EQUAL_FP32(0.0, s3);
  ASSERT_EQUAL_FP64(0.0, d4);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s5);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d6);

  TEARDOWN();
}


TEST(fmov_reg) {
  SETUP();

  START();
  __ Fmov(s20, 1.0);
  __ Fmov(w10, s20);
  __ Fmov(s30, w10);
  __ Fmov(s5, s20);
  __ Fmov(d1, -13.0);
  __ Fmov(x1, d1);
  __ Fmov(d2, x1);
  __ Fmov(d4, d1);
  __ Fmov(d6, RawbitsToDouble(0x0123456789abcdef));
  __ Fmov(s6, s6);

  __ Fmov(d0, 0.0);
  __ Fmov(v0.D(), 1, x1);
  __ Fmov(x2, v0.D(), 1);

  END();

  RUN();

  ASSERT_EQUAL_32(FloatToRawbits(1.0), w10);
  ASSERT_EQUAL_FP32(1.0, s30);
  ASSERT_EQUAL_FP32(1.0, s5);
  ASSERT_EQUAL_64(DoubleToRawbits(-13.0), x1);
  ASSERT_EQUAL_FP64(-13.0, d2);
  ASSERT_EQUAL_FP64(-13.0, d4);
  ASSERT_EQUAL_FP32(RawbitsToFloat(0x89abcdef), s6);
  ASSERT_EQUAL_128(DoubleToRawbits(-13.0), 0x0000000000000000, q0);
  ASSERT_EQUAL_64(DoubleToRawbits(-13.0), x2);
  TEARDOWN();
}


TEST(fadd) {
  SETUP();

  START();
  __ Fmov(s14, -0.0f);
  __ Fmov(s15, kFP32PositiveInfinity);
  __ Fmov(s16, kFP32NegativeInfinity);
  __ Fmov(s17, 3.25f);
  __ Fmov(s18, 1.0f);
  __ Fmov(s19, 0.0f);

  __ Fmov(d26, -0.0);
  __ Fmov(d27, kFP64PositiveInfinity);
  __ Fmov(d28, kFP64NegativeInfinity);
  __ Fmov(d29, 0.0);
  __ Fmov(d30, -2.0);
  __ Fmov(d31, 2.25);

  __ Fadd(s0, s17, s18);
  __ Fadd(s1, s18, s19);
  __ Fadd(s2, s14, s18);
  __ Fadd(s3, s15, s18);
  __ Fadd(s4, s16, s18);
  __ Fadd(s5, s15, s16);
  __ Fadd(s6, s16, s15);

  __ Fadd(d7, d30, d31);
  __ Fadd(d8, d29, d31);
  __ Fadd(d9, d26, d31);
  __ Fadd(d10, d27, d31);
  __ Fadd(d11, d28, d31);
  __ Fadd(d12, d27, d28);
  __ Fadd(d13, d28, d27);
  END();

  RUN();

  ASSERT_EQUAL_FP32(4.25, s0);
  ASSERT_EQUAL_FP32(1.0, s1);
  ASSERT_EQUAL_FP32(1.0, s2);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s3);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s4);
  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s5);
  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s6);
  ASSERT_EQUAL_FP64(0.25, d7);
  ASSERT_EQUAL_FP64(2.25, d8);
  ASSERT_EQUAL_FP64(2.25, d9);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d10);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d11);
  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d12);
  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d13);

  TEARDOWN();
}


TEST(fsub) {
  SETUP();

  START();
  __ Fmov(s14, -0.0f);
  __ Fmov(s15, kFP32PositiveInfinity);
  __ Fmov(s16, kFP32NegativeInfinity);
  __ Fmov(s17, 3.25f);
  __ Fmov(s18, 1.0f);
  __ Fmov(s19, 0.0f);

  __ Fmov(d26, -0.0);
  __ Fmov(d27, kFP64PositiveInfinity);
  __ Fmov(d28, kFP64NegativeInfinity);
  __ Fmov(d29, 0.0);
  __ Fmov(d30, -2.0);
  __ Fmov(d31, 2.25);

  __ Fsub(s0, s17, s18);
  __ Fsub(s1, s18, s19);
  __ Fsub(s2, s14, s18);
  __ Fsub(s3, s18, s15);
  __ Fsub(s4, s18, s16);
  __ Fsub(s5, s15, s15);
  __ Fsub(s6, s16, s16);

  __ Fsub(d7, d30, d31);
  __ Fsub(d8, d29, d31);
  __ Fsub(d9, d26, d31);
  __ Fsub(d10, d31, d27);
  __ Fsub(d11, d31, d28);
  __ Fsub(d12, d27, d27);
  __ Fsub(d13, d28, d28);
  END();

  RUN();

  ASSERT_EQUAL_FP32(2.25, s0);
  ASSERT_EQUAL_FP32(1.0, s1);
  ASSERT_EQUAL_FP32(-1.0, s2);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s3);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s4);
  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s5);
  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s6);
  ASSERT_EQUAL_FP64(-4.25, d7);
  ASSERT_EQUAL_FP64(-2.25, d8);
  ASSERT_EQUAL_FP64(-2.25, d9);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d10);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d11);
  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d12);
  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d13);

  TEARDOWN();
}


TEST(fmul) {
  SETUP();

  START();
  __ Fmov(s14, -0.0f);
  __ Fmov(s15, kFP32PositiveInfinity);
  __ Fmov(s16, kFP32NegativeInfinity);
  __ Fmov(s17, 3.25f);
  __ Fmov(s18, 2.0f);
  __ Fmov(s19, 0.0f);
  __ Fmov(s20, -2.0f);

  __ Fmov(d26, -0.0);
  __ Fmov(d27, kFP64PositiveInfinity);
  __ Fmov(d28, kFP64NegativeInfinity);
  __ Fmov(d29, 0.0);
  __ Fmov(d30, -2.0);
  __ Fmov(d31, 2.25);

  __ Fmul(s0, s17, s18);
  __ Fmul(s1, s18, s19);
  __ Fmul(s2, s14, s14);
  __ Fmul(s3, s15, s20);
  __ Fmul(s4, s16, s20);
  __ Fmul(s5, s15, s19);
  __ Fmul(s6, s19, s16);

  __ Fmul(d7, d30, d31);
  __ Fmul(d8, d29, d31);
  __ Fmul(d9, d26, d26);
  __ Fmul(d10, d27, d30);
  __ Fmul(d11, d28, d30);
  __ Fmul(d12, d27, d29);
  __ Fmul(d13, d29, d28);
  END();

  RUN();

  ASSERT_EQUAL_FP32(6.5, s0);
  ASSERT_EQUAL_FP32(0.0, s1);
  ASSERT_EQUAL_FP32(0.0, s2);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s3);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s4);
  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s5);
  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s6);
  ASSERT_EQUAL_FP64(-4.5, d7);
  ASSERT_EQUAL_FP64(0.0, d8);
  ASSERT_EQUAL_FP64(0.0, d9);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d10);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d11);
  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d12);
  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d13);

  TEARDOWN();
}


static void FmaddFmsubHelper(double n,
                             double m,
                             double a,
                             double fmadd,
                             double fmsub,
                             double fnmadd,
                             double fnmsub) {
  SETUP();
  START();

  __ Fmov(d0, n);
  __ Fmov(d1, m);
  __ Fmov(d2, a);
  // The AArch64 fused multiply-accumulate operations compute:
  //   fmadd:   a + n * m
  //   fmsub:   a - n * m
  //   fnmadd: -a - n * m
  //   fnmsub: -a + n * m
  __ Fmadd(d28, d0, d1, d2);
  __ Fmsub(d29, d0, d1, d2);
  __ Fnmadd(d30, d0, d1, d2);
  __ Fnmsub(d31, d0, d1, d2);

  END();
  RUN();

  ASSERT_EQUAL_FP64(fmadd, d28);
  ASSERT_EQUAL_FP64(fmsub, d29);
  ASSERT_EQUAL_FP64(fnmadd, d30);
  ASSERT_EQUAL_FP64(fnmsub, d31);

  TEARDOWN();
}


TEST(fmadd_fmsub_double) {
  // It's hard to check the result of fused operations because the only way
  // to calculate the result is to use fma, which is what the simulator uses
  // anyway.

  // Basic operation.
  FmaddFmsubHelper(1.0, 2.0, 3.0, 5.0, 1.0, -5.0, -1.0);
  FmaddFmsubHelper(-1.0, 2.0, 3.0, 1.0, 5.0, -1.0, -5.0);

  // Check the sign of exact zeroes.
  //               n     m     a     fmadd fmsub fnmadd fnmsub
  FmaddFmsubHelper(-0.0, +0.0, -0.0, -0.0, +0.0, +0.0,  +0.0);
  FmaddFmsubHelper(+0.0, +0.0, -0.0, +0.0, -0.0, +0.0,  +0.0);
  FmaddFmsubHelper(+0.0, +0.0, +0.0, +0.0, +0.0, -0.0,  +0.0);
  FmaddFmsubHelper(-0.0, +0.0, +0.0, +0.0, +0.0, +0.0,  -0.0);
  FmaddFmsubHelper(+0.0, -0.0, -0.0, -0.0, +0.0, +0.0,  +0.0);
  FmaddFmsubHelper(-0.0, -0.0, -0.0, +0.0, -0.0, +0.0,  +0.0);
  FmaddFmsubHelper(-0.0, -0.0, +0.0, +0.0, +0.0, -0.0,  +0.0);
  FmaddFmsubHelper(+0.0, -0.0, +0.0, +0.0, +0.0, +0.0,  -0.0);

  // Check NaN generation.
  FmaddFmsubHelper(kFP64PositiveInfinity,
                   0.0,
                   42.0,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN);
  FmaddFmsubHelper(0.0,
                   kFP64PositiveInfinity,
                   42.0,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN);
  FmaddFmsubHelper(kFP64PositiveInfinity,
                   1.0,
                   kFP64PositiveInfinity,
                   kFP64PositiveInfinity,   // inf + ( inf * 1) = inf
                   kFP64DefaultNaN,         // inf + (-inf * 1) = NaN
                   kFP64NegativeInfinity,   // -inf + (-inf * 1) = -inf
                   kFP64DefaultNaN);        // -inf + ( inf * 1) = NaN
  FmaddFmsubHelper(kFP64NegativeInfinity,
                   1.0,
                   kFP64PositiveInfinity,
                   kFP64DefaultNaN,         // inf + (-inf * 1) = NaN
                   kFP64PositiveInfinity,   // inf + ( inf * 1) = inf
                   kFP64DefaultNaN,         // -inf + ( inf * 1) = NaN
                   kFP64NegativeInfinity);  // -inf + (-inf * 1) = -inf
}


static void FmaddFmsubHelper(float n,
                             float m,
                             float a,
                             float fmadd,
                             float fmsub,
                             float fnmadd,
                             float fnmsub) {
  SETUP();
  START();

  __ Fmov(s0, n);
  __ Fmov(s1, m);
  __ Fmov(s2, a);
  __ Fmadd(s28, s0, s1, s2);
  __ Fmsub(s29, s0, s1, s2);
  __ Fnmadd(s30, s0, s1, s2);
  __ Fnmsub(s31, s0, s1, s2);

  END();
  RUN();

  ASSERT_EQUAL_FP32(fmadd, s28);
  ASSERT_EQUAL_FP32(fmsub, s29);
  ASSERT_EQUAL_FP32(fnmadd, s30);
  ASSERT_EQUAL_FP32(fnmsub, s31);

  TEARDOWN();
}


TEST(fmadd_fmsub_float) {
  // It's hard to check the result of fused operations because the only way
  // to calculate the result is to use fma, which is what the simulator uses
  // anyway.

  // Basic operation.
  FmaddFmsubHelper(1.0f, 2.0f, 3.0f, 5.0f, 1.0f, -5.0f, -1.0f);
  FmaddFmsubHelper(-1.0f, 2.0f, 3.0f, 1.0f, 5.0f, -1.0f, -5.0f);

  // Check the sign of exact zeroes.
  //               n      m      a      fmadd  fmsub  fnmadd fnmsub
  FmaddFmsubHelper(-0.0f, +0.0f, -0.0f, -0.0f, +0.0f, +0.0f, +0.0f);
  FmaddFmsubHelper(+0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, +0.0f);
  FmaddFmsubHelper(+0.0f, +0.0f, +0.0f, +0.0f, +0.0f, -0.0f, +0.0f);
  FmaddFmsubHelper(-0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, -0.0f);
  FmaddFmsubHelper(+0.0f, -0.0f, -0.0f, -0.0f, +0.0f, +0.0f, +0.0f);
  FmaddFmsubHelper(-0.0f, -0.0f, -0.0f, +0.0f, -0.0f, +0.0f, +0.0f);
  FmaddFmsubHelper(-0.0f, -0.0f, +0.0f, +0.0f, +0.0f, -0.0f, +0.0f);
  FmaddFmsubHelper(+0.0f, -0.0f, +0.0f, +0.0f, +0.0f, +0.0f, -0.0f);

  // Check NaN generation.
  FmaddFmsubHelper(kFP32PositiveInfinity,
                   0.0f,
                   42.0f,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN);
  FmaddFmsubHelper(0.0f,
                   kFP32PositiveInfinity,
                   42.0f,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN);
  FmaddFmsubHelper(kFP32PositiveInfinity,
                   1.0f,
                   kFP32PositiveInfinity,
                   kFP32PositiveInfinity,   // inf + ( inf * 1) = inf
                   kFP32DefaultNaN,         // inf + (-inf * 1) = NaN
                   kFP32NegativeInfinity,   // -inf + (-inf * 1) = -inf
                   kFP32DefaultNaN);        // -inf + ( inf * 1) = NaN
  FmaddFmsubHelper(kFP32NegativeInfinity,
                   1.0f,
                   kFP32PositiveInfinity,
                   kFP32DefaultNaN,         // inf + (-inf * 1) = NaN
                   kFP32PositiveInfinity,   // inf + ( inf * 1) = inf
                   kFP32DefaultNaN,         // -inf + ( inf * 1) = NaN
                   kFP32NegativeInfinity);  // -inf + (-inf * 1) = -inf
}


TEST(fmadd_fmsub_double_nans) {
  // Make sure that NaN propagation works correctly.
  double s1 = RawbitsToDouble(0x7ff5555511111111);
  double s2 = RawbitsToDouble(0x7ff5555522222222);
  double sa = RawbitsToDouble(0x7ff55555aaaaaaaa);
  double q1 = RawbitsToDouble(0x7ffaaaaa11111111);
  double q2 = RawbitsToDouble(0x7ffaaaaa22222222);
  double qa = RawbitsToDouble(0x7ffaaaaaaaaaaaaa);
  VIXL_ASSERT(IsSignallingNaN(s1));
  VIXL_ASSERT(IsSignallingNaN(s2));
  VIXL_ASSERT(IsSignallingNaN(sa));
  VIXL_ASSERT(IsQuietNaN(q1));
  VIXL_ASSERT(IsQuietNaN(q2));
  VIXL_ASSERT(IsQuietNaN(qa));

  // The input NaNs after passing through ProcessNaN.
  double s1_proc = RawbitsToDouble(0x7ffd555511111111);
  double s2_proc = RawbitsToDouble(0x7ffd555522222222);
  double sa_proc = RawbitsToDouble(0x7ffd5555aaaaaaaa);
  double q1_proc = q1;
  double q2_proc = q2;
  double qa_proc = qa;
  VIXL_ASSERT(IsQuietNaN(s1_proc));
  VIXL_ASSERT(IsQuietNaN(s2_proc));
  VIXL_ASSERT(IsQuietNaN(sa_proc));
  VIXL_ASSERT(IsQuietNaN(q1_proc));
  VIXL_ASSERT(IsQuietNaN(q2_proc));
  VIXL_ASSERT(IsQuietNaN(qa_proc));

  // Negated NaNs, as they would be produced on ARMv8 hardware.
  double s1_proc_neg = RawbitsToDouble(0xfffd555511111111);
  double sa_proc_neg = RawbitsToDouble(0xfffd5555aaaaaaaa);
  double q1_proc_neg = RawbitsToDouble(0xfffaaaaa11111111);
  double qa_proc_neg = RawbitsToDouble(0xfffaaaaaaaaaaaaa);
  VIXL_ASSERT(IsQuietNaN(s1_proc_neg));
  VIXL_ASSERT(IsQuietNaN(sa_proc_neg));
  VIXL_ASSERT(IsQuietNaN(q1_proc_neg));
  VIXL_ASSERT(IsQuietNaN(qa_proc_neg));

  // Quiet NaNs are propagated.
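  // (The *_neg expectations encode the operand negations that fmsub, fnmadd
  // and fnmsub perform before the final NaN is propagated.)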
  FmaddFmsubHelper(q1, 0, 0, q1_proc, q1_proc_neg, q1_proc_neg, q1_proc);
  FmaddFmsubHelper(0, q2, 0, q2_proc, q2_proc, q2_proc, q2_proc);
  FmaddFmsubHelper(0, 0, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);
  FmaddFmsubHelper(q1, q2, 0, q1_proc, q1_proc_neg, q1_proc_neg, q1_proc);
  FmaddFmsubHelper(0, q2, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);
  FmaddFmsubHelper(q1, 0, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);
  FmaddFmsubHelper(q1, q2, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);

  // Signalling NaNs are propagated, and made quiet.
  FmaddFmsubHelper(s1, 0, 0, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
  FmaddFmsubHelper(0, s2, 0, s2_proc, s2_proc, s2_proc, s2_proc);
  FmaddFmsubHelper(0, 0, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
  FmaddFmsubHelper(s1, s2, 0, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
  FmaddFmsubHelper(0, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
  FmaddFmsubHelper(s1, 0, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
  FmaddFmsubHelper(s1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);

  // Signalling NaNs take precedence over quiet NaNs.
  FmaddFmsubHelper(s1, q2, qa, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
  FmaddFmsubHelper(q1, s2, qa, s2_proc, s2_proc, s2_proc, s2_proc);
  FmaddFmsubHelper(q1, q2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
  FmaddFmsubHelper(s1, s2, qa, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
  FmaddFmsubHelper(q1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
  FmaddFmsubHelper(s1, q2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
  FmaddFmsubHelper(s1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);

  // A NaN generated by the intermediate op1 * op2 overrides a quiet NaN in a.
  FmaddFmsubHelper(0,
                   kFP64PositiveInfinity,
                   qa,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN);
  FmaddFmsubHelper(kFP64PositiveInfinity,
                   0,
                   qa,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN);
  FmaddFmsubHelper(0,
                   kFP64NegativeInfinity,
                   qa,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN);
  FmaddFmsubHelper(kFP64NegativeInfinity,
                   0,
                   qa,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN);
}


TEST(fmadd_fmsub_float_nans) {
  // Make sure that NaN propagation works correctly.
  float s1 = RawbitsToFloat(0x7f951111);
  float s2 = RawbitsToFloat(0x7f952222);
  float sa = RawbitsToFloat(0x7f95aaaa);
  float q1 = RawbitsToFloat(0x7fea1111);
  float q2 = RawbitsToFloat(0x7fea2222);
  float qa = RawbitsToFloat(0x7feaaaaa);
  VIXL_ASSERT(IsSignallingNaN(s1));
  VIXL_ASSERT(IsSignallingNaN(s2));
  VIXL_ASSERT(IsSignallingNaN(sa));
  VIXL_ASSERT(IsQuietNaN(q1));
  VIXL_ASSERT(IsQuietNaN(q2));
  VIXL_ASSERT(IsQuietNaN(qa));

  // The input NaNs after passing through ProcessNaN.
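  // (ProcessNaN sets the top bit of the mantissa, which quietens a
  // signalling NaN while preserving the rest of the payload; quiet NaNs pass
  // through unchanged, as the q*_proc values below show.)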
  float s1_proc = RawbitsToFloat(0x7fd51111);
  float s2_proc = RawbitsToFloat(0x7fd52222);
  float sa_proc = RawbitsToFloat(0x7fd5aaaa);
  float q1_proc = q1;
  float q2_proc = q2;
  float qa_proc = qa;
  VIXL_ASSERT(IsQuietNaN(s1_proc));
  VIXL_ASSERT(IsQuietNaN(s2_proc));
  VIXL_ASSERT(IsQuietNaN(sa_proc));
  VIXL_ASSERT(IsQuietNaN(q1_proc));
  VIXL_ASSERT(IsQuietNaN(q2_proc));
  VIXL_ASSERT(IsQuietNaN(qa_proc));

  // Negated NaNs, as they would be produced on ARMv8 hardware.
  float s1_proc_neg = RawbitsToFloat(0xffd51111);
  float sa_proc_neg = RawbitsToFloat(0xffd5aaaa);
  float q1_proc_neg = RawbitsToFloat(0xffea1111);
  float qa_proc_neg = RawbitsToFloat(0xffeaaaaa);
  VIXL_ASSERT(IsQuietNaN(s1_proc_neg));
  VIXL_ASSERT(IsQuietNaN(sa_proc_neg));
  VIXL_ASSERT(IsQuietNaN(q1_proc_neg));
  VIXL_ASSERT(IsQuietNaN(qa_proc_neg));

  // Quiet NaNs are propagated.
  FmaddFmsubHelper(q1, 0, 0, q1_proc, q1_proc_neg, q1_proc_neg, q1_proc);
  FmaddFmsubHelper(0, q2, 0, q2_proc, q2_proc, q2_proc, q2_proc);
  FmaddFmsubHelper(0, 0, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);
  FmaddFmsubHelper(q1, q2, 0, q1_proc, q1_proc_neg, q1_proc_neg, q1_proc);
  FmaddFmsubHelper(0, q2, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);
  FmaddFmsubHelper(q1, 0, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);
  FmaddFmsubHelper(q1, q2, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);

  // Signalling NaNs are propagated, and made quiet.
  FmaddFmsubHelper(s1, 0, 0, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
  FmaddFmsubHelper(0, s2, 0, s2_proc, s2_proc, s2_proc, s2_proc);
  FmaddFmsubHelper(0, 0, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
  FmaddFmsubHelper(s1, s2, 0, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
  FmaddFmsubHelper(0, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
  FmaddFmsubHelper(s1, 0, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
  FmaddFmsubHelper(s1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);

  // Signalling NaNs take precedence over quiet NaNs.
  FmaddFmsubHelper(s1, q2, qa, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
  FmaddFmsubHelper(q1, s2, qa, s2_proc, s2_proc, s2_proc, s2_proc);
  FmaddFmsubHelper(q1, q2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
  FmaddFmsubHelper(s1, s2, qa, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
  FmaddFmsubHelper(q1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
  FmaddFmsubHelper(s1, q2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
  FmaddFmsubHelper(s1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);

  // A NaN generated by the intermediate op1 * op2 overrides a quiet NaN in a.
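  // (0 * infinity is an invalid operation yielding the default NaN, so it
  // wins over the quiet NaN in a for every operation below.)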
  FmaddFmsubHelper(0,
                   kFP32PositiveInfinity,
                   qa,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN);
  FmaddFmsubHelper(kFP32PositiveInfinity,
                   0,
                   qa,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN);
  FmaddFmsubHelper(0,
                   kFP32NegativeInfinity,
                   qa,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN);
  FmaddFmsubHelper(kFP32NegativeInfinity,
                   0,
                   qa,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN);
}


TEST(fdiv) {
  SETUP();

  START();
  __ Fmov(s14, -0.0f);
  __ Fmov(s15, kFP32PositiveInfinity);
  __ Fmov(s16, kFP32NegativeInfinity);
  __ Fmov(s17, 3.25f);
  __ Fmov(s18, 2.0f);
  __ Fmov(s19, 2.0f);
  __ Fmov(s20, -2.0f);

  __ Fmov(d26, -0.0);
  __ Fmov(d27, kFP64PositiveInfinity);
  __ Fmov(d28, kFP64NegativeInfinity);
  __ Fmov(d29, 0.0);
  __ Fmov(d30, -2.0);
  __ Fmov(d31, 2.25);

  __ Fdiv(s0, s17, s18);
  __ Fdiv(s1, s18, s19);
  __ Fdiv(s2, s14, s18);
  __ Fdiv(s3, s18, s15);
  __ Fdiv(s4, s18, s16);
  __ Fdiv(s5, s15, s16);
  __ Fdiv(s6, s14, s14);

  __ Fdiv(d7, d31, d30);
  __ Fdiv(d8, d29, d31);
  __ Fdiv(d9, d26, d31);
  __ Fdiv(d10, d31, d27);
  __ Fdiv(d11, d31, d28);
  __ Fdiv(d12, d28, d27);
  __ Fdiv(d13, d29, d29);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.625f, s0);
  ASSERT_EQUAL_FP32(1.0f, s1);
  ASSERT_EQUAL_FP32(-0.0f, s2);
  ASSERT_EQUAL_FP32(0.0f, s3);
  ASSERT_EQUAL_FP32(-0.0f, s4);
  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s5);
  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s6);
  ASSERT_EQUAL_FP64(-1.125, d7);
  ASSERT_EQUAL_FP64(0.0, d8);
  ASSERT_EQUAL_FP64(-0.0, d9);
  ASSERT_EQUAL_FP64(0.0, d10);
  ASSERT_EQUAL_FP64(-0.0, d11);
  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d12);
  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d13);

  TEARDOWN();
}


static float MinMaxHelper(float n,
                          float m,
                          bool min,
                          float quiet_nan_substitute = 0.0) {
  const uint64_t kFP32QuietNaNMask = 0x00400000;
  uint32_t raw_n = FloatToRawbits(n);
  uint32_t raw_m = FloatToRawbits(m);

  if (std::isnan(n) && ((raw_n & kFP32QuietNaNMask) == 0)) {
    // n is signalling NaN.
    return RawbitsToFloat(raw_n | kFP32QuietNaNMask);
  } else if (std::isnan(m) && ((raw_m & kFP32QuietNaNMask) == 0)) {
    // m is signalling NaN.
    return RawbitsToFloat(raw_m | kFP32QuietNaNMask);
  } else if (quiet_nan_substitute == 0.0) {
    if (std::isnan(n)) {
      // n is quiet NaN.
      return n;
    } else if (std::isnan(m)) {
      // m is quiet NaN.
      return m;
    }
  } else {
    // Substitute n or m if one is quiet, but not both.
    if (std::isnan(n) && !std::isnan(m)) {
      // n is quiet NaN: replace with substitute.
      n = quiet_nan_substitute;
    } else if (!std::isnan(n) && std::isnan(m)) {
      // m is quiet NaN: replace with substitute.
      m = quiet_nan_substitute;
    }
  }

  if ((n == 0.0) && (m == 0.0) && (copysign(1.0, n) != copysign(1.0, m))) {
    return min ? -0.0 : 0.0;
  }

  return min ? fminf(n, m) : fmaxf(n, m);
}
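

// The double variant below mirrors the float reference logic above: Fmin and
// Fmax propagate NaNs (quietening signalling ones), while Fminnm and Fmaxnm
// follow IEEE 754-2008 minNum/maxNum and treat a single quiet NaN operand as
// missing. For a quiet NaN q, for example, Fmax(d0, q, 1.0) produces q with
// its payload intact, whereas Fmaxnm(d0, q, 1.0) produces 1.0.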
static double MinMaxHelper(double n,
                           double m,
                           bool min,
                           double quiet_nan_substitute = 0.0) {
  const uint64_t kFP64QuietNaNMask = 0x0008000000000000;
  uint64_t raw_n = DoubleToRawbits(n);
  uint64_t raw_m = DoubleToRawbits(m);

  if (std::isnan(n) && ((raw_n & kFP64QuietNaNMask) == 0)) {
    // n is signalling NaN.
    return RawbitsToDouble(raw_n | kFP64QuietNaNMask);
  } else if (std::isnan(m) && ((raw_m & kFP64QuietNaNMask) == 0)) {
    // m is signalling NaN.
    return RawbitsToDouble(raw_m | kFP64QuietNaNMask);
  } else if (quiet_nan_substitute == 0.0) {
    if (std::isnan(n)) {
      // n is quiet NaN.
      return n;
    } else if (std::isnan(m)) {
      // m is quiet NaN.
      return m;
    }
  } else {
    // Substitute n or m if one is quiet, but not both.
    if (std::isnan(n) && !std::isnan(m)) {
      // n is quiet NaN: replace with substitute.
      n = quiet_nan_substitute;
    } else if (!std::isnan(n) && std::isnan(m)) {
      // m is quiet NaN: replace with substitute.
      m = quiet_nan_substitute;
    }
  }

  if ((n == 0.0) && (m == 0.0) && (copysign(1.0, n) != copysign(1.0, m))) {
    return min ? -0.0 : 0.0;
  }

  return min ? fmin(n, m) : fmax(n, m);
}


static void FminFmaxDoubleHelper(
    double n, double m, double min, double max, double minnm, double maxnm) {
  SETUP();

  START();
  __ Fmov(d0, n);
  __ Fmov(d1, m);
  __ Fmin(d28, d0, d1);
  __ Fmax(d29, d0, d1);
  __ Fminnm(d30, d0, d1);
  __ Fmaxnm(d31, d0, d1);
  END();

  RUN();

  ASSERT_EQUAL_FP64(min, d28);
  ASSERT_EQUAL_FP64(max, d29);
  ASSERT_EQUAL_FP64(minnm, d30);
  ASSERT_EQUAL_FP64(maxnm, d31);

  TEARDOWN();
}


TEST(fmax_fmin_d) {
  // Use non-standard NaNs to check that the payload bits are preserved.
  double snan = RawbitsToDouble(0x7ff5555512345678);
  double qnan = RawbitsToDouble(0x7ffaaaaa87654321);

  double snan_processed = RawbitsToDouble(0x7ffd555512345678);
  double qnan_processed = qnan;

  VIXL_ASSERT(IsSignallingNaN(snan));
  VIXL_ASSERT(IsQuietNaN(qnan));
  VIXL_ASSERT(IsQuietNaN(snan_processed));
  VIXL_ASSERT(IsQuietNaN(qnan_processed));

  // Bootstrap tests.
  FminFmaxDoubleHelper(0, 0, 0, 0, 0, 0);
  FminFmaxDoubleHelper(0, 1, 0, 1, 0, 1);
  FminFmaxDoubleHelper(kFP64PositiveInfinity,
                       kFP64NegativeInfinity,
                       kFP64NegativeInfinity,
                       kFP64PositiveInfinity,
                       kFP64NegativeInfinity,
                       kFP64PositiveInfinity);
  FminFmaxDoubleHelper(snan,
                       0,
                       snan_processed,
                       snan_processed,
                       snan_processed,
                       snan_processed);
  FminFmaxDoubleHelper(0,
                       snan,
                       snan_processed,
                       snan_processed,
                       snan_processed,
                       snan_processed);
  FminFmaxDoubleHelper(qnan, 0, qnan_processed, qnan_processed, 0, 0);
  FminFmaxDoubleHelper(0, qnan, qnan_processed, qnan_processed, 0, 0);
  FminFmaxDoubleHelper(qnan,
                       snan,
                       snan_processed,
                       snan_processed,
                       snan_processed,
                       snan_processed);
  FminFmaxDoubleHelper(snan,
                       qnan,
                       snan_processed,
                       snan_processed,
                       snan_processed,
                       snan_processed);

  // Iterate over all combinations of inputs.
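  // Each (n, m) pair is checked against MinMaxHelper, which models the
  // expected Fmin, Fmax, Fminnm and Fmaxnm results in C++.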
  double inputs[] = {DBL_MAX,
                     DBL_MIN,
                     1.0,
                     0.0,
                     -DBL_MAX,
                     -DBL_MIN,
                     -1.0,
                     -0.0,
                     kFP64PositiveInfinity,
                     kFP64NegativeInfinity,
                     kFP64QuietNaN,
                     kFP64SignallingNaN};

  const int count = sizeof(inputs) / sizeof(inputs[0]);

  for (int in = 0; in < count; in++) {
    double n = inputs[in];
    for (int im = 0; im < count; im++) {
      double m = inputs[im];
      FminFmaxDoubleHelper(n,
                           m,
                           MinMaxHelper(n, m, true),
                           MinMaxHelper(n, m, false),
                           MinMaxHelper(n, m, true, kFP64PositiveInfinity),
                           MinMaxHelper(n, m, false, kFP64NegativeInfinity));
    }
  }
}


static void FminFmaxFloatHelper(
    float n, float m, float min, float max, float minnm, float maxnm) {
  SETUP();

  START();
  __ Fmov(s0, n);
  __ Fmov(s1, m);
  __ Fmin(s28, s0, s1);
  __ Fmax(s29, s0, s1);
  __ Fminnm(s30, s0, s1);
  __ Fmaxnm(s31, s0, s1);
  END();

  RUN();

  ASSERT_EQUAL_FP32(min, s28);
  ASSERT_EQUAL_FP32(max, s29);
  ASSERT_EQUAL_FP32(minnm, s30);
  ASSERT_EQUAL_FP32(maxnm, s31);

  TEARDOWN();
}


TEST(fmax_fmin_s) {
  // Use non-standard NaNs to check that the payload bits are preserved.
  float snan = RawbitsToFloat(0x7f951234);
  float qnan = RawbitsToFloat(0x7fea8765);

  float snan_processed = RawbitsToFloat(0x7fd51234);
  float qnan_processed = qnan;

  VIXL_ASSERT(IsSignallingNaN(snan));
  VIXL_ASSERT(IsQuietNaN(qnan));
  VIXL_ASSERT(IsQuietNaN(snan_processed));
  VIXL_ASSERT(IsQuietNaN(qnan_processed));

  // Bootstrap tests.
  FminFmaxFloatHelper(0, 0, 0, 0, 0, 0);
  FminFmaxFloatHelper(0, 1, 0, 1, 0, 1);
  FminFmaxFloatHelper(kFP32PositiveInfinity,
                      kFP32NegativeInfinity,
                      kFP32NegativeInfinity,
                      kFP32PositiveInfinity,
                      kFP32NegativeInfinity,
                      kFP32PositiveInfinity);
  FminFmaxFloatHelper(snan,
                      0,
                      snan_processed,
                      snan_processed,
                      snan_processed,
                      snan_processed);
  FminFmaxFloatHelper(0,
                      snan,
                      snan_processed,
                      snan_processed,
                      snan_processed,
                      snan_processed);
  FminFmaxFloatHelper(qnan, 0, qnan_processed, qnan_processed, 0, 0);
  FminFmaxFloatHelper(0, qnan, qnan_processed, qnan_processed, 0, 0);
  FminFmaxFloatHelper(qnan,
                      snan,
                      snan_processed,
                      snan_processed,
                      snan_processed,
                      snan_processed);
  FminFmaxFloatHelper(snan,
                      qnan,
                      snan_processed,
                      snan_processed,
                      snan_processed,
                      snan_processed);

  // Iterate over all combinations of inputs.
  float inputs[] = {FLT_MAX,
                    FLT_MIN,
                    1.0,
                    0.0,
                    -FLT_MAX,
                    -FLT_MIN,
                    -1.0,
                    -0.0,
                    kFP32PositiveInfinity,
                    kFP32NegativeInfinity,
                    kFP32QuietNaN,
                    kFP32SignallingNaN};

  const int count = sizeof(inputs) / sizeof(inputs[0]);

  for (int in = 0; in < count; in++) {
    float n = inputs[in];
    for (int im = 0; im < count; im++) {
      float m = inputs[im];
      FminFmaxFloatHelper(n,
                          m,
                          MinMaxHelper(n, m, true),
                          MinMaxHelper(n, m, false),
                          MinMaxHelper(n, m, true, kFP32PositiveInfinity),
                          MinMaxHelper(n, m, false, kFP32NegativeInfinity));
    }
  }
}


TEST(fccmp) {
  SETUP();

  START();
  __ Fmov(s16, 0.0);
  __ Fmov(s17, 0.5);
  __ Fmov(d18, -0.5);
  __ Fmov(d19, -1.0);
  __ Mov(x20, 0);
  __ Mov(x21, 0x7ff0000000000001);  // Double precision NaN.
  __ Fmov(d21, x21);
  __ Mov(w22, 0x7f800001);  // Single precision NaN.
  __ Fmov(s22, w22);

  __ Cmp(x20, 0);
  __ Fccmp(s16, s16, NoFlag, eq);
  __ Mrs(x0, NZCV);

  __ Cmp(x20, 0);
  __ Fccmp(s16, s16, VFlag, ne);
  __ Mrs(x1, NZCV);

  __ Cmp(x20, 0);
  __ Fccmp(s16, s17, CFlag, ge);
  __ Mrs(x2, NZCV);

  __ Cmp(x20, 0);
  __ Fccmp(s16, s17, CVFlag, lt);
  __ Mrs(x3, NZCV);

  __ Cmp(x20, 0);
  __ Fccmp(d18, d18, ZFlag, le);
  __ Mrs(x4, NZCV);

  __ Cmp(x20, 0);
  __ Fccmp(d18, d18, ZVFlag, gt);
  __ Mrs(x5, NZCV);

  __ Cmp(x20, 0);
  __ Fccmp(d18, d19, ZCVFlag, ls);
  __ Mrs(x6, NZCV);

  __ Cmp(x20, 0);
  __ Fccmp(d18, d19, NFlag, hi);
  __ Mrs(x7, NZCV);

  // The Macro Assembler does not allow al or nv as a condition.
  {
    ExactAssemblyScope scope(&masm, kInstructionSize);
    __ fccmp(s16, s16, NFlag, al);
  }
  __ Mrs(x8, NZCV);

  {
    ExactAssemblyScope scope(&masm, kInstructionSize);
    __ fccmp(d18, d18, NFlag, nv);
  }
  __ Mrs(x9, NZCV);

  __ Cmp(x20, 0);
  __ Fccmpe(s16, s16, NoFlag, eq);
  __ Mrs(x10, NZCV);

  __ Cmp(x20, 0);
  __ Fccmpe(d18, d19, ZCVFlag, ls);
  __ Mrs(x11, NZCV);

  __ Cmp(x20, 0);
  __ Fccmpe(d21, d21, NoFlag, eq);
  __ Mrs(x12, NZCV);

  __ Cmp(x20, 0);
  __ Fccmpe(s22, s22, NoFlag, eq);
  __ Mrs(x13, NZCV);
  END();

  RUN();

  ASSERT_EQUAL_32(ZCFlag, w0);
  ASSERT_EQUAL_32(VFlag, w1);
  ASSERT_EQUAL_32(NFlag, w2);
  ASSERT_EQUAL_32(CVFlag, w3);
  ASSERT_EQUAL_32(ZCFlag, w4);
  ASSERT_EQUAL_32(ZVFlag, w5);
  ASSERT_EQUAL_32(CFlag, w6);
  ASSERT_EQUAL_32(NFlag, w7);
  ASSERT_EQUAL_32(ZCFlag, w8);
  ASSERT_EQUAL_32(ZCFlag, w9);
  ASSERT_EQUAL_32(ZCFlag, w10);
  ASSERT_EQUAL_32(CFlag, w11);
  ASSERT_EQUAL_32(CVFlag, w12);
  ASSERT_EQUAL_32(CVFlag, w13);

  TEARDOWN();
}


TEST(fcmp) {
  SETUP();

  START();

  // Some of these tests require a floating-point scratch register assigned to
  // the macro assembler, but most do not.
  {
    UseScratchRegisterScope temps(&masm);
    temps.ExcludeAll();
    temps.Include(ip0, ip1);

    __ Fmov(s8, 0.0);
    __ Fmov(s9, 0.5);
    __ Mov(w18, 0x7f800001);  // Single precision NaN.
    __ Fmov(s18, w18);

    __ Fcmp(s8, s8);
    __ Mrs(x0, NZCV);
    __ Fcmp(s8, s9);
    __ Mrs(x1, NZCV);
    __ Fcmp(s9, s8);
    __ Mrs(x2, NZCV);
    __ Fcmp(s8, s18);
    __ Mrs(x3, NZCV);
    __ Fcmp(s18, s18);
    __ Mrs(x4, NZCV);
    __ Fcmp(s8, 0.0);
    __ Mrs(x5, NZCV);
    temps.Include(d0);
    __ Fcmp(s8, 255.0);
    temps.Exclude(d0);
    __ Mrs(x6, NZCV);

    __ Fmov(d19, 0.0);
    __ Fmov(d20, 0.5);
    __ Mov(x21, 0x7ff0000000000001);  // Double precision NaN.
    __ Fmov(d21, x21);

    __ Fcmp(d19, d19);
    __ Mrs(x10, NZCV);
    __ Fcmp(d19, d20);
    __ Mrs(x11, NZCV);
    __ Fcmp(d20, d19);
    __ Mrs(x12, NZCV);
    __ Fcmp(d19, d21);
    __ Mrs(x13, NZCV);
    __ Fcmp(d21, d21);
    __ Mrs(x14, NZCV);
    __ Fcmp(d19, 0.0);
    __ Mrs(x15, NZCV);
    temps.Include(d0);
    __ Fcmp(d19, 12.3456);
    temps.Exclude(d0);
    __ Mrs(x16, NZCV);

    __ Fcmpe(s8, s8);
    __ Mrs(x22, NZCV);
    __ Fcmpe(s8, 0.0);
    __ Mrs(x23, NZCV);
    __ Fcmpe(d19, d19);
    __ Mrs(x24, NZCV);
    __ Fcmpe(d19, 0.0);
    __ Mrs(x25, NZCV);
    __ Fcmpe(s18, s18);
    __ Mrs(x26, NZCV);
    __ Fcmpe(d21, d21);
    __ Mrs(x27, NZCV);
  }

  END();

  RUN();

  ASSERT_EQUAL_32(ZCFlag, w0);
  ASSERT_EQUAL_32(NFlag, w1);
  ASSERT_EQUAL_32(CFlag, w2);
  ASSERT_EQUAL_32(CVFlag, w3);
  ASSERT_EQUAL_32(CVFlag, w4);
  ASSERT_EQUAL_32(ZCFlag, w5);
  ASSERT_EQUAL_32(NFlag, w6);
  ASSERT_EQUAL_32(ZCFlag, w10);
  ASSERT_EQUAL_32(NFlag, w11);
  ASSERT_EQUAL_32(CFlag, w12);
  ASSERT_EQUAL_32(CVFlag, w13);
  ASSERT_EQUAL_32(CVFlag, w14);
  ASSERT_EQUAL_32(ZCFlag, w15);
  ASSERT_EQUAL_32(NFlag, w16);
  ASSERT_EQUAL_32(ZCFlag, w22);
  ASSERT_EQUAL_32(ZCFlag, w23);
  ASSERT_EQUAL_32(ZCFlag, w24);
  ASSERT_EQUAL_32(ZCFlag, w25);
  ASSERT_EQUAL_32(CVFlag, w26);
  ASSERT_EQUAL_32(CVFlag, w27);

  TEARDOWN();
}


TEST(fcsel) {
  SETUP();

  START();
  __ Mov(x16, 0);
  __ Fmov(s16, 1.0);
  __ Fmov(s17, 2.0);
  __ Fmov(d18, 3.0);
  __ Fmov(d19, 4.0);

  __ Cmp(x16, 0);
  __ Fcsel(s0, s16, s17, eq);
  __ Fcsel(s1, s16, s17, ne);
  __ Fcsel(d2, d18, d19, eq);
  __ Fcsel(d3, d18, d19, ne);
  // The Macro Assembler does not allow al or nv as a condition.
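  // (With al or nv the condition always holds, so fcsel simply returns the
  // first source register; the raw instructions below check those encodings
  // anyway.)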
  {
    ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
    __ fcsel(s4, s16, s17, al);
    __ fcsel(d5, d18, d19, nv);
  }
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s0);
  ASSERT_EQUAL_FP32(2.0, s1);
  ASSERT_EQUAL_FP64(3.0, d2);
  ASSERT_EQUAL_FP64(4.0, d3);
  ASSERT_EQUAL_FP32(1.0, s4);
  ASSERT_EQUAL_FP64(3.0, d5);

  TEARDOWN();
}


TEST(fneg) {
  SETUP();

  START();
  __ Fmov(s16, 1.0);
  __ Fmov(s17, 0.0);
  __ Fmov(s18, kFP32PositiveInfinity);
  __ Fmov(d19, 1.0);
  __ Fmov(d20, 0.0);
  __ Fmov(d21, kFP64PositiveInfinity);

  __ Fneg(s0, s16);
  __ Fneg(s1, s0);
  __ Fneg(s2, s17);
  __ Fneg(s3, s2);
  __ Fneg(s4, s18);
  __ Fneg(s5, s4);
  __ Fneg(d6, d19);
  __ Fneg(d7, d6);
  __ Fneg(d8, d20);
  __ Fneg(d9, d8);
  __ Fneg(d10, d21);
  __ Fneg(d11, d10);
  END();

  RUN();

  ASSERT_EQUAL_FP32(-1.0, s0);
  ASSERT_EQUAL_FP32(1.0, s1);
  ASSERT_EQUAL_FP32(-0.0, s2);
  ASSERT_EQUAL_FP32(0.0, s3);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s4);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s5);
  ASSERT_EQUAL_FP64(-1.0, d6);
  ASSERT_EQUAL_FP64(1.0, d7);
  ASSERT_EQUAL_FP64(-0.0, d8);
  ASSERT_EQUAL_FP64(0.0, d9);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d10);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d11);

  TEARDOWN();
}


TEST(fabs) {
  SETUP();

  START();
  __ Fmov(s16, -1.0);
  __ Fmov(s17, -0.0);
  __ Fmov(s18, kFP32NegativeInfinity);
  __ Fmov(d19, -1.0);
  __ Fmov(d20, -0.0);
  __ Fmov(d21, kFP64NegativeInfinity);

  __ Fabs(s0, s16);
  __ Fabs(s1, s0);
  __ Fabs(s2, s17);
  __ Fabs(s3, s18);
  __ Fabs(d4, d19);
  __ Fabs(d5, d4);
  __ Fabs(d6, d20);
  __ Fabs(d7, d21);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s0);
  ASSERT_EQUAL_FP32(1.0, s1);
  ASSERT_EQUAL_FP32(0.0, s2);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s3);
  ASSERT_EQUAL_FP64(1.0, d4);
  ASSERT_EQUAL_FP64(1.0, d5);
  ASSERT_EQUAL_FP64(0.0, d6);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d7);

  TEARDOWN();
}


TEST(fsqrt) {
  SETUP();

  START();
  __ Fmov(s16, 0.0);
  __ Fmov(s17, 1.0);
  __ Fmov(s18, 0.25);
  __ Fmov(s19, 65536.0);
  __ Fmov(s20, -0.0);
  __ Fmov(s21, kFP32PositiveInfinity);
  __ Fmov(s22, -1.0);
  __ Fmov(d23, 0.0);
  __ Fmov(d24, 1.0);
  __ Fmov(d25, 0.25);
  __ Fmov(d26, 4294967296.0);
  __ Fmov(d27, -0.0);
  __ Fmov(d28, kFP64PositiveInfinity);
  __ Fmov(d29, -1.0);

  __ Fsqrt(s0, s16);
  __ Fsqrt(s1, s17);
  __ Fsqrt(s2, s18);
  __ Fsqrt(s3, s19);
  __ Fsqrt(s4, s20);
  __ Fsqrt(s5, s21);
  __ Fsqrt(s6, s22);
  __ Fsqrt(d7, d23);
  __ Fsqrt(d8, d24);
  __ Fsqrt(d9, d25);
  __ Fsqrt(d10, d26);
  __ Fsqrt(d11, d27);
  __ Fsqrt(d12, d28);
  __ Fsqrt(d13, d29);
  END();

  RUN();

  ASSERT_EQUAL_FP32(0.0, s0);
  ASSERT_EQUAL_FP32(1.0, s1);
  ASSERT_EQUAL_FP32(0.5, s2);
  ASSERT_EQUAL_FP32(256.0, s3);
  ASSERT_EQUAL_FP32(-0.0, s4);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s5);
  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s6);
  ASSERT_EQUAL_FP64(0.0, d7);
  ASSERT_EQUAL_FP64(1.0, d8);
  ASSERT_EQUAL_FP64(0.5, d9);
  ASSERT_EQUAL_FP64(65536.0, d10);
  ASSERT_EQUAL_FP64(-0.0, d11);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d12);
  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d13);

  TEARDOWN();
}


TEST(frinta) {
  SETUP();

  START();
  __ Fmov(s16, 1.0);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, 1.9);
  __ Fmov(s20, 2.5);
  __ Fmov(s21, -1.5);
  __ Fmov(s22, -2.5);
  __ Fmov(s23, kFP32PositiveInfinity);
  __ Fmov(s24, kFP32NegativeInfinity);
  __ Fmov(s25, 0.0);
  __ Fmov(s26, -0.0);
  __ Fmov(s27, -0.2);

  __ Frinta(s0, s16);
  __ Frinta(s1, s17);
  __ Frinta(s2, s18);
  __ Frinta(s3, s19);
  __ Frinta(s4, s20);
  __ Frinta(s5, s21);
  __ Frinta(s6, s22);
  __ Frinta(s7, s23);
  __ Frinta(s8, s24);
  __ Frinta(s9, s25);
  __ Frinta(s10, s26);
  __ Frinta(s11, s27);

  __ Fmov(d16, 1.0);
  __ Fmov(d17, 1.1);
  __ Fmov(d18, 1.5);
  __ Fmov(d19, 1.9);
  __ Fmov(d20, 2.5);
  __ Fmov(d21, -1.5);
  __ Fmov(d22, -2.5);
  __ Fmov(d23, kFP32PositiveInfinity);
  __ Fmov(d24, kFP32NegativeInfinity);
  __ Fmov(d25, 0.0);
  __ Fmov(d26, -0.0);
  __ Fmov(d27, -0.2);

  __ Frinta(d12, d16);
  __ Frinta(d13, d17);
  __ Frinta(d14, d18);
  __ Frinta(d15, d19);
  __ Frinta(d16, d20);
  __ Frinta(d17, d21);
  __ Frinta(d18, d22);
  __ Frinta(d19, d23);
  __ Frinta(d20, d24);
  __ Frinta(d21, d25);
  __ Frinta(d22, d26);
  __ Frinta(d23, d27);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s0);
  ASSERT_EQUAL_FP32(1.0, s1);
  ASSERT_EQUAL_FP32(2.0, s2);
  ASSERT_EQUAL_FP32(2.0, s3);
  ASSERT_EQUAL_FP32(3.0, s4);
  ASSERT_EQUAL_FP32(-2.0, s5);
  ASSERT_EQUAL_FP32(-3.0, s6);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8);
  ASSERT_EQUAL_FP32(0.0, s9);
  ASSERT_EQUAL_FP32(-0.0, s10);
  ASSERT_EQUAL_FP32(-0.0, s11);
  ASSERT_EQUAL_FP64(1.0, d12);
  ASSERT_EQUAL_FP64(1.0, d13);
  ASSERT_EQUAL_FP64(2.0, d14);
  ASSERT_EQUAL_FP64(2.0, d15);
  ASSERT_EQUAL_FP64(3.0, d16);
  ASSERT_EQUAL_FP64(-2.0, d17);
  ASSERT_EQUAL_FP64(-3.0, d18);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d19);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d20);
  ASSERT_EQUAL_FP64(0.0, d21);
  ASSERT_EQUAL_FP64(-0.0, d22);
  ASSERT_EQUAL_FP64(-0.0, d23);

  TEARDOWN();
}


TEST(frinti) {
  // VIXL only supports the round-to-nearest FPCR mode, so this test has the
  // same results as frintn.
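  // Round-to-nearest resolves ties to even: 1.5 and 2.5 both round to 2.0,
  // and -2.5 rounds to -2.0, unlike frinta above, which rounds ties away
  // from zero.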
  SETUP();

  START();
  __ Fmov(s16, 1.0);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, 1.9);
  __ Fmov(s20, 2.5);
  __ Fmov(s21, -1.5);
  __ Fmov(s22, -2.5);
  __ Fmov(s23, kFP32PositiveInfinity);
  __ Fmov(s24, kFP32NegativeInfinity);
  __ Fmov(s25, 0.0);
  __ Fmov(s26, -0.0);
  __ Fmov(s27, -0.2);

  __ Frinti(s0, s16);
  __ Frinti(s1, s17);
  __ Frinti(s2, s18);
  __ Frinti(s3, s19);
  __ Frinti(s4, s20);
  __ Frinti(s5, s21);
  __ Frinti(s6, s22);
  __ Frinti(s7, s23);
  __ Frinti(s8, s24);
  __ Frinti(s9, s25);
  __ Frinti(s10, s26);
  __ Frinti(s11, s27);

  __ Fmov(d16, 1.0);
  __ Fmov(d17, 1.1);
  __ Fmov(d18, 1.5);
  __ Fmov(d19, 1.9);
  __ Fmov(d20, 2.5);
  __ Fmov(d21, -1.5);
  __ Fmov(d22, -2.5);
  __ Fmov(d23, kFP32PositiveInfinity);
  __ Fmov(d24, kFP32NegativeInfinity);
  __ Fmov(d25, 0.0);
  __ Fmov(d26, -0.0);
  __ Fmov(d27, -0.2);

  __ Frinti(d12, d16);
  __ Frinti(d13, d17);
  __ Frinti(d14, d18);
  __ Frinti(d15, d19);
  __ Frinti(d16, d20);
  __ Frinti(d17, d21);
  __ Frinti(d18, d22);
  __ Frinti(d19, d23);
  __ Frinti(d20, d24);
  __ Frinti(d21, d25);
  __ Frinti(d22, d26);
  __ Frinti(d23, d27);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s0);
  ASSERT_EQUAL_FP32(1.0, s1);
  ASSERT_EQUAL_FP32(2.0, s2);
  ASSERT_EQUAL_FP32(2.0, s3);
  ASSERT_EQUAL_FP32(2.0, s4);
  ASSERT_EQUAL_FP32(-2.0, s5);
  ASSERT_EQUAL_FP32(-2.0, s6);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8);
  ASSERT_EQUAL_FP32(0.0, s9);
  ASSERT_EQUAL_FP32(-0.0, s10);
  ASSERT_EQUAL_FP32(-0.0, s11);
  ASSERT_EQUAL_FP64(1.0, d12);
  ASSERT_EQUAL_FP64(1.0, d13);
  ASSERT_EQUAL_FP64(2.0, d14);
  ASSERT_EQUAL_FP64(2.0, d15);
  ASSERT_EQUAL_FP64(2.0, d16);
  ASSERT_EQUAL_FP64(-2.0, d17);
  ASSERT_EQUAL_FP64(-2.0, d18);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d19);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d20);
  ASSERT_EQUAL_FP64(0.0, d21);
  ASSERT_EQUAL_FP64(-0.0, d22);
  ASSERT_EQUAL_FP64(-0.0, d23);

  TEARDOWN();
}


TEST(frintm) {
  SETUP();

  START();
  __ Fmov(s16, 1.0);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, 1.9);
  __ Fmov(s20, 2.5);
  __ Fmov(s21, -1.5);
  __ Fmov(s22, -2.5);
  __ Fmov(s23, kFP32PositiveInfinity);
  __ Fmov(s24, kFP32NegativeInfinity);
  __ Fmov(s25, 0.0);
  __ Fmov(s26, -0.0);
  __ Fmov(s27, -0.2);

  __ Frintm(s0, s16);
  __ Frintm(s1, s17);
  __ Frintm(s2, s18);
  __ Frintm(s3, s19);
  __ Frintm(s4, s20);
  __ Frintm(s5, s21);
  __ Frintm(s6, s22);
  __ Frintm(s7, s23);
  __ Frintm(s8, s24);
  __ Frintm(s9, s25);
  __ Frintm(s10, s26);
  __ Frintm(s11, s27);

  __ Fmov(d16, 1.0);
  __ Fmov(d17, 1.1);
  __ Fmov(d18, 1.5);
  __ Fmov(d19, 1.9);
  __ Fmov(d20, 2.5);
  __ Fmov(d21, -1.5);
  __ Fmov(d22, -2.5);
  __ Fmov(d23, kFP32PositiveInfinity);
  __ Fmov(d24, kFP32NegativeInfinity);
  __ Fmov(d25, 0.0);
  __ Fmov(d26, -0.0);
  __ Fmov(d27, -0.2);

  __ Frintm(d12, d16);
  __ Frintm(d13, d17);
  __ Frintm(d14, d18);
  __ Frintm(d15, d19);
  __ Frintm(d16, d20);
  __ Frintm(d17, d21);
  __ Frintm(d18, d22);
  __ Frintm(d19, d23);
  __ Frintm(d20, d24);
  __ Frintm(d21, d25);
  __ Frintm(d22, d26);
  __ Frintm(d23, d27);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s0);
  ASSERT_EQUAL_FP32(1.0, s1);
  ASSERT_EQUAL_FP32(1.0, s2);
  ASSERT_EQUAL_FP32(1.0, s3);
  ASSERT_EQUAL_FP32(2.0, s4);
  ASSERT_EQUAL_FP32(-2.0, s5);
  ASSERT_EQUAL_FP32(-3.0, s6);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8);
  ASSERT_EQUAL_FP32(0.0, s9);
  ASSERT_EQUAL_FP32(-0.0, s10);
  ASSERT_EQUAL_FP32(-1.0, s11);
  ASSERT_EQUAL_FP64(1.0, d12);
  ASSERT_EQUAL_FP64(1.0, d13);
  ASSERT_EQUAL_FP64(1.0, d14);
  ASSERT_EQUAL_FP64(1.0, d15);
  ASSERT_EQUAL_FP64(2.0, d16);
  ASSERT_EQUAL_FP64(-2.0, d17);
  ASSERT_EQUAL_FP64(-3.0, d18);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d19);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d20);
  ASSERT_EQUAL_FP64(0.0, d21);
  ASSERT_EQUAL_FP64(-0.0, d22);
  ASSERT_EQUAL_FP64(-1.0, d23);

  TEARDOWN();
}


TEST(frintn) {
  SETUP();

  START();
  __ Fmov(s16, 1.0);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, 1.9);
  __ Fmov(s20, 2.5);
  __ Fmov(s21, -1.5);
  __ Fmov(s22, -2.5);
  __ Fmov(s23, kFP32PositiveInfinity);
  __ Fmov(s24, kFP32NegativeInfinity);
  __ Fmov(s25, 0.0);
  __ Fmov(s26, -0.0);
  __ Fmov(s27, -0.2);

  __ Frintn(s0, s16);
  __ Frintn(s1, s17);
  __ Frintn(s2, s18);
  __ Frintn(s3, s19);
  __ Frintn(s4, s20);
  __ Frintn(s5, s21);
  __ Frintn(s6, s22);
  __ Frintn(s7, s23);
  __ Frintn(s8, s24);
  __ Frintn(s9, s25);
  __ Frintn(s10, s26);
  __ Frintn(s11, s27);

  __ Fmov(d16, 1.0);
  __ Fmov(d17, 1.1);
  __ Fmov(d18, 1.5);
  __ Fmov(d19, 1.9);
  __ Fmov(d20, 2.5);
  __ Fmov(d21, -1.5);
  __ Fmov(d22, -2.5);
  __ Fmov(d23, kFP32PositiveInfinity);
  __ Fmov(d24, kFP32NegativeInfinity);
  __ Fmov(d25, 0.0);
  __ Fmov(d26, -0.0);
  __ Fmov(d27, -0.2);

  __ Frintn(d12, d16);
  __ Frintn(d13, d17);
  __ Frintn(d14, d18);
  __ Frintn(d15, d19);
  __ Frintn(d16, d20);
  __ Frintn(d17, d21);
  __ Frintn(d18, d22);
  __ Frintn(d19, d23);
  __ Frintn(d20, d24);
  __ Frintn(d21, d25);
  __ Frintn(d22, d26);
  __ Frintn(d23, d27);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s0);
  ASSERT_EQUAL_FP32(1.0, s1);
  ASSERT_EQUAL_FP32(2.0, s2);
  ASSERT_EQUAL_FP32(2.0, s3);
  ASSERT_EQUAL_FP32(2.0, s4);
  ASSERT_EQUAL_FP32(-2.0, s5);
  ASSERT_EQUAL_FP32(-2.0, s6);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8);
  ASSERT_EQUAL_FP32(0.0, s9);
  ASSERT_EQUAL_FP32(-0.0, s10);
  ASSERT_EQUAL_FP32(-0.0, s11);
  ASSERT_EQUAL_FP64(1.0, d12);
  ASSERT_EQUAL_FP64(1.0, d13);
  ASSERT_EQUAL_FP64(2.0, d14);
  ASSERT_EQUAL_FP64(2.0, d15);
  ASSERT_EQUAL_FP64(2.0, d16);
  ASSERT_EQUAL_FP64(-2.0, d17);
  ASSERT_EQUAL_FP64(-2.0, d18);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d19);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d20);
  ASSERT_EQUAL_FP64(0.0, d21);
  ASSERT_EQUAL_FP64(-0.0, d22);
  ASSERT_EQUAL_FP64(-0.0, d23);

  TEARDOWN();
}


TEST(frintp) {
  SETUP();

  START();
  __ Fmov(s16, 1.0);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, 1.9);
  __ Fmov(s20, 2.5);
  __ Fmov(s21, -1.5);
  __ Fmov(s22, -2.5);
  __ Fmov(s23, kFP32PositiveInfinity);
  __ Fmov(s24, kFP32NegativeInfinity);
  __ Fmov(s25, 0.0);
  __ Fmov(s26, -0.0);
  __ Fmov(s27, -0.2);

  __ Frintp(s0, s16);
  __ Frintp(s1, s17);
  __ Frintp(s2, s18);
  __ Frintp(s3, s19);
  __ Frintp(s4, s20);
  __ Frintp(s5, s21);
  __ Frintp(s6, s22);
  __ Frintp(s7, s23);
  __ Frintp(s8, s24);
  __ Frintp(s9, s25);
  __ Frintp(s10, s26);
  __ Frintp(s11, s27);

  __ Fmov(d16, 1.0);
  __ Fmov(d17, 1.1);
  __ Fmov(d18, 1.5);
  __ Fmov(d19, 1.9);
  __ Fmov(d20, 2.5);
  __ Fmov(d21, -1.5);
  __ Fmov(d22, -2.5);
  __ Fmov(d23, kFP32PositiveInfinity);
  __ Fmov(d24, kFP32NegativeInfinity);
  __ Fmov(d25, 0.0);
  __ Fmov(d26, -0.0);
  __ Fmov(d27, -0.2);

  __ Frintp(d12, d16);
  __ Frintp(d13, d17);
  __ Frintp(d14, d18);
  __ Frintp(d15, d19);
  __ Frintp(d16, d20);
  __ Frintp(d17, d21);
  __ Frintp(d18, d22);
  __ Frintp(d19, d23);
  __ Frintp(d20, d24);
  __ Frintp(d21, d25);
  __ Frintp(d22, d26);
  __ Frintp(d23, d27);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s0);
  ASSERT_EQUAL_FP32(2.0, s1);
  ASSERT_EQUAL_FP32(2.0, s2);
  ASSERT_EQUAL_FP32(2.0, s3);
  ASSERT_EQUAL_FP32(3.0, s4);
  ASSERT_EQUAL_FP32(-1.0, s5);
  ASSERT_EQUAL_FP32(-2.0, s6);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8);
  ASSERT_EQUAL_FP32(0.0, s9);
  ASSERT_EQUAL_FP32(-0.0, s10);
  ASSERT_EQUAL_FP32(-0.0, s11);
  ASSERT_EQUAL_FP64(1.0, d12);
  ASSERT_EQUAL_FP64(2.0, d13);
  ASSERT_EQUAL_FP64(2.0, d14);
  ASSERT_EQUAL_FP64(2.0, d15);
  ASSERT_EQUAL_FP64(3.0, d16);
  ASSERT_EQUAL_FP64(-1.0, d17);
  ASSERT_EQUAL_FP64(-2.0, d18);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d19);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d20);
  ASSERT_EQUAL_FP64(0.0, d21);
  ASSERT_EQUAL_FP64(-0.0, d22);
  ASSERT_EQUAL_FP64(-0.0, d23);

  TEARDOWN();
}


TEST(frintx) {
  // VIXL only supports the round-to-nearest FPCR mode, and it doesn't support
  // FP exceptions, so this test has the same results as frintn (and frinti).
  SETUP();

  START();
  __ Fmov(s16, 1.0);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, 1.9);
  __ Fmov(s20, 2.5);
  __ Fmov(s21, -1.5);
  __ Fmov(s22, -2.5);
  __ Fmov(s23, kFP32PositiveInfinity);
  __ Fmov(s24, kFP32NegativeInfinity);
  __ Fmov(s25, 0.0);
  __ Fmov(s26, -0.0);
  __ Fmov(s27, -0.2);

  __ Frintx(s0, s16);
  __ Frintx(s1, s17);
  __ Frintx(s2, s18);
  __ Frintx(s3, s19);
  __ Frintx(s4, s20);
  __ Frintx(s5, s21);
  __ Frintx(s6, s22);
  __ Frintx(s7, s23);
  __ Frintx(s8, s24);
  __ Frintx(s9, s25);
  __ Frintx(s10, s26);
  __ Frintx(s11, s27);

  __ Fmov(d16, 1.0);
  __ Fmov(d17, 1.1);
  __ Fmov(d18, 1.5);
  __ Fmov(d19, 1.9);
  __ Fmov(d20, 2.5);
  __ Fmov(d21, -1.5);
  __ Fmov(d22, -2.5);
  __ Fmov(d23, kFP32PositiveInfinity);
  __ Fmov(d24, kFP32NegativeInfinity);
  __ Fmov(d25, 0.0);
  __ Fmov(d26, -0.0);
  __ Fmov(d27, -0.2);

  __ Frintx(d12, d16);
  __ Frintx(d13, d17);
  __ Frintx(d14, d18);
  __ Frintx(d15, d19);
  __ Frintx(d16, d20);
  __ Frintx(d17, d21);
  __ Frintx(d18, d22);
  __ Frintx(d19, d23);
  __ Frintx(d20, d24);
  __ Frintx(d21, d25);
  __ Frintx(d22, d26);
  __ Frintx(d23, d27);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s0);
  ASSERT_EQUAL_FP32(1.0, s1);
  ASSERT_EQUAL_FP32(2.0, s2);
  ASSERT_EQUAL_FP32(2.0, s3);
  ASSERT_EQUAL_FP32(2.0, s4);
  ASSERT_EQUAL_FP32(-2.0, s5);
  ASSERT_EQUAL_FP32(-2.0, s6);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8);
  ASSERT_EQUAL_FP32(0.0, s9);
  ASSERT_EQUAL_FP32(-0.0, s10);
  ASSERT_EQUAL_FP32(-0.0, s11);
  ASSERT_EQUAL_FP64(1.0, d12);
  ASSERT_EQUAL_FP64(1.0, d13);
  ASSERT_EQUAL_FP64(2.0, d14);
  ASSERT_EQUAL_FP64(2.0, d15);
  ASSERT_EQUAL_FP64(2.0, d16);
  ASSERT_EQUAL_FP64(-2.0, d17);
  ASSERT_EQUAL_FP64(-2.0, d18);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d19);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d20);
  ASSERT_EQUAL_FP64(0.0, d21);
  ASSERT_EQUAL_FP64(-0.0, d22);
  ASSERT_EQUAL_FP64(-0.0, d23);

  TEARDOWN();
}
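

// The frintx comment above assumes the FPCR rounding mode is RN. For
// illustration only, a sketch of how generated code could force that mode
// via the RMode field (FPCR bits [23:22], where 0b00 selects
// round-to-nearest); this is a hypothetical example, not used by the tests:
//
//   __ Mrs(x0, FPCR);
//   __ Bic(x0, x0, 0x00c00000);  // Clear RMode to 0b00 (RN).
//   __ Msr(FPCR, x0);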


TEST(frintz) {
  SETUP();

  START();
  __ Fmov(s16, 1.0);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, 1.9);
  __ Fmov(s20, 2.5);
  __ Fmov(s21, -1.5);
  __ Fmov(s22, -2.5);
  __ Fmov(s23, kFP32PositiveInfinity);
  __ Fmov(s24, kFP32NegativeInfinity);
  __ Fmov(s25, 0.0);
  __ Fmov(s26, -0.0);

  __ Frintz(s0, s16);
  __ Frintz(s1, s17);
  __ Frintz(s2, s18);
  __ Frintz(s3, s19);
  __ Frintz(s4, s20);
  __ Frintz(s5, s21);
  __ Frintz(s6, s22);
  __ Frintz(s7, s23);
  __ Frintz(s8, s24);
  __ Frintz(s9, s25);
  __ Frintz(s10, s26);

  __ Fmov(d16, 1.0);
  __ Fmov(d17, 1.1);
  __ Fmov(d18, 1.5);
  __ Fmov(d19, 1.9);
  __ Fmov(d20, 2.5);
  __ Fmov(d21, -1.5);
  __ Fmov(d22, -2.5);
  __ Fmov(d23, kFP32PositiveInfinity);
  __ Fmov(d24, kFP32NegativeInfinity);
  __ Fmov(d25, 0.0);
  __ Fmov(d26, -0.0);

  __ Frintz(d11, d16);
  __ Frintz(d12, d17);
  __ Frintz(d13, d18);
  __ Frintz(d14, d19);
  __ Frintz(d15, d20);
  __ Frintz(d16, d21);
  __ Frintz(d17, d22);
  __ Frintz(d18, d23);
  __ Frintz(d19, d24);
  __ Frintz(d20, d25);
  __ Frintz(d21, d26);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s0);
  ASSERT_EQUAL_FP32(1.0, s1);
  ASSERT_EQUAL_FP32(1.0, s2);
  ASSERT_EQUAL_FP32(1.0, s3);
  ASSERT_EQUAL_FP32(2.0, s4);
  ASSERT_EQUAL_FP32(-1.0, s5);
  ASSERT_EQUAL_FP32(-2.0, s6);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8);
  ASSERT_EQUAL_FP32(0.0, s9);
  ASSERT_EQUAL_FP32(-0.0, s10);
  ASSERT_EQUAL_FP64(1.0, d11);
  ASSERT_EQUAL_FP64(1.0, d12);
  ASSERT_EQUAL_FP64(1.0, d13);
  ASSERT_EQUAL_FP64(1.0, d14);
  ASSERT_EQUAL_FP64(2.0, d15);
  ASSERT_EQUAL_FP64(-1.0, d16);
  ASSERT_EQUAL_FP64(-2.0, d17);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d18);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d19);
  ASSERT_EQUAL_FP64(0.0, d20);
  ASSERT_EQUAL_FP64(-0.0, d21);

  TEARDOWN();
}


TEST(fcvt_ds) {
  SETUP();

  START();
  __ Fmov(s16, 1.0);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, 1.9);
  __ Fmov(s20, 2.5);
  __ Fmov(s21, -1.5);
  __ Fmov(s22, -2.5);
  __ Fmov(s23, kFP32PositiveInfinity);
  __ Fmov(s24, kFP32NegativeInfinity);
  __ Fmov(s25, 0.0);
  __ Fmov(s26, -0.0);
  __ Fmov(s27, FLT_MAX);
  __ Fmov(s28, FLT_MIN);
  __ Fmov(s29, RawbitsToFloat(0x7fc12345));  // Quiet NaN.
  __ Fmov(s30, RawbitsToFloat(0x7f812345));  // Signalling NaN.

  __ Fcvt(d0, s16);
  __ Fcvt(d1, s17);
  __ Fcvt(d2, s18);
  __ Fcvt(d3, s19);
  __ Fcvt(d4, s20);
  __ Fcvt(d5, s21);
  __ Fcvt(d6, s22);
  __ Fcvt(d7, s23);
  __ Fcvt(d8, s24);
  __ Fcvt(d9, s25);
  __ Fcvt(d10, s26);
  __ Fcvt(d11, s27);
  __ Fcvt(d12, s28);
  __ Fcvt(d13, s29);
  __ Fcvt(d14, s30);
  END();

  RUN();

  ASSERT_EQUAL_FP64(1.0f, d0);
  ASSERT_EQUAL_FP64(1.1f, d1);
  ASSERT_EQUAL_FP64(1.5f, d2);
  ASSERT_EQUAL_FP64(1.9f, d3);
  ASSERT_EQUAL_FP64(2.5f, d4);
  ASSERT_EQUAL_FP64(-1.5f, d5);
  ASSERT_EQUAL_FP64(-2.5f, d6);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d7);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d8);
  ASSERT_EQUAL_FP64(0.0f, d9);
  ASSERT_EQUAL_FP64(-0.0f, d10);
  ASSERT_EQUAL_FP64(FLT_MAX, d11);
  ASSERT_EQUAL_FP64(FLT_MIN, d12);

  // Check that the NaN payload is preserved according to AArch64 conversion
  // rules:
  //  - The sign bit is preserved.
  //  - The top bit of the mantissa is forced to 1 (making it a quiet NaN).
  //  - The remaining mantissa bits are copied until they run out.
  //  - The low-order bits that haven't already been assigned are set to 0.
  ASSERT_EQUAL_FP64(RawbitsToDouble(0x7ff82468a0000000), d13);
  ASSERT_EQUAL_FP64(RawbitsToDouble(0x7ff82468a0000000), d14);

  TEARDOWN();
}


TEST(fcvt_sd) {
  // Test simple conversions here. Complex behaviour (such as rounding
  // specifics) is tested in the simulator tests.

  SETUP();

  START();
  __ Fmov(d16, 1.0);
  __ Fmov(d17, 1.1);
  __ Fmov(d18, 1.5);
  __ Fmov(d19, 1.9);
  __ Fmov(d20, 2.5);
  __ Fmov(d21, -1.5);
  __ Fmov(d22, -2.5);
  __ Fmov(d23, kFP32PositiveInfinity);
  __ Fmov(d24, kFP32NegativeInfinity);
  __ Fmov(d25, 0.0);
  __ Fmov(d26, -0.0);
  __ Fmov(d27, FLT_MAX);
  __ Fmov(d28, FLT_MIN);
  __ Fmov(d29, RawbitsToDouble(0x7ff82468a0000000));  // Quiet NaN.
  __ Fmov(d30, RawbitsToDouble(0x7ff02468a0000000));  // Signalling NaN.

  __ Fcvt(s0, d16);
  __ Fcvt(s1, d17);
  __ Fcvt(s2, d18);
  __ Fcvt(s3, d19);
  __ Fcvt(s4, d20);
  __ Fcvt(s5, d21);
  __ Fcvt(s6, d22);
  __ Fcvt(s7, d23);
  __ Fcvt(s8, d24);
  __ Fcvt(s9, d25);
  __ Fcvt(s10, d26);
  __ Fcvt(s11, d27);
  __ Fcvt(s12, d28);
  __ Fcvt(s13, d29);
  __ Fcvt(s14, d30);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0f, s0);
  ASSERT_EQUAL_FP32(1.1f, s1);
  ASSERT_EQUAL_FP32(1.5f, s2);
  ASSERT_EQUAL_FP32(1.9f, s3);
  ASSERT_EQUAL_FP32(2.5f, s4);
  ASSERT_EQUAL_FP32(-1.5f, s5);
  ASSERT_EQUAL_FP32(-2.5f, s6);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8);
  ASSERT_EQUAL_FP32(0.0f, s9);
  ASSERT_EQUAL_FP32(-0.0f, s10);
  ASSERT_EQUAL_FP32(FLT_MAX, s11);
  ASSERT_EQUAL_FP32(FLT_MIN, s12);

  // Check that the NaN payload is preserved according to AArch64 conversion
  // rules:
  //  - The sign bit is preserved.
  //  - The top bit of the mantissa is forced to 1 (making it a quiet NaN).
  //  - The remaining mantissa bits are copied until they run out.
  //  - The low-order bits that haven't already been assigned are set to 0.
  ASSERT_EQUAL_FP32(RawbitsToFloat(0x7fc12345), s13);
  ASSERT_EQUAL_FP32(RawbitsToFloat(0x7fc12345), s14);

  TEARDOWN();
}
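

// The payload mapping described above can be reproduced directly from the
// raw bits. A minimal host-side sketch (hypothetical helpers, shown for
// illustration only; they are not used by the tests):
//
//   uint64_t SToDNaNBits(uint32_t s) {  // Mirrors Fcvt(d, s) for NaNs.
//     return (static_cast<uint64_t>(s >> 31) << 63) |        // Sign.
//            UINT64_C(0x7ff8000000000000) |                  // Quiet bit.
//            (static_cast<uint64_t>(s & 0x007fffff) << 29);  // Payload.
//   }
//   uint32_t DToSNaNBits(uint64_t d) {  // Mirrors Fcvt(s, d) for NaNs.
//     return (static_cast<uint32_t>(d >> 63) << 31) | 0x7fc00000 |
//            static_cast<uint32_t>((d & UINT64_C(0x000fffffffffffff)) >> 29);
//   }
//
//   SToDNaNBits(0x7f812345) == 0x7ff82468a0000000   // As asserted above.
//   DToSNaNBits(0x7ff02468a0000000) == 0x7fc12345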


TEST(fcvt_half) {
  SETUP();

  START();
  Label done;
  {
    // Check all exact conversions from half to float and back.
    Label ok, fail;
    __ Mov(w0, 0);
    for (int i = 0; i < 0xffff; i += 3) {
      if ((i & 0x7c00) == 0x7c00) continue;
      __ Mov(w1, i);
      __ Fmov(s1, w1);
      __ Fcvt(s2, h1);
      __ Fcvt(h2, s2);
      __ Fmov(w2, s2);
      __ Cmp(w1, w2);
      __ B(&fail, ne);
    }
    __ B(&ok);
    __ Bind(&fail);
    __ Mov(w0, 1);
    __ B(&done);
    __ Bind(&ok);
  }
  {
    // Check all exact conversions from half to double and back.
    Label ok, fail;
    for (int i = 0; i < 0xffff; i += 3) {
      if ((i & 0x7c00) == 0x7c00) continue;
      __ Mov(w1, i);
      __ Fmov(s1, w1);
      __ Fcvt(d2, h1);
      __ Fcvt(h2, d2);
      __ Mov(w2, v2.S(), 0);
      __ Cmp(w1, w2);
      __ B(&fail, ne);
    }
    __ B(&ok);
    __ Bind(&fail);
    __ Mov(w0, 2);
    __ Bind(&ok);
  }
  __ Bind(&done);

  // Check some other interesting values.
  __ Fmov(s0, kFP32PositiveInfinity);
  __ Fmov(s1, kFP32NegativeInfinity);
  __ Fmov(s2, 65504);       // Max half precision.
  __ Fmov(s3, 6.10352e-5);  // Min positive normal.
  __ Fmov(s4, 6.09756e-5);  // Max subnormal.
  __ Fmov(s5, 5.96046e-8);  // Min positive subnormal.
  __ Fmov(s6, 5e-9);        // Not representable -> zero.
  __ Fmov(s7, -0.0);
  __ Fcvt(h0, s0);
  __ Fcvt(h1, s1);
  __ Fcvt(h2, s2);
  __ Fcvt(h3, s3);
  __ Fcvt(h4, s4);
  __ Fcvt(h5, s5);
  __ Fcvt(h6, s6);
  __ Fcvt(h7, s7);

  __ Fmov(d20, kFP64PositiveInfinity);
  __ Fmov(d21, kFP64NegativeInfinity);
  __ Fmov(d22, 65504);       // Max half precision.
  __ Fmov(d23, 6.10352e-5);  // Min positive normal.
  __ Fmov(d24, 6.09756e-5);  // Max subnormal.
  __ Fmov(d25, 5.96046e-8);  // Min positive subnormal.
  __ Fmov(d26, 5e-9);        // Not representable -> zero.
  __ Fmov(d27, -0.0);
  __ Fcvt(h20, d20);
  __ Fcvt(h21, d21);
  __ Fcvt(h22, d22);
  __ Fcvt(h23, d23);
  __ Fcvt(h24, d24);
  __ Fcvt(h25, d25);
  __ Fcvt(h26, d26);
  __ Fcvt(h27, d27);
  END();

  RUN();

  ASSERT_EQUAL_32(0, w0);  // 1 => float failed, 2 => double failed.
  ASSERT_EQUAL_128(0, kFP16PositiveInfinity, q0);
  ASSERT_EQUAL_128(0, kFP16NegativeInfinity, q1);
  ASSERT_EQUAL_128(0, 0x7bff, q2);
  ASSERT_EQUAL_128(0, 0x0400, q3);
  ASSERT_EQUAL_128(0, 0x03ff, q4);
  ASSERT_EQUAL_128(0, 0x0001, q5);
  ASSERT_EQUAL_128(0, 0, q6);
  ASSERT_EQUAL_128(0, 0x8000, q7);
  ASSERT_EQUAL_128(0, kFP16PositiveInfinity, q20);
  ASSERT_EQUAL_128(0, kFP16NegativeInfinity, q21);
  ASSERT_EQUAL_128(0, 0x7bff, q22);
  ASSERT_EQUAL_128(0, 0x0400, q23);
  ASSERT_EQUAL_128(0, 0x03ff, q24);
  ASSERT_EQUAL_128(0, 0x0001, q25);
  ASSERT_EQUAL_128(0, 0, q26);
  ASSERT_EQUAL_128(0, 0x8000, q27);
  TEARDOWN();
}
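

// For reference, the IEEE 754 half-precision encodings of the "interesting
// values" above follow from FP16's 5-bit exponent and 10-bit mantissa:
//   0x7bff = (1 + 1023/1024) * 2^15 = 65504       (largest normal)
//   0x0400 = 2^-14 ~= 6.10352e-5                  (smallest positive normal)
//   0x03ff = (1023/1024) * 2^-14 ~= 6.09756e-5    (largest subnormal)
//   0x0001 = 2^-24 ~= 5.96046e-8                  (smallest positive subnormal)
//   0x8000 = -0.0, and 5e-9 is below half of 2^-24, so it rounds to +0.0.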


TEST(fcvtas) {
  SETUP();

  START();
  __ Fmov(s0, 1.0);
  __ Fmov(s1, 1.1);
  __ Fmov(s2, 2.5);
  __ Fmov(s3, -2.5);
  __ Fmov(s4, kFP32PositiveInfinity);
  __ Fmov(s5, kFP32NegativeInfinity);
  __ Fmov(s6, 0x7fffff80);  // Largest float < INT32_MAX.
  __ Fneg(s7, s6);          // Smallest float > INT32_MIN.
  __ Fmov(d8, 1.0);
  __ Fmov(d9, 1.1);
  __ Fmov(d10, 2.5);
  __ Fmov(d11, -2.5);
  __ Fmov(d12, kFP64PositiveInfinity);
  __ Fmov(d13, kFP64NegativeInfinity);
  __ Fmov(d14, kWMaxInt - 1);
  __ Fmov(d15, kWMinInt + 1);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 2.5);
  __ Fmov(s19, -2.5);
  __ Fmov(s20, kFP32PositiveInfinity);
  __ Fmov(s21, kFP32NegativeInfinity);
  __ Fmov(s22, 0x7fffff8000000000);  // Largest float < INT64_MAX.
  __ Fneg(s23, s22);                 // Smallest float > INT64_MIN.
  __ Fmov(d24, 1.1);
  __ Fmov(d25, 2.5);
  __ Fmov(d26, -2.5);
  __ Fmov(d27, kFP64PositiveInfinity);
  __ Fmov(d28, kFP64NegativeInfinity);
  __ Fmov(d29, 0x7ffffffffffffc00);  // Largest double < INT64_MAX.
  __ Fneg(d30, d29);                 // Smallest double > INT64_MIN.

  __ Fcvtas(w0, s0);
  __ Fcvtas(w1, s1);
  __ Fcvtas(w2, s2);
  __ Fcvtas(w3, s3);
  __ Fcvtas(w4, s4);
  __ Fcvtas(w5, s5);
  __ Fcvtas(w6, s6);
  __ Fcvtas(w7, s7);
  __ Fcvtas(w8, d8);
  __ Fcvtas(w9, d9);
  __ Fcvtas(w10, d10);
  __ Fcvtas(w11, d11);
  __ Fcvtas(w12, d12);
  __ Fcvtas(w13, d13);
  __ Fcvtas(w14, d14);
  __ Fcvtas(w15, d15);
  __ Fcvtas(x17, s17);
  __ Fcvtas(x18, s18);
  __ Fcvtas(x19, s19);
  __ Fcvtas(x20, s20);
  __ Fcvtas(x21, s21);
  __ Fcvtas(x22, s22);
  __ Fcvtas(x23, s23);
  __ Fcvtas(x24, d24);
  __ Fcvtas(x25, d25);
  __ Fcvtas(x26, d26);
  __ Fcvtas(x27, d27);
  __ Fcvtas(x28, d28);
  __ Fcvtas(x29, d29);
  __ Fcvtas(x30, d30);
  END();

  RUN();

  ASSERT_EQUAL_64(1, x0);
  ASSERT_EQUAL_64(1, x1);
  ASSERT_EQUAL_64(3, x2);
  ASSERT_EQUAL_64(0xfffffffd, x3);
  ASSERT_EQUAL_64(0x7fffffff, x4);
  ASSERT_EQUAL_64(0x80000000, x5);
  ASSERT_EQUAL_64(0x7fffff80, x6);
  ASSERT_EQUAL_64(0x80000080, x7);
  ASSERT_EQUAL_64(1, x8);
  ASSERT_EQUAL_64(1, x9);
  ASSERT_EQUAL_64(3, x10);
  ASSERT_EQUAL_64(0xfffffffd, x11);
  ASSERT_EQUAL_64(0x7fffffff, x12);
  ASSERT_EQUAL_64(0x80000000, x13);
  ASSERT_EQUAL_64(0x7ffffffe, x14);
  ASSERT_EQUAL_64(0x80000001, x15);
  ASSERT_EQUAL_64(1, x17);
  ASSERT_EQUAL_64(3, x18);
  ASSERT_EQUAL_64(0xfffffffffffffffd, x19);
  ASSERT_EQUAL_64(0x7fffffffffffffff, x20);
  ASSERT_EQUAL_64(0x8000000000000000, x21);
  ASSERT_EQUAL_64(0x7fffff8000000000, x22);
  ASSERT_EQUAL_64(0x8000008000000000, x23);
  ASSERT_EQUAL_64(1, x24);
  ASSERT_EQUAL_64(3, x25);
  ASSERT_EQUAL_64(0xfffffffffffffffd, x26);
  ASSERT_EQUAL_64(0x7fffffffffffffff, x27);
  ASSERT_EQUAL_64(0x8000000000000000, x28);
  ASSERT_EQUAL_64(0x7ffffffffffffc00, x29);
  ASSERT_EQUAL_64(0x8000000000000400, x30);

  TEARDOWN();
}
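

// The "largest float < INT32_MAX" style boundary values used by the Fcvt*
// tests follow from the 24-bit float significand (53 bits for doubles):
//   (2^24 - 1) * 2^7  = 0x7fffff80          (largest float below 2^31)
//   (2^24 - 1) * 2^39 = 0x7fffff8000000000  (largest float below 2^63)
//   (2^24 - 1) * 2^8  = 0xffffff00          (largest float below 2^32)
//   (2^24 - 1) * 2^40 = 0xffffff0000000000  (largest float below 2^64)
//   (2^53 - 1) * 2^10 = 0x7ffffffffffffc00  (largest double below 2^63)
//   (2^53 - 1) * 2^11 = 0xfffffffffffff800  (largest double below 2^64)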


TEST(fcvtau) {
  SETUP();

  START();
  __ Fmov(s0, 1.0);
  __ Fmov(s1, 1.1);
  __ Fmov(s2, 2.5);
  __ Fmov(s3, -2.5);
  __ Fmov(s4, kFP32PositiveInfinity);
  __ Fmov(s5, kFP32NegativeInfinity);
  __ Fmov(s6, 0xffffff00);  // Largest float < UINT32_MAX.
  __ Fmov(d8, 1.0);
  __ Fmov(d9, 1.1);
  __ Fmov(d10, 2.5);
  __ Fmov(d11, -2.5);
  __ Fmov(d12, kFP64PositiveInfinity);
  __ Fmov(d13, kFP64NegativeInfinity);
  __ Fmov(d14, 0xfffffffe);
  __ Fmov(s16, 1.0);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 2.5);
  __ Fmov(s19, -2.5);
  __ Fmov(s20, kFP32PositiveInfinity);
  __ Fmov(s21, kFP32NegativeInfinity);
  __ Fmov(s22, 0xffffff0000000000);  // Largest float < UINT64_MAX.
  __ Fmov(d24, 1.1);
  __ Fmov(d25, 2.5);
  __ Fmov(d26, -2.5);
  __ Fmov(d27, kFP64PositiveInfinity);
  __ Fmov(d28, kFP64NegativeInfinity);
  __ Fmov(d29, 0xfffffffffffff800);  // Largest double < UINT64_MAX.
  __ Fmov(s30, 0x100000000);

  __ Fcvtau(w0, s0);
  __ Fcvtau(w1, s1);
  __ Fcvtau(w2, s2);
  __ Fcvtau(w3, s3);
  __ Fcvtau(w4, s4);
  __ Fcvtau(w5, s5);
  __ Fcvtau(w6, s6);
  __ Fcvtau(w8, d8);
  __ Fcvtau(w9, d9);
  __ Fcvtau(w10, d10);
  __ Fcvtau(w11, d11);
  __ Fcvtau(w12, d12);
  __ Fcvtau(w13, d13);
  __ Fcvtau(w14, d14);
  __ Fcvtau(w15, d15);
  __ Fcvtau(x16, s16);
  __ Fcvtau(x17, s17);
  __ Fcvtau(x18, s18);
  __ Fcvtau(x19, s19);
  __ Fcvtau(x20, s20);
  __ Fcvtau(x21, s21);
  __ Fcvtau(x22, s22);
  __ Fcvtau(x24, d24);
  __ Fcvtau(x25, d25);
  __ Fcvtau(x26, d26);
  __ Fcvtau(x27, d27);
  __ Fcvtau(x28, d28);
  __ Fcvtau(x29, d29);
  __ Fcvtau(w30, s30);
  END();

  RUN();

  ASSERT_EQUAL_64(1, x0);
  ASSERT_EQUAL_64(1, x1);
  ASSERT_EQUAL_64(3, x2);
  ASSERT_EQUAL_64(0, x3);
  ASSERT_EQUAL_64(0xffffffff, x4);
  ASSERT_EQUAL_64(0, x5);
  ASSERT_EQUAL_64(0xffffff00, x6);
  ASSERT_EQUAL_64(1, x8);
  ASSERT_EQUAL_64(1, x9);
  ASSERT_EQUAL_64(3, x10);
  ASSERT_EQUAL_64(0, x11);
  ASSERT_EQUAL_64(0xffffffff, x12);
  ASSERT_EQUAL_64(0, x13);
  ASSERT_EQUAL_64(0xfffffffe, x14);
  ASSERT_EQUAL_64(1, x16);
  ASSERT_EQUAL_64(1, x17);
  ASSERT_EQUAL_64(3, x18);
  ASSERT_EQUAL_64(0, x19);
  ASSERT_EQUAL_64(0xffffffffffffffff, x20);
  ASSERT_EQUAL_64(0, x21);
  ASSERT_EQUAL_64(0xffffff0000000000, x22);
  ASSERT_EQUAL_64(1, x24);
  ASSERT_EQUAL_64(3, x25);
  ASSERT_EQUAL_64(0, x26);
  ASSERT_EQUAL_64(0xffffffffffffffff, x27);
  ASSERT_EQUAL_64(0, x28);
  ASSERT_EQUAL_64(0xfffffffffffff800, x29);
  ASSERT_EQUAL_64(0xffffffff, x30);

  TEARDOWN();
}


TEST(fcvtms) {
  SETUP();

  START();
  __ Fmov(s0, 1.0);
  __ Fmov(s1, 1.1);
  __ Fmov(s2, 1.5);
  __ Fmov(s3, -1.5);
  __ Fmov(s4, kFP32PositiveInfinity);
  __ Fmov(s5, kFP32NegativeInfinity);
  __ Fmov(s6, 0x7fffff80);  // Largest float < INT32_MAX.
  __ Fneg(s7, s6);          // Smallest float > INT32_MIN.
  __ Fmov(d8, 1.0);
  __ Fmov(d9, 1.1);
  __ Fmov(d10, 1.5);
  __ Fmov(d11, -1.5);
  __ Fmov(d12, kFP64PositiveInfinity);
  __ Fmov(d13, kFP64NegativeInfinity);
  __ Fmov(d14, kWMaxInt - 1);
  __ Fmov(d15, kWMinInt + 1);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, -1.5);
  __ Fmov(s20, kFP32PositiveInfinity);
  __ Fmov(s21, kFP32NegativeInfinity);
  __ Fmov(s22, 0x7fffff8000000000);  // Largest float < INT64_MAX.
  __ Fneg(s23, s22);                 // Smallest float > INT64_MIN.
  __ Fmov(d24, 1.1);
  __ Fmov(d25, 1.5);
  __ Fmov(d26, -1.5);
  __ Fmov(d27, kFP64PositiveInfinity);
  __ Fmov(d28, kFP64NegativeInfinity);
  __ Fmov(d29, 0x7ffffffffffffc00);  // Largest double < INT64_MAX.
  __ Fneg(d30, d29);                 // Smallest double > INT64_MIN.

  __ Fcvtms(w0, s0);
  __ Fcvtms(w1, s1);
  __ Fcvtms(w2, s2);
  __ Fcvtms(w3, s3);
  __ Fcvtms(w4, s4);
  __ Fcvtms(w5, s5);
  __ Fcvtms(w6, s6);
  __ Fcvtms(w7, s7);
  __ Fcvtms(w8, d8);
  __ Fcvtms(w9, d9);
  __ Fcvtms(w10, d10);
  __ Fcvtms(w11, d11);
  __ Fcvtms(w12, d12);
  __ Fcvtms(w13, d13);
  __ Fcvtms(w14, d14);
  __ Fcvtms(w15, d15);
  __ Fcvtms(x17, s17);
  __ Fcvtms(x18, s18);
  __ Fcvtms(x19, s19);
  __ Fcvtms(x20, s20);
  __ Fcvtms(x21, s21);
  __ Fcvtms(x22, s22);
  __ Fcvtms(x23, s23);
  __ Fcvtms(x24, d24);
  __ Fcvtms(x25, d25);
  __ Fcvtms(x26, d26);
  __ Fcvtms(x27, d27);
  __ Fcvtms(x28, d28);
  __ Fcvtms(x29, d29);
  __ Fcvtms(x30, d30);
  END();

  RUN();

  ASSERT_EQUAL_64(1, x0);
  ASSERT_EQUAL_64(1, x1);
  ASSERT_EQUAL_64(1, x2);
  ASSERT_EQUAL_64(0xfffffffe, x3);
  ASSERT_EQUAL_64(0x7fffffff, x4);
  ASSERT_EQUAL_64(0x80000000, x5);
  ASSERT_EQUAL_64(0x7fffff80, x6);
  ASSERT_EQUAL_64(0x80000080, x7);
  ASSERT_EQUAL_64(1, x8);
  ASSERT_EQUAL_64(1, x9);
  ASSERT_EQUAL_64(1, x10);
  ASSERT_EQUAL_64(0xfffffffe, x11);
  ASSERT_EQUAL_64(0x7fffffff, x12);
  ASSERT_EQUAL_64(0x80000000, x13);
  ASSERT_EQUAL_64(0x7ffffffe, x14);
  ASSERT_EQUAL_64(0x80000001, x15);
  ASSERT_EQUAL_64(1, x17);
  ASSERT_EQUAL_64(1, x18);
  ASSERT_EQUAL_64(0xfffffffffffffffe, x19);
  ASSERT_EQUAL_64(0x7fffffffffffffff, x20);
  ASSERT_EQUAL_64(0x8000000000000000, x21);
  ASSERT_EQUAL_64(0x7fffff8000000000, x22);
  ASSERT_EQUAL_64(0x8000008000000000, x23);
  ASSERT_EQUAL_64(1, x24);
  ASSERT_EQUAL_64(1, x25);
  ASSERT_EQUAL_64(0xfffffffffffffffe, x26);
  ASSERT_EQUAL_64(0x7fffffffffffffff, x27);
  ASSERT_EQUAL_64(0x8000000000000000, x28);
  ASSERT_EQUAL_64(0x7ffffffffffffc00, x29);
  ASSERT_EQUAL_64(0x8000000000000400, x30);

  TEARDOWN();
}


TEST(fcvtmu) {
  SETUP();

  START();
  __ Fmov(s0, 1.0);
  __ Fmov(s1, 1.1);
  __ Fmov(s2, 1.5);
  __ Fmov(s3, -1.5);
  __ Fmov(s4, kFP32PositiveInfinity);
  __ Fmov(s5, kFP32NegativeInfinity);
  __ Fmov(s6, 0x7fffff80);  // Largest float < INT32_MAX.
  __ Fneg(s7, s6);          // Smallest float > INT32_MIN.
  __ Fmov(d8, 1.0);
  __ Fmov(d9, 1.1);
  __ Fmov(d10, 1.5);
  __ Fmov(d11, -1.5);
  __ Fmov(d12, kFP64PositiveInfinity);
  __ Fmov(d13, kFP64NegativeInfinity);
  __ Fmov(d14, kWMaxInt - 1);
  __ Fmov(d15, kWMinInt + 1);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, -1.5);
  __ Fmov(s20, kFP32PositiveInfinity);
  __ Fmov(s21, kFP32NegativeInfinity);
  __ Fmov(s22, 0x7fffff8000000000);  // Largest float < INT64_MAX.
  __ Fneg(s23, s22);                 // Smallest float > INT64_MIN.
  __ Fmov(d24, 1.1);
  __ Fmov(d25, 1.5);
  __ Fmov(d26, -1.5);
  __ Fmov(d27, kFP64PositiveInfinity);
  __ Fmov(d28, kFP64NegativeInfinity);
  __ Fmov(d29, 0x7ffffffffffffc00);  // Largest double < INT64_MAX.
  __ Fneg(d30, d29);                 // Smallest double > INT64_MIN.

  __ Fcvtmu(w0, s0);
  __ Fcvtmu(w1, s1);
  __ Fcvtmu(w2, s2);
  __ Fcvtmu(w3, s3);
  __ Fcvtmu(w4, s4);
  __ Fcvtmu(w5, s5);
  __ Fcvtmu(w6, s6);
  __ Fcvtmu(w7, s7);
  __ Fcvtmu(w8, d8);
  __ Fcvtmu(w9, d9);
  __ Fcvtmu(w10, d10);
  __ Fcvtmu(w11, d11);
  __ Fcvtmu(w12, d12);
  __ Fcvtmu(w13, d13);
  __ Fcvtmu(w14, d14);
  __ Fcvtmu(x17, s17);
  __ Fcvtmu(x18, s18);
  __ Fcvtmu(x19, s19);
  __ Fcvtmu(x20, s20);
  __ Fcvtmu(x21, s21);
  __ Fcvtmu(x22, s22);
  __ Fcvtmu(x23, s23);
  __ Fcvtmu(x24, d24);
  __ Fcvtmu(x25, d25);
  __ Fcvtmu(x26, d26);
  __ Fcvtmu(x27, d27);
  __ Fcvtmu(x28, d28);
  __ Fcvtmu(x29, d29);
  __ Fcvtmu(x30, d30);
  END();

  RUN();

  ASSERT_EQUAL_64(1, x0);
  ASSERT_EQUAL_64(1, x1);
  ASSERT_EQUAL_64(1, x2);
  ASSERT_EQUAL_64(0, x3);
  ASSERT_EQUAL_64(0xffffffff, x4);
  ASSERT_EQUAL_64(0, x5);
  ASSERT_EQUAL_64(0x7fffff80, x6);
  ASSERT_EQUAL_64(0, x7);
  ASSERT_EQUAL_64(1, x8);
  ASSERT_EQUAL_64(1, x9);
  ASSERT_EQUAL_64(1, x10);
  ASSERT_EQUAL_64(0, x11);
  ASSERT_EQUAL_64(0xffffffff, x12);
  ASSERT_EQUAL_64(0, x13);
  ASSERT_EQUAL_64(0x7ffffffe, x14);
  ASSERT_EQUAL_64(1, x17);
  ASSERT_EQUAL_64(1, x18);
  ASSERT_EQUAL_64(0, x19);
  ASSERT_EQUAL_64(0xffffffffffffffff, x20);
  ASSERT_EQUAL_64(0, x21);
  ASSERT_EQUAL_64(0x7fffff8000000000, x22);
  ASSERT_EQUAL_64(0, x23);
  ASSERT_EQUAL_64(1, x24);
  ASSERT_EQUAL_64(1, x25);
  ASSERT_EQUAL_64(0, x26);
  ASSERT_EQUAL_64(0xffffffffffffffff, x27);
  ASSERT_EQUAL_64(0, x28);
  ASSERT_EQUAL_64(0x7ffffffffffffc00, x29);
  ASSERT_EQUAL_64(0, x30);

  TEARDOWN();
}


TEST(fcvtns) {
  SETUP();

  START();
  __ Fmov(s0, 1.0);
  __ Fmov(s1, 1.1);
  __ Fmov(s2, 1.5);
  __ Fmov(s3, -1.5);
  __ Fmov(s4, kFP32PositiveInfinity);
  __ Fmov(s5, kFP32NegativeInfinity);
  __ Fmov(s6, 0x7fffff80);  // Largest float < INT32_MAX.
  __ Fneg(s7, s6);          // Smallest float > INT32_MIN.
  __ Fmov(d8, 1.0);
  __ Fmov(d9, 1.1);
  __ Fmov(d10, 1.5);
  __ Fmov(d11, -1.5);
  __ Fmov(d12, kFP64PositiveInfinity);
  __ Fmov(d13, kFP64NegativeInfinity);
  __ Fmov(d14, kWMaxInt - 1);
  __ Fmov(d15, kWMinInt + 1);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, -1.5);
  __ Fmov(s20, kFP32PositiveInfinity);
  __ Fmov(s21, kFP32NegativeInfinity);
  __ Fmov(s22, 0x7fffff8000000000);  // Largest float < INT64_MAX.
  __ Fneg(s23, s22);                 // Smallest float > INT64_MIN.
  __ Fmov(d24, 1.1);
  __ Fmov(d25, 1.5);
  __ Fmov(d26, -1.5);
  __ Fmov(d27, kFP64PositiveInfinity);
  __ Fmov(d28, kFP64NegativeInfinity);
  __ Fmov(d29, 0x7ffffffffffffc00);  // Largest double < INT64_MAX.
  __ Fneg(d30, d29);                 // Smallest double > INT64_MIN.

  __ Fcvtns(w0, s0);
  __ Fcvtns(w1, s1);
  __ Fcvtns(w2, s2);
  __ Fcvtns(w3, s3);
  __ Fcvtns(w4, s4);
  __ Fcvtns(w5, s5);
  __ Fcvtns(w6, s6);
  __ Fcvtns(w7, s7);
  __ Fcvtns(w8, d8);
  __ Fcvtns(w9, d9);
  __ Fcvtns(w10, d10);
  __ Fcvtns(w11, d11);
  __ Fcvtns(w12, d12);
  __ Fcvtns(w13, d13);
  __ Fcvtns(w14, d14);
  __ Fcvtns(w15, d15);
  __ Fcvtns(x17, s17);
  __ Fcvtns(x18, s18);
  __ Fcvtns(x19, s19);
  __ Fcvtns(x20, s20);
  __ Fcvtns(x21, s21);
  __ Fcvtns(x22, s22);
  __ Fcvtns(x23, s23);
  __ Fcvtns(x24, d24);
  __ Fcvtns(x25, d25);
  __ Fcvtns(x26, d26);
  __ Fcvtns(x27, d27);
  __ Fcvtns(x28, d28);
  __ Fcvtns(x29, d29);
  __ Fcvtns(x30, d30);
  END();

  RUN();

  ASSERT_EQUAL_64(1, x0);
  ASSERT_EQUAL_64(1, x1);
  ASSERT_EQUAL_64(2, x2);
  ASSERT_EQUAL_64(0xfffffffe, x3);
  ASSERT_EQUAL_64(0x7fffffff, x4);
  ASSERT_EQUAL_64(0x80000000, x5);
  ASSERT_EQUAL_64(0x7fffff80, x6);
  ASSERT_EQUAL_64(0x80000080, x7);
  ASSERT_EQUAL_64(1, x8);
  ASSERT_EQUAL_64(1, x9);
  ASSERT_EQUAL_64(2, x10);
  ASSERT_EQUAL_64(0xfffffffe, x11);
  ASSERT_EQUAL_64(0x7fffffff, x12);
  ASSERT_EQUAL_64(0x80000000, x13);
  ASSERT_EQUAL_64(0x7ffffffe, x14);
  ASSERT_EQUAL_64(0x80000001, x15);
  ASSERT_EQUAL_64(1, x17);
  ASSERT_EQUAL_64(2, x18);
  ASSERT_EQUAL_64(0xfffffffffffffffe, x19);
  ASSERT_EQUAL_64(0x7fffffffffffffff, x20);
  ASSERT_EQUAL_64(0x8000000000000000, x21);
  ASSERT_EQUAL_64(0x7fffff8000000000, x22);
  ASSERT_EQUAL_64(0x8000008000000000, x23);
  ASSERT_EQUAL_64(1, x24);
  ASSERT_EQUAL_64(2, x25);
  ASSERT_EQUAL_64(0xfffffffffffffffe, x26);
  ASSERT_EQUAL_64(0x7fffffffffffffff, x27);
  ASSERT_EQUAL_64(0x8000000000000000, x28);
  ASSERT_EQUAL_64(0x7ffffffffffffc00, x29);
  ASSERT_EQUAL_64(0x8000000000000400, x30);

  TEARDOWN();
}


TEST(fcvtnu) {
  SETUP();

  START();
  __ Fmov(s0, 1.0);
  __ Fmov(s1, 1.1);
  __ Fmov(s2, 1.5);
  __ Fmov(s3, -1.5);
  __ Fmov(s4, kFP32PositiveInfinity);
  __ Fmov(s5, kFP32NegativeInfinity);
  __ Fmov(s6, 0xffffff00);  // Largest float < UINT32_MAX.
  __ Fmov(d8, 1.0);
  __ Fmov(d9, 1.1);
  __ Fmov(d10, 1.5);
  __ Fmov(d11, -1.5);
  __ Fmov(d12, kFP64PositiveInfinity);
  __ Fmov(d13, kFP64NegativeInfinity);
  __ Fmov(d14, 0xfffffffe);
  __ Fmov(s16, 1.0);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, -1.5);
  __ Fmov(s20, kFP32PositiveInfinity);
  __ Fmov(s21, kFP32NegativeInfinity);
  __ Fmov(s22, 0xffffff0000000000);  // Largest float < UINT64_MAX.
  __ Fmov(d24, 1.1);
  __ Fmov(d25, 1.5);
  __ Fmov(d26, -1.5);
  __ Fmov(d27, kFP64PositiveInfinity);
  __ Fmov(d28, kFP64NegativeInfinity);
  __ Fmov(d29, 0xfffffffffffff800);  // Largest double < UINT64_MAX.
  __ Fmov(s30, 0x100000000);

  __ Fcvtnu(w0, s0);
  __ Fcvtnu(w1, s1);
  __ Fcvtnu(w2, s2);
  __ Fcvtnu(w3, s3);
  __ Fcvtnu(w4, s4);
  __ Fcvtnu(w5, s5);
  __ Fcvtnu(w6, s6);
  __ Fcvtnu(w8, d8);
  __ Fcvtnu(w9, d9);
  __ Fcvtnu(w10, d10);
  __ Fcvtnu(w11, d11);
  __ Fcvtnu(w12, d12);
  __ Fcvtnu(w13, d13);
  __ Fcvtnu(w14, d14);
  __ Fcvtnu(w15, d15);
  __ Fcvtnu(x16, s16);
  __ Fcvtnu(x17, s17);
  __ Fcvtnu(x18, s18);
  __ Fcvtnu(x19, s19);
  __ Fcvtnu(x20, s20);
  __ Fcvtnu(x21, s21);
  __ Fcvtnu(x22, s22);
  __ Fcvtnu(x24, d24);
  __ Fcvtnu(x25, d25);
  __ Fcvtnu(x26, d26);
  __ Fcvtnu(x27, d27);
  __ Fcvtnu(x28, d28);
  __ Fcvtnu(x29, d29);
  __ Fcvtnu(w30, s30);
  END();

  RUN();

  ASSERT_EQUAL_64(1, x0);
  ASSERT_EQUAL_64(1, x1);
  ASSERT_EQUAL_64(2, x2);
  ASSERT_EQUAL_64(0, x3);
  ASSERT_EQUAL_64(0xffffffff, x4);
  ASSERT_EQUAL_64(0, x5);
  ASSERT_EQUAL_64(0xffffff00, x6);
  ASSERT_EQUAL_64(1, x8);
  ASSERT_EQUAL_64(1, x9);
  ASSERT_EQUAL_64(2, x10);
  ASSERT_EQUAL_64(0, x11);
  ASSERT_EQUAL_64(0xffffffff, x12);
  ASSERT_EQUAL_64(0, x13);
  ASSERT_EQUAL_64(0xfffffffe, x14);
  ASSERT_EQUAL_64(1, x16);
  ASSERT_EQUAL_64(1, x17);
  ASSERT_EQUAL_64(2, x18);
  ASSERT_EQUAL_64(0, x19);
  ASSERT_EQUAL_64(0xffffffffffffffff, x20);
  ASSERT_EQUAL_64(0, x21);
  ASSERT_EQUAL_64(0xffffff0000000000, x22);
  ASSERT_EQUAL_64(1, x24);
  ASSERT_EQUAL_64(2, x25);
  ASSERT_EQUAL_64(0, x26);
  ASSERT_EQUAL_64(0xffffffffffffffff, x27);
  ASSERT_EQUAL_64(0, x28);
  ASSERT_EQUAL_64(0xfffffffffffff800, x29);
  ASSERT_EQUAL_64(0xffffffff, x30);

  TEARDOWN();
}


TEST(fcvtzs) {
  SETUP();

  START();
  __ Fmov(s0, 1.0);
  __ Fmov(s1, 1.1);
  __ Fmov(s2, 1.5);
  __ Fmov(s3, -1.5);
  __ Fmov(s4, kFP32PositiveInfinity);
  __ Fmov(s5, kFP32NegativeInfinity);
  __ Fmov(s6, 0x7fffff80);  // Largest float < INT32_MAX.
  __ Fneg(s7, s6);          // Smallest float > INT32_MIN.
  __ Fmov(d8, 1.0);
  __ Fmov(d9, 1.1);
  __ Fmov(d10, 1.5);
  __ Fmov(d11, -1.5);
  __ Fmov(d12, kFP64PositiveInfinity);
  __ Fmov(d13, kFP64NegativeInfinity);
  __ Fmov(d14, kWMaxInt - 1);
  __ Fmov(d15, kWMinInt + 1);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, -1.5);
  __ Fmov(s20, kFP32PositiveInfinity);
  __ Fmov(s21, kFP32NegativeInfinity);
  __ Fmov(s22, 0x7fffff8000000000);  // Largest float < INT64_MAX.
  __ Fneg(s23, s22);                 // Smallest float > INT64_MIN.
  __ Fmov(d24, 1.1);
  __ Fmov(d25, 1.5);
  __ Fmov(d26, -1.5);
  __ Fmov(d27, kFP64PositiveInfinity);
  __ Fmov(d28, kFP64NegativeInfinity);
  __ Fmov(d29, 0x7ffffffffffffc00);  // Largest double < INT64_MAX.
  __ Fneg(d30, d29);                 // Smallest double > INT64_MIN.

  __ Fcvtzs(w0, s0);
  __ Fcvtzs(w1, s1);
  __ Fcvtzs(w2, s2);
  __ Fcvtzs(w3, s3);
  __ Fcvtzs(w4, s4);
  __ Fcvtzs(w5, s5);
  __ Fcvtzs(w6, s6);
  __ Fcvtzs(w7, s7);
  __ Fcvtzs(w8, d8);
  __ Fcvtzs(w9, d9);
  __ Fcvtzs(w10, d10);
  __ Fcvtzs(w11, d11);
  __ Fcvtzs(w12, d12);
  __ Fcvtzs(w13, d13);
  __ Fcvtzs(w14, d14);
  __ Fcvtzs(w15, d15);
  __ Fcvtzs(x17, s17);
  __ Fcvtzs(x18, s18);
  __ Fcvtzs(x19, s19);
  __ Fcvtzs(x20, s20);
  __ Fcvtzs(x21, s21);
  __ Fcvtzs(x22, s22);
  __ Fcvtzs(x23, s23);
  __ Fcvtzs(x24, d24);
  __ Fcvtzs(x25, d25);
  __ Fcvtzs(x26, d26);
  __ Fcvtzs(x27, d27);
  __ Fcvtzs(x28, d28);
  __ Fcvtzs(x29, d29);
  __ Fcvtzs(x30, d30);
  END();

  RUN();

  ASSERT_EQUAL_64(1, x0);
  ASSERT_EQUAL_64(1, x1);
  ASSERT_EQUAL_64(1, x2);
  ASSERT_EQUAL_64(0xffffffff, x3);
  ASSERT_EQUAL_64(0x7fffffff, x4);
  ASSERT_EQUAL_64(0x80000000, x5);
  ASSERT_EQUAL_64(0x7fffff80, x6);
  ASSERT_EQUAL_64(0x80000080, x7);
  ASSERT_EQUAL_64(1, x8);
  ASSERT_EQUAL_64(1, x9);
  ASSERT_EQUAL_64(1, x10);
  ASSERT_EQUAL_64(0xffffffff, x11);
  ASSERT_EQUAL_64(0x7fffffff, x12);
  ASSERT_EQUAL_64(0x80000000, x13);
  ASSERT_EQUAL_64(0x7ffffffe, x14);
  ASSERT_EQUAL_64(0x80000001, x15);
  ASSERT_EQUAL_64(1, x17);
  ASSERT_EQUAL_64(1, x18);
  ASSERT_EQUAL_64(0xffffffffffffffff, x19);
  ASSERT_EQUAL_64(0x7fffffffffffffff, x20);
  ASSERT_EQUAL_64(0x8000000000000000, x21);
  ASSERT_EQUAL_64(0x7fffff8000000000, x22);
  ASSERT_EQUAL_64(0x8000008000000000, x23);
  ASSERT_EQUAL_64(1, x24);
  ASSERT_EQUAL_64(1, x25);
  ASSERT_EQUAL_64(0xffffffffffffffff, x26);
  ASSERT_EQUAL_64(0x7fffffffffffffff, x27);
  ASSERT_EQUAL_64(0x8000000000000000, x28);
  ASSERT_EQUAL_64(0x7ffffffffffffc00, x29);
  ASSERT_EQUAL_64(0x8000000000000400, x30);

  TEARDOWN();
}


TEST(fcvtzu) {
  SETUP();

  START();
  __ Fmov(s0, 1.0);
  __ Fmov(s1, 1.1);
  __ Fmov(s2, 1.5);
  __ Fmov(s3, -1.5);
  __ Fmov(s4, kFP32PositiveInfinity);
  __ Fmov(s5, kFP32NegativeInfinity);
  __ Fmov(s6, 0x7fffff80);  // Largest float < INT32_MAX.
  __ Fneg(s7, s6);          // Smallest float > INT32_MIN.
  __ Fmov(d8, 1.0);
  __ Fmov(d9, 1.1);
  __ Fmov(d10, 1.5);
  __ Fmov(d11, -1.5);
  __ Fmov(d12, kFP64PositiveInfinity);
  __ Fmov(d13, kFP64NegativeInfinity);
  __ Fmov(d14, kWMaxInt - 1);
  __ Fmov(d15, kWMinInt + 1);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, -1.5);
  __ Fmov(s20, kFP32PositiveInfinity);
  __ Fmov(s21, kFP32NegativeInfinity);
  __ Fmov(s22, 0x7fffff8000000000);  // Largest float < INT64_MAX.
  __ Fneg(s23, s22);                 // Smallest float > INT64_MIN.
  __ Fmov(d24, 1.1);
  __ Fmov(d25, 1.5);
  __ Fmov(d26, -1.5);
  __ Fmov(d27, kFP64PositiveInfinity);
  __ Fmov(d28, kFP64NegativeInfinity);
  __ Fmov(d29, 0x7ffffffffffffc00);  // Largest double < INT64_MAX.
  __ Fneg(d30, d29);                 // Smallest double > INT64_MIN.

  __ Fcvtzu(w0, s0);
  __ Fcvtzu(w1, s1);
  __ Fcvtzu(w2, s2);
  __ Fcvtzu(w3, s3);
  __ Fcvtzu(w4, s4);
  __ Fcvtzu(w5, s5);
  __ Fcvtzu(w6, s6);
  __ Fcvtzu(w7, s7);
  __ Fcvtzu(w8, d8);
  __ Fcvtzu(w9, d9);
  __ Fcvtzu(w10, d10);
  __ Fcvtzu(w11, d11);
  __ Fcvtzu(w12, d12);
  __ Fcvtzu(w13, d13);
  __ Fcvtzu(w14, d14);
  __ Fcvtzu(x17, s17);
  __ Fcvtzu(x18, s18);
  __ Fcvtzu(x19, s19);
  __ Fcvtzu(x20, s20);
  __ Fcvtzu(x21, s21);
  __ Fcvtzu(x22, s22);
  __ Fcvtzu(x23, s23);
  __ Fcvtzu(x24, d24);
  __ Fcvtzu(x25, d25);
  __ Fcvtzu(x26, d26);
  __ Fcvtzu(x27, d27);
  __ Fcvtzu(x28, d28);
  __ Fcvtzu(x29, d29);
  __ Fcvtzu(x30, d30);
  END();

  RUN();

  ASSERT_EQUAL_64(1, x0);
  ASSERT_EQUAL_64(1, x1);
  ASSERT_EQUAL_64(1, x2);
  ASSERT_EQUAL_64(0, x3);
  ASSERT_EQUAL_64(0xffffffff, x4);
  ASSERT_EQUAL_64(0, x5);
  ASSERT_EQUAL_64(0x7fffff80, x6);
  ASSERT_EQUAL_64(0, x7);
  ASSERT_EQUAL_64(1, x8);
  ASSERT_EQUAL_64(1, x9);
  ASSERT_EQUAL_64(1, x10);
  ASSERT_EQUAL_64(0, x11);
  ASSERT_EQUAL_64(0xffffffff, x12);
  ASSERT_EQUAL_64(0, x13);
  ASSERT_EQUAL_64(0x7ffffffe, x14);
  ASSERT_EQUAL_64(1, x17);
  ASSERT_EQUAL_64(1, x18);
  ASSERT_EQUAL_64(0, x19);
  ASSERT_EQUAL_64(0xffffffffffffffff, x20);
  ASSERT_EQUAL_64(0, x21);
  ASSERT_EQUAL_64(0x7fffff8000000000, x22);
  ASSERT_EQUAL_64(0, x23);
  ASSERT_EQUAL_64(1, x24);
  ASSERT_EQUAL_64(1, x25);
  ASSERT_EQUAL_64(0, x26);
  ASSERT_EQUAL_64(0xffffffffffffffff, x27);
  ASSERT_EQUAL_64(0, x28);
  ASSERT_EQUAL_64(0x7ffffffffffffc00, x29);
  ASSERT_EQUAL_64(0, x30);

  TEARDOWN();
}
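

// Summary of the rounding attributes exercised by the Fcvt* tests above,
// using 1.5 and -1.5 (or 2.5 and -2.5 for fcvtas) as witnesses:
//   Fcvtas/Fcvtau: to nearest, ties away from zero  (2.5 -> 3, -2.5 -> -3)
//   Fcvtms/Fcvtmu: toward minus infinity            (1.5 -> 1, -1.5 -> -2)
//   Fcvtns/Fcvtnu: to nearest, ties to even         (1.5 -> 2, -1.5 -> -2)
//   Fcvtzs/Fcvtzu: toward zero                      (1.5 -> 1, -1.5 -> -1)
// The unsigned variants additionally clamp negative results to zero.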


TEST(neon_fcvtl) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x000080007efffeff, 0x3100b1007c00fc00);
  __ Movi(v1.V2D(), 0x03ff83ff00038003, 0x000180017c01fc01);
  __ Movi(v2.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
  __ Movi(v3.V2D(), 0x0000000080000000, 0x7f8fffffff8fffff);
  __ Movi(v4.V2D(), 0x7fcfffffffcfffff, 0x0000000180000001);
  __ Fcvtl(v16.V4S(), v0.V4H());
  __ Fcvtl2(v17.V4S(), v0.V8H());
  __ Fcvtl(v18.V4S(), v1.V4H());
  __ Fcvtl2(v19.V4S(), v1.V8H());

  __ Fcvtl(v20.V2D(), v2.V2S());
  __ Fcvtl2(v21.V2D(), v2.V4S());
  __ Fcvtl(v22.V2D(), v3.V2S());
  __ Fcvtl2(v23.V2D(), v3.V4S());
  __ Fcvtl(v24.V2D(), v4.V2S());
  __ Fcvtl2(v25.V2D(), v4.V4S());

  END();

  RUN();
  ASSERT_EQUAL_128(0x3e200000be200000, 0x7f800000ff800000, q16);
  ASSERT_EQUAL_128(0x0000000080000000, 0x7fdfe000ffdfe000, q17);
  ASSERT_EQUAL_128(0x33800000b3800000, 0x7fc02000ffc02000, q18);
  ASSERT_EQUAL_128(0x387fc000b87fc000, 0x34400000b4400000, q19);
  ASSERT_EQUAL_128(0x7ff0000000000000, 0xfff0000000000000, q20);
  ASSERT_EQUAL_128(0x3fc4000000000000, 0xbfc4000000000000, q21);
  ASSERT_EQUAL_128(0x7ff9ffffe0000000, 0xfff9ffffe0000000, q22);
  ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000000000, q23);
  ASSERT_EQUAL_128(0x36a0000000000000, 0xb6a0000000000000, q24);
  ASSERT_EQUAL_128(0x7ff9ffffe0000000, 0xfff9ffffe0000000, q25);
  TEARDOWN();
}


TEST(neon_fcvtn) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
  __ Movi(v1.V2D(), 0x0000000080000000, 0x7f8fffffff8fffff);
  __ Movi(v2.V2D(), 0x7fcfffffffcfffff, 0x0000000180000001);
  __ Movi(v3.V2D(), 0x3fc4000000000000, 0xbfc4000000000000);
  __ Movi(v4.V2D(), 0x7ff0000000000000, 0xfff0000000000000);
  __ Movi(v5.V2D(), 0x0000000000000000, 0x8000000000000000);
  __ Movi(v6.V2D(), 0x7ff0ffffffffffff, 0xfff0ffffffffffff);
  __ Movi(v7.V2D(), 0x7ff8ffffffffffff, 0xfff8ffffffffffff);
  __ Movi(v8.V2D(), 0x0000000000000001, 0x8000000000000001);

  __ Fcvtn(v16.V4H(), v0.V4S());
  __ Fcvtn2(v16.V8H(), v1.V4S());
  __ Fcvtn(v17.V4H(), v2.V4S());
  __ Fcvtn(v18.V2S(), v3.V2D());
  __ Fcvtn2(v18.V4S(), v4.V2D());
  __ Fcvtn(v19.V2S(), v5.V2D());
  __ Fcvtn2(v19.V4S(), v6.V2D());
  __ Fcvtn(v20.V2S(), v7.V2D());
  __ Fcvtn2(v20.V4S(), v8.V2D());
  END();

  RUN();
  ASSERT_EQUAL_128(0x000080007e7ffe7f, 0x3100b1007c00fc00, q16);
  ASSERT_EQUAL_64(0x7e7ffe7f00008000, d17);
  ASSERT_EQUAL_128(0x7f800000ff800000, 0x3e200000be200000, q18);
  ASSERT_EQUAL_128(0x7fc7ffffffc7ffff, 0x0000000080000000, q19);
  ASSERT_EQUAL_128(0x0000000080000000, 0x7fc7ffffffc7ffff, q20);
  TEARDOWN();
}


TEST(neon_fcvtxn) {
  SETUP();

  START();
  __ Movi(v0.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
  __ Movi(v1.V2D(), 0x0000000080000000, 0x7f8fffffff8fffff);
  __ Movi(v2.V2D(), 0x7fcfffffffcfffff, 0x0000000180000001);
  __ Movi(v3.V2D(), 0x3fc4000000000000, 0xbfc4000000000000);
  __ Movi(v4.V2D(), 0x7ff0000000000000, 0xfff0000000000000);
  __ Movi(v5.V2D(), 0x0000000000000000, 0x8000000000000000);
  __ Movi(v6.V2D(), 0x7ff0ffffffffffff, 0xfff0ffffffffffff);
  __ Movi(v7.V2D(), 0x7ff8ffffffffffff, 0xfff8ffffffffffff);
  __ Movi(v8.V2D(), 0x0000000000000001, 0x8000000000000001);
  __ Movi(v9.V2D(), 0x41ed000000000000, 0x41efffffffefffff);
  __ Fcvtxn(v16.V2S(), v0.V2D());
  __ Fcvtxn2(v16.V4S(), v1.V2D());
  __ Fcvtxn(v17.V2S(), v2.V2D());
  __ Fcvtxn2(v17.V4S(), v3.V2D());
  __ Fcvtxn(v18.V2S(), v4.V2D());
  __ Fcvtxn2(v18.V4S(), v5.V2D());
  __ Fcvtxn(v19.V2S(), v6.V2D());
  __ Fcvtxn2(v19.V4S(), v7.V2D());
  __ Fcvtxn(v20.V2S(), v8.V2D());
  __ Fcvtxn2(v20.V4S(), v9.V2D());
  __ Fcvtxn(s21, d0);
  END();

  RUN();
  ASSERT_EQUAL_128(0x000000017f7fffff, 0x310000057f7fffff, q16);
  ASSERT_EQUAL_128(0x3e200000be200000, 0x7f7fffff00000001, q17);
  ASSERT_EQUAL_128(0x0000000080000000, 0x7f800000ff800000, q18);
  ASSERT_EQUAL_128(0x7fc7ffffffc7ffff, 0x7fc7ffffffc7ffff, q19);
  ASSERT_EQUAL_128(0x4f6800004f7fffff, 0x0000000180000001, q20);
  ASSERT_EQUAL_128(0, 0x7f7fffff, q21);
  TEARDOWN();
}
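

// Note on the NEON forms above: Fcvtl/Fcvtn/Fcvtxn read or write the lower
// half of the vector, while the "2" variants (Fcvtl2, Fcvtn2, Fcvtxn2) use
// the upper half. Fcvtxn converts double to single using the round-to-odd
// attribute, which is why its results for inexact inputs differ from the
// Fcvtn results checked above.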


// Test that scvtf and ucvtf can convert the 64-bit input into the expected
// value. All possible values of 'fbits' are tested. The expected value is
// modified accordingly in each case.
//
// The expected value is specified as the bit encoding of the expected double
// produced by scvtf (expected_scvtf_bits) as well as ucvtf
// (expected_ucvtf_bits).
//
// Where the input value is representable by int32_t or uint32_t, conversions
// from W registers will also be tested.
static void TestUScvtfHelper(uint64_t in,
                             uint64_t expected_scvtf_bits,
                             uint64_t expected_ucvtf_bits) {
  uint64_t u64 = in;
  uint32_t u32 = u64 & 0xffffffff;
  int64_t s64 = static_cast<int64_t>(in);
  int32_t s32 = s64 & 0x7fffffff;

  bool cvtf_s32 = (s64 == s32);
  bool cvtf_u32 = (u64 == u32);

  double results_scvtf_x[65];
  double results_ucvtf_x[65];
  double results_scvtf_w[33];
  double results_ucvtf_w[33];

  SETUP();
  START();

  __ Mov(x0, reinterpret_cast<uintptr_t>(results_scvtf_x));
  __ Mov(x1, reinterpret_cast<uintptr_t>(results_ucvtf_x));
  __ Mov(x2, reinterpret_cast<uintptr_t>(results_scvtf_w));
  __ Mov(x3, reinterpret_cast<uintptr_t>(results_ucvtf_w));

  __ Mov(x10, s64);

  // Corrupt the top word, in case it is accidentally used during W-register
  // conversions.
  __ Mov(x11, 0x5555555555555555);
  __ Bfi(x11, x10, 0, kWRegSize);

  // Test integer conversions.
  __ Scvtf(d0, x10);
  __ Ucvtf(d1, x10);
  __ Scvtf(d2, w11);
  __ Ucvtf(d3, w11);
  __ Str(d0, MemOperand(x0));
  __ Str(d1, MemOperand(x1));
  __ Str(d2, MemOperand(x2));
  __ Str(d3, MemOperand(x3));

  // Test all possible values of fbits.
  for (int fbits = 1; fbits <= 32; fbits++) {
    __ Scvtf(d0, x10, fbits);
    __ Ucvtf(d1, x10, fbits);
    __ Scvtf(d2, w11, fbits);
    __ Ucvtf(d3, w11, fbits);
    __ Str(d0, MemOperand(x0, fbits * kDRegSizeInBytes));
    __ Str(d1, MemOperand(x1, fbits * kDRegSizeInBytes));
    __ Str(d2, MemOperand(x2, fbits * kDRegSizeInBytes));
    __ Str(d3, MemOperand(x3, fbits * kDRegSizeInBytes));
  }

  // Conversions from W registers can only handle fbits values <= 32, so just
  // test conversions from X registers for 32 < fbits <= 64.
  for (int fbits = 33; fbits <= 64; fbits++) {
    __ Scvtf(d0, x10, fbits);
    __ Ucvtf(d1, x10, fbits);
    __ Str(d0, MemOperand(x0, fbits * kDRegSizeInBytes));
    __ Str(d1, MemOperand(x1, fbits * kDRegSizeInBytes));
  }

  END();
  RUN();

  // Check the results.
  double expected_scvtf_base = RawbitsToDouble(expected_scvtf_bits);
  double expected_ucvtf_base = RawbitsToDouble(expected_ucvtf_bits);

  for (int fbits = 0; fbits <= 32; fbits++) {
    double expected_scvtf = expected_scvtf_base / std::pow(2, fbits);
    double expected_ucvtf = expected_ucvtf_base / std::pow(2, fbits);
    ASSERT_EQUAL_FP64(expected_scvtf, results_scvtf_x[fbits]);
    ASSERT_EQUAL_FP64(expected_ucvtf, results_ucvtf_x[fbits]);
    if (cvtf_s32) ASSERT_EQUAL_FP64(expected_scvtf, results_scvtf_w[fbits]);
    if (cvtf_u32) ASSERT_EQUAL_FP64(expected_ucvtf, results_ucvtf_w[fbits]);
  }
  for (int fbits = 33; fbits <= 64; fbits++) {
    double expected_scvtf = expected_scvtf_base / std::pow(2, fbits);
    double expected_ucvtf = expected_ucvtf_base / std::pow(2, fbits);
    ASSERT_EQUAL_FP64(expected_scvtf, results_scvtf_x[fbits]);
    ASSERT_EQUAL_FP64(expected_ucvtf, results_ucvtf_x[fbits]);
  }

  TEARDOWN();
}
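

// For example, TestUScvtfHelper(0x0000000000000001, 0x3ff0000000000000,
// 0x3ff0000000000000) below checks that both Scvtf and Ucvtf produce 1.0
// with fbits == 0, then 0.5 with fbits == 1, 0.25 with fbits == 2, and so
// on down to 2^-64.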


TEST(scvtf_ucvtf_double) {
  // Simple conversions of positive numbers which require no rounding; the
  // results should not depend on the rounding mode, and ucvtf and scvtf
  // should produce the same result.
  TestUScvtfHelper(0x0000000000000000, 0x0000000000000000, 0x0000000000000000);
  TestUScvtfHelper(0x0000000000000001, 0x3ff0000000000000, 0x3ff0000000000000);
  TestUScvtfHelper(0x0000000040000000, 0x41d0000000000000, 0x41d0000000000000);
  TestUScvtfHelper(0x0000000100000000, 0x41f0000000000000, 0x41f0000000000000);
  TestUScvtfHelper(0x4000000000000000, 0x43d0000000000000, 0x43d0000000000000);
  // Test mantissa extremities.
  TestUScvtfHelper(0x4000000000000400, 0x43d0000000000001, 0x43d0000000000001);
  // The largest int32_t that fits in a double.
  TestUScvtfHelper(0x000000007fffffff, 0x41dfffffffc00000, 0x41dfffffffc00000);
  // Values that would be negative if treated as an int32_t.
  TestUScvtfHelper(0x00000000ffffffff, 0x41efffffffe00000, 0x41efffffffe00000);
  TestUScvtfHelper(0x0000000080000000, 0x41e0000000000000, 0x41e0000000000000);
  TestUScvtfHelper(0x0000000080000001, 0x41e0000000200000, 0x41e0000000200000);
  // The largest int64_t that fits in a double.
  TestUScvtfHelper(0x7ffffffffffffc00, 0x43dfffffffffffff, 0x43dfffffffffffff);
  // Check for bit pattern reproduction.
  TestUScvtfHelper(0x0123456789abcde0, 0x43723456789abcde, 0x43723456789abcde);
  TestUScvtfHelper(0x0000000012345678, 0x41b2345678000000, 0x41b2345678000000);

  // Simple conversions of negative int64_t values. These require no rounding,
  // and the results should not depend on the rounding mode.
  TestUScvtfHelper(0xffffffffc0000000, 0xc1d0000000000000, 0x43effffffff80000);
  TestUScvtfHelper(0xffffffff00000000, 0xc1f0000000000000, 0x43efffffffe00000);
  TestUScvtfHelper(0xc000000000000000, 0xc3d0000000000000, 0x43e8000000000000);

  // Conversions which require rounding.
  TestUScvtfHelper(0x1000000000000000, 0x43b0000000000000, 0x43b0000000000000);
  TestUScvtfHelper(0x1000000000000001, 0x43b0000000000000, 0x43b0000000000000);
  TestUScvtfHelper(0x1000000000000080, 0x43b0000000000000, 0x43b0000000000000);
  TestUScvtfHelper(0x1000000000000081, 0x43b0000000000001, 0x43b0000000000001);
  TestUScvtfHelper(0x1000000000000100, 0x43b0000000000001, 0x43b0000000000001);
  TestUScvtfHelper(0x1000000000000101, 0x43b0000000000001, 0x43b0000000000001);
  TestUScvtfHelper(0x1000000000000180, 0x43b0000000000002, 0x43b0000000000002);
  TestUScvtfHelper(0x1000000000000181, 0x43b0000000000002, 0x43b0000000000002);
  TestUScvtfHelper(0x1000000000000200, 0x43b0000000000002, 0x43b0000000000002);
  TestUScvtfHelper(0x1000000000000201, 0x43b0000000000002, 0x43b0000000000002);
  TestUScvtfHelper(0x1000000000000280, 0x43b0000000000002, 0x43b0000000000002);
  TestUScvtfHelper(0x1000000000000281, 0x43b0000000000003, 0x43b0000000000003);
  TestUScvtfHelper(0x1000000000000300, 0x43b0000000000003, 0x43b0000000000003);
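  // In the block above the inputs are near 2^60, where a double's unit of
  // least precision is 2^(60 - 52) = 0x100. The ...80/...81 cases therefore
  // sit exactly on or just past a tie, demonstrating round-to-nearest-even:
  // 0x1000000000000080 (an exact tie) rounds down to the even significand,
  // while 0x1000000000000180 (also a tie) rounds up to the even one.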
  // Check rounding of negative int64_t values (and large uint64_t values).
  TestUScvtfHelper(0x8000000000000000, 0xc3e0000000000000, 0x43e0000000000000);
  TestUScvtfHelper(0x8000000000000001, 0xc3e0000000000000, 0x43e0000000000000);
  TestUScvtfHelper(0x8000000000000200, 0xc3e0000000000000, 0x43e0000000000000);
  TestUScvtfHelper(0x8000000000000201, 0xc3dfffffffffffff, 0x43e0000000000000);
  TestUScvtfHelper(0x8000000000000400, 0xc3dfffffffffffff, 0x43e0000000000000);
  TestUScvtfHelper(0x8000000000000401, 0xc3dfffffffffffff, 0x43e0000000000001);
  TestUScvtfHelper(0x8000000000000600, 0xc3dffffffffffffe, 0x43e0000000000001);
  TestUScvtfHelper(0x8000000000000601, 0xc3dffffffffffffe, 0x43e0000000000001);
  TestUScvtfHelper(0x8000000000000800, 0xc3dffffffffffffe, 0x43e0000000000001);
  TestUScvtfHelper(0x8000000000000801, 0xc3dffffffffffffe, 0x43e0000000000001);
  TestUScvtfHelper(0x8000000000000a00, 0xc3dffffffffffffe, 0x43e0000000000001);
  TestUScvtfHelper(0x8000000000000a01, 0xc3dffffffffffffd, 0x43e0000000000001);
  TestUScvtfHelper(0x8000000000000c00, 0xc3dffffffffffffd, 0x43e0000000000002);
  // Round up to produce a result that's too big for the input to represent.
  TestUScvtfHelper(0x7ffffffffffffe00, 0x43e0000000000000, 0x43e0000000000000);
  TestUScvtfHelper(0x7fffffffffffffff, 0x43e0000000000000, 0x43e0000000000000);
  TestUScvtfHelper(0xfffffffffffffc00, 0xc090000000000000, 0x43f0000000000000);
  TestUScvtfHelper(0xffffffffffffffff, 0xbff0000000000000, 0x43f0000000000000);
}


// The same as TestUScvtfHelper, but convert to floats.
static void TestUScvtf32Helper(uint64_t in,
                               uint32_t expected_scvtf_bits,
                               uint32_t expected_ucvtf_bits) {
  uint64_t u64 = in;
  uint32_t u32 = u64 & 0xffffffff;
  int64_t s64 = static_cast<int64_t>(in);
  int32_t s32 = s64 & 0x7fffffff;

  bool cvtf_s32 = (s64 == s32);
  bool cvtf_u32 = (u64 == u32);

  float results_scvtf_x[65];
  float results_ucvtf_x[65];
  float results_scvtf_w[33];
  float results_ucvtf_w[33];

  SETUP();
  START();

  __ Mov(x0, reinterpret_cast<uintptr_t>(results_scvtf_x));
  __ Mov(x1, reinterpret_cast<uintptr_t>(results_ucvtf_x));
  __ Mov(x2, reinterpret_cast<uintptr_t>(results_scvtf_w));
  __ Mov(x3, reinterpret_cast<uintptr_t>(results_ucvtf_w));

  __ Mov(x10, s64);

  // Corrupt the top word, in case it is accidentally used during W-register
  // conversions.
  __ Mov(x11, 0x5555555555555555);
  __ Bfi(x11, x10, 0, kWRegSize);

  // Test integer conversions.
  __ Scvtf(s0, x10);
  __ Ucvtf(s1, x10);
  __ Scvtf(s2, w11);
  __ Ucvtf(s3, w11);
  __ Str(s0, MemOperand(x0));
  __ Str(s1, MemOperand(x1));
  __ Str(s2, MemOperand(x2));
  __ Str(s3, MemOperand(x3));

  // Test all possible values of fbits.
  for (int fbits = 1; fbits <= 32; fbits++) {
    __ Scvtf(s0, x10, fbits);
    __ Ucvtf(s1, x10, fbits);
    __ Scvtf(s2, w11, fbits);
    __ Ucvtf(s3, w11, fbits);
    __ Str(s0, MemOperand(x0, fbits * kSRegSizeInBytes));
    __ Str(s1, MemOperand(x1, fbits * kSRegSizeInBytes));
    __ Str(s2, MemOperand(x2, fbits * kSRegSizeInBytes));
    __ Str(s3, MemOperand(x3, fbits * kSRegSizeInBytes));
  }

  // Conversions from W registers can only handle fbits values <= 32, so just
  // test conversions from X registers for 32 < fbits <= 64.
  for (int fbits = 33; fbits <= 64; fbits++) {
    __ Scvtf(s0, x10, fbits);
    __ Ucvtf(s1, x10, fbits);
    __ Str(s0, MemOperand(x0, fbits * kSRegSizeInBytes));
    __ Str(s1, MemOperand(x1, fbits * kSRegSizeInBytes));
  }

  END();
  RUN();

  // Check the results.
  float expected_scvtf_base = RawbitsToFloat(expected_scvtf_bits);
  float expected_ucvtf_base = RawbitsToFloat(expected_ucvtf_bits);

  for (int fbits = 0; fbits <= 32; fbits++) {
    float expected_scvtf = expected_scvtf_base / std::pow(2.0f, fbits);
    float expected_ucvtf = expected_ucvtf_base / std::pow(2.0f, fbits);
    ASSERT_EQUAL_FP32(expected_scvtf, results_scvtf_x[fbits]);
    ASSERT_EQUAL_FP32(expected_ucvtf, results_ucvtf_x[fbits]);
    if (cvtf_s32) ASSERT_EQUAL_FP32(expected_scvtf, results_scvtf_w[fbits]);
    if (cvtf_u32) ASSERT_EQUAL_FP32(expected_ucvtf, results_ucvtf_w[fbits]);
  }
  for (int fbits = 33; fbits <= 64; fbits++) {
    float expected_scvtf = expected_scvtf_base / std::pow(2.0f, fbits);
    float expected_ucvtf = expected_ucvtf_base / std::pow(2.0f, fbits);
    ASSERT_EQUAL_FP32(expected_scvtf, results_scvtf_x[fbits]);
    ASSERT_EQUAL_FP32(expected_ucvtf, results_ucvtf_x[fbits]);
  }

  TEARDOWN();
}


TEST(scvtf_ucvtf_float) {
  // Simple conversions of positive numbers which require no rounding; the
  // results should not depend on the rounding mode, and ucvtf and scvtf
  // should produce the same result.
  TestUScvtf32Helper(0x0000000000000000, 0x00000000, 0x00000000);
  TestUScvtf32Helper(0x0000000000000001, 0x3f800000, 0x3f800000);
  TestUScvtf32Helper(0x0000000040000000, 0x4e800000, 0x4e800000);
  TestUScvtf32Helper(0x0000000100000000, 0x4f800000, 0x4f800000);
  TestUScvtf32Helper(0x4000000000000000, 0x5e800000, 0x5e800000);
  // Test mantissa extremities.
  TestUScvtf32Helper(0x0000000000800001, 0x4b000001, 0x4b000001);
  TestUScvtf32Helper(0x4000008000000000, 0x5e800001, 0x5e800001);
  // The largest int32_t that fits in a float.
  TestUScvtf32Helper(0x000000007fffff80, 0x4effffff, 0x4effffff);
  // Values that would be negative if treated as an int32_t.
  TestUScvtf32Helper(0x00000000ffffff00, 0x4f7fffff, 0x4f7fffff);
  TestUScvtf32Helper(0x0000000080000000, 0x4f000000, 0x4f000000);
  TestUScvtf32Helper(0x0000000080000100, 0x4f000001, 0x4f000001);
  // The largest int64_t that fits in a float.
  TestUScvtf32Helper(0x7fffff8000000000, 0x5effffff, 0x5effffff);
  // Check for bit pattern reproduction.
  TestUScvtf32Helper(0x0000000000876543, 0x4b076543, 0x4b076543);

  // Simple conversions of negative int64_t values. These require no rounding,
  // and the results should not depend on the rounding mode.
  TestUScvtf32Helper(0xfffffc0000000000, 0xd4800000, 0x5f7ffffc);
  TestUScvtf32Helper(0xc000000000000000, 0xde800000, 0x5f400000);

  // Conversions which require rounding.
  TestUScvtf32Helper(0x0000800000000000, 0x57000000, 0x57000000);
  TestUScvtf32Helper(0x0000800000000001, 0x57000000, 0x57000000);
  TestUScvtf32Helper(0x0000800000800000, 0x57000000, 0x57000000);
  TestUScvtf32Helper(0x0000800000800001, 0x57000001, 0x57000001);
  TestUScvtf32Helper(0x0000800001000000, 0x57000001, 0x57000001);
  TestUScvtf32Helper(0x0000800001000001, 0x57000001, 0x57000001);
  TestUScvtf32Helper(0x0000800001800000, 0x57000002, 0x57000002);
  TestUScvtf32Helper(0x0000800001800001, 0x57000002, 0x57000002);
  TestUScvtf32Helper(0x0000800002000000, 0x57000002, 0x57000002);
  TestUScvtf32Helper(0x0000800002000001, 0x57000002, 0x57000002);
  TestUScvtf32Helper(0x0000800002800000, 0x57000002, 0x57000002);
  TestUScvtf32Helper(0x0000800002800001, 0x57000003, 0x57000003);
  TestUScvtf32Helper(0x0000800003000000, 0x57000003, 0x57000003);
  // Check rounding of negative int64_t values (and large uint64_t values).
  TestUScvtf32Helper(0x8000000000000000, 0xdf000000, 0x5f000000);
  TestUScvtf32Helper(0x8000000000000001, 0xdf000000, 0x5f000000);
  TestUScvtf32Helper(0x8000004000000000, 0xdf000000, 0x5f000000);
  TestUScvtf32Helper(0x8000004000000001, 0xdeffffff, 0x5f000000);
  TestUScvtf32Helper(0x8000008000000000, 0xdeffffff, 0x5f000000);
  TestUScvtf32Helper(0x8000008000000001, 0xdeffffff, 0x5f000001);
  TestUScvtf32Helper(0x800000c000000000, 0xdefffffe, 0x5f000001);
  TestUScvtf32Helper(0x800000c000000001, 0xdefffffe, 0x5f000001);
  TestUScvtf32Helper(0x8000010000000000, 0xdefffffe, 0x5f000001);
  TestUScvtf32Helper(0x8000010000000001, 0xdefffffe, 0x5f000001);
  TestUScvtf32Helper(0x8000014000000000, 0xdefffffe, 0x5f000001);
  TestUScvtf32Helper(0x8000014000000001, 0xdefffffd, 0x5f000001);
  TestUScvtf32Helper(0x8000018000000000, 0xdefffffd, 0x5f000002);
  // Round up to produce a result that's too big for the input to represent.
  TestUScvtf32Helper(0x000000007fffffc0, 0x4f000000, 0x4f000000);
  TestUScvtf32Helper(0x000000007fffffff, 0x4f000000, 0x4f000000);
  TestUScvtf32Helper(0x00000000ffffff80, 0x4f800000, 0x4f800000);
  TestUScvtf32Helper(0x00000000ffffffff, 0x4f800000, 0x4f800000);
  TestUScvtf32Helper(0x7fffffc000000000, 0x5f000000, 0x5f000000);
  TestUScvtf32Helper(0x7fffffffffffffff, 0x5f000000, 0x5f000000);
  TestUScvtf32Helper(0xffffff8000000000, 0xd3000000, 0x5f800000);
  TestUScvtf32Helper(0xffffffffffffffff, 0xbf800000, 0x5f800000);
}
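

// The float cases mirror the double ones: near 2^47 a float's unit of least
// precision is 2^(47 - 23) = 0x1000000, so 0x0000800000800000 is an exact
// tie that rounds down to the even significand, while 0x0000800001800000 is
// a tie that rounds up to the even one.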
13129 ASSERT_EQUAL_32(0, w6); 13130 13131 TEARDOWN(); 13132 } 13133 13134 13135 TEST(system_msr) { 13136 // All FPCR fields that must be implemented: AHP, DN, FZ, RMode 13137 const uint64_t fpcr_core = 0x07c00000; 13138 13139 // All FPCR fields (including fields which may be read-as-zero): 13140 // Stride, Len 13141 // IDE, IXE, UFE, OFE, DZE, IOE 13142 const uint64_t fpcr_all = fpcr_core | 0x00379f00; 13143 13144 SETUP(); 13145 13146 START(); 13147 __ Mov(w0, 0); 13148 __ Mov(w1, 0x7fffffff); 13149 13150 __ Mov(x7, 0); 13151 13152 __ Mov(x10, NVFlag); 13153 __ Cmp(w0, w0); // Set Z and C. 13154 __ Msr(NZCV, x10); // Set N and V. 13155 // The Msr should have overwritten every flag set by the Cmp. 13156 __ Cinc(x7, x7, mi); // N 13157 __ Cinc(x7, x7, ne); // !Z 13158 __ Cinc(x7, x7, lo); // !C 13159 __ Cinc(x7, x7, vs); // V 13160 13161 __ Mov(x10, ZCFlag); 13162 __ Cmn(w1, w1); // Set N and V. 13163 __ Msr(NZCV, x10); // Set Z and C. 13164 // The Msr should have overwritten every flag set by the Cmn. 13165 __ Cinc(x7, x7, pl); // !N 13166 __ Cinc(x7, x7, eq); // Z 13167 __ Cinc(x7, x7, hs); // C 13168 __ Cinc(x7, x7, vc); // !V 13169 13170 // All core FPCR fields must be writable. 13171 __ Mov(x8, fpcr_core); 13172 __ Msr(FPCR, x8); 13173 __ Mrs(x8, FPCR); 13174 13175 // All FPCR fields, including optional ones. This part of the test doesn't 13176 // achieve much other than ensuring that supported fields can be cleared by 13177 // the next test. 13178 __ Mov(x9, fpcr_all); 13179 __ Msr(FPCR, x9); 13180 __ Mrs(x9, FPCR); 13181 __ And(x9, x9, fpcr_core); 13182 13183 // The undefined bits must ignore writes. 13184 // It's conceivable that a future version of the architecture could use these 13185 // fields (making this test fail), but in the meantime this is a useful test 13186 // for the simulator. 13187 __ Mov(x10, ~fpcr_all); 13188 __ Msr(FPCR, x10); 13189 __ Mrs(x10, FPCR); 13190 13191 END(); 13192 13193 RUN(); 13194 13195 // We should have incremented x7 (from 0) exactly 8 times. 13196 ASSERT_EQUAL_64(8, x7); 13197 13198 ASSERT_EQUAL_64(fpcr_core, x8); 13199 ASSERT_EQUAL_64(fpcr_core, x9); 13200 ASSERT_EQUAL_64(0, x10); 13201 13202 TEARDOWN(); 13203 } 13204 13205 13206 TEST(system_nop) { 13207 SETUP(); 13208 RegisterDump before; 13209 13210 START(); 13211 before.Dump(&masm); 13212 __ Nop(); 13213 END(); 13214 13215 RUN(); 13216 13217 ASSERT_EQUAL_REGISTERS(before); 13218 ASSERT_EQUAL_NZCV(before.flags_nzcv()); 13219 13220 TEARDOWN(); 13221 } 13222 13223 13224 TEST(zero_dest) { 13225 SETUP(); 13226 RegisterDump before; 13227 13228 START(); 13229 // Preserve the stack pointer, in case we clobber it. 13230 __ Mov(x30, sp); 13231 // Initialize the other registers used in this test. 13232 uint64_t literal_base = 0x0100001000100101; 13233 __ Mov(x0, 0); 13234 __ Mov(x1, literal_base); 13235 for (unsigned i = 2; i < x30.GetCode(); i++) { 13236 __ Add(Register::GetXRegFromCode(i), Register::GetXRegFromCode(i - 1), x1); 13237 } 13238 before.Dump(&masm); 13239 13240 // All of these instructions should be NOPs in these forms, but have 13241 // alternate forms which can write into the stack pointer. 
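// (Background, as a sketch: register code 31 encodes either xzr or sp
// depending on the instruction form. A shifted-register `add xzr, x0, x1`
// simply discards its result, whereas the otherwise-similar immediate form
// `add sp, sp, #16` really does update the stack pointer, which is why the
// zero-register forms below deserve explicit testing.)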
13242 { 13243 ExactAssemblyScope scope(&masm, 3 * 7 * kInstructionSize); 13244 __ add(xzr, x0, x1); 13245 __ add(xzr, x1, xzr); 13246 __ add(xzr, xzr, x1); 13247 13248 __ and_(xzr, x0, x2); 13249 __ and_(xzr, x2, xzr); 13250 __ and_(xzr, xzr, x2); 13251 13252 __ bic(xzr, x0, x3); 13253 __ bic(xzr, x3, xzr); 13254 __ bic(xzr, xzr, x3); 13255 13256 __ eon(xzr, x0, x4); 13257 __ eon(xzr, x4, xzr); 13258 __ eon(xzr, xzr, x4); 13259 13260 __ eor(xzr, x0, x5); 13261 __ eor(xzr, x5, xzr); 13262 __ eor(xzr, xzr, x5); 13263 13264 __ orr(xzr, x0, x6); 13265 __ orr(xzr, x6, xzr); 13266 __ orr(xzr, xzr, x6); 13267 13268 __ sub(xzr, x0, x7); 13269 __ sub(xzr, x7, xzr); 13270 __ sub(xzr, xzr, x7); 13271 } 13272 13273 // Swap the saved stack pointer with the real one. If sp was written 13274 // during the test, it will show up in x30. This is done because the test 13275 // framework assumes that sp will be valid at the end of the test. 13276 __ Mov(x29, x30); 13277 __ Mov(x30, sp); 13278 __ Mov(sp, x29); 13279 // We used x29 as a scratch register, so reset it to make sure it doesn't 13280 // trigger a test failure. 13281 __ Add(x29, x28, x1); 13282 END(); 13283 13284 RUN(); 13285 13286 ASSERT_EQUAL_REGISTERS(before); 13287 ASSERT_EQUAL_NZCV(before.flags_nzcv()); 13288 13289 TEARDOWN(); 13290 } 13291 13292 13293 TEST(zero_dest_setflags) { 13294 SETUP(); 13295 RegisterDump before; 13296 13297 START(); 13298 // Preserve the stack pointer, in case we clobber it. 13299 __ Mov(x30, sp); 13300 // Initialize the other registers used in this test. 13301 uint64_t literal_base = 0x0100001000100101; 13302 __ Mov(x0, 0); 13303 __ Mov(x1, literal_base); 13304 for (int i = 2; i < 30; i++) { 13305 __ Add(Register::GetXRegFromCode(i), Register::GetXRegFromCode(i - 1), x1); 13306 } 13307 before.Dump(&masm); 13308 13309 // All of these instructions should only write to the flags in these forms, 13310 // but have alternate forms which can write into the stack pointer. 13311 { 13312 ExactAssemblyScope scope(&masm, 6 * kInstructionSize); 13313 __ adds(xzr, x0, Operand(x1, UXTX)); 13314 __ adds(xzr, x1, Operand(xzr, UXTX)); 13315 __ adds(xzr, x1, 1234); 13316 __ adds(xzr, x0, x1); 13317 __ adds(xzr, x1, xzr); 13318 __ adds(xzr, xzr, x1); 13319 } 13320 13321 { 13322 ExactAssemblyScope scope(&masm, 5 * kInstructionSize); 13323 __ ands(xzr, x2, ~0xf); 13324 __ ands(xzr, xzr, ~0xf); 13325 __ ands(xzr, x0, x2); 13326 __ ands(xzr, x2, xzr); 13327 __ ands(xzr, xzr, x2); 13328 } 13329 13330 { 13331 ExactAssemblyScope scope(&masm, 5 * kInstructionSize); 13332 __ bics(xzr, x3, ~0xf); 13333 __ bics(xzr, xzr, ~0xf); 13334 __ bics(xzr, x0, x3); 13335 __ bics(xzr, x3, xzr); 13336 __ bics(xzr, xzr, x3); 13337 } 13338 13339 { 13340 ExactAssemblyScope scope(&masm, 6 * kInstructionSize); 13341 __ subs(xzr, x0, Operand(x3, UXTX)); 13342 __ subs(xzr, x3, Operand(xzr, UXTX)); 13343 __ subs(xzr, x3, 1234); 13344 __ subs(xzr, x0, x3); 13345 __ subs(xzr, x3, xzr); 13346 __ subs(xzr, xzr, x3); 13347 } 13348 13349 // Swap the saved stack pointer with the real one. If sp was written 13350 // during the test, it will show up in x30. This is done because the test 13351 // framework assumes that sp will be valid at the end of the test. 13352 __ Mov(x29, x30); 13353 __ Mov(x30, sp); 13354 __ Mov(sp, x29); 13355 // We used x29 as a scratch register, so reset it to make sure it doesn't 13356 // trigger a test failure. 
13357 __ Add(x29, x28, x1); 13358 END(); 13359 13360 RUN(); 13361 13362 ASSERT_EQUAL_REGISTERS(before); 13363 13364 TEARDOWN(); 13365 } 13366 13367 13368 TEST(register_bit) { 13369 // No code generation takes place in this test, so the SETUP() and 13370 // TEARDOWN() machinery is not needed. 13371 13372 // Simple tests. 13373 VIXL_CHECK(x0.GetBit() == (UINT64_C(1) << 0)); 13374 VIXL_CHECK(x1.GetBit() == (UINT64_C(1) << 1)); 13375 VIXL_CHECK(x10.GetBit() == (UINT64_C(1) << 10)); 13376 13377 // AAPCS64 definitions. 13378 VIXL_CHECK(lr.GetBit() == (UINT64_C(1) << kLinkRegCode)); 13379 13380 // Fixed (hardware) definitions. 13381 VIXL_CHECK(xzr.GetBit() == (UINT64_C(1) << kZeroRegCode)); 13382 13383 // Internal ABI definitions. 13384 VIXL_CHECK(sp.GetBit() == (UINT64_C(1) << kSPRegInternalCode)); 13385 VIXL_CHECK(sp.GetBit() != xzr.GetBit()); 13386 13387 // xn.GetBit() == wn.GetBit() at all times, for the same n. 13388 VIXL_CHECK(x0.GetBit() == w0.GetBit()); 13389 VIXL_CHECK(x1.GetBit() == w1.GetBit()); 13390 VIXL_CHECK(x10.GetBit() == w10.GetBit()); 13391 VIXL_CHECK(xzr.GetBit() == wzr.GetBit()); 13392 VIXL_CHECK(sp.GetBit() == wsp.GetBit()); 13393 } 13394 13395 13396 TEST(stack_pointer_override) { 13397 // This test generates some stack maintenance code, but the test only checks 13398 // the reported state. 13399 SETUP(); 13400 START(); 13401 13402 // The default stack pointer in VIXL is sp. 13403 VIXL_CHECK(sp.Is(__ StackPointer())); 13404 __ SetStackPointer(x0); 13405 VIXL_CHECK(x0.Is(__ StackPointer())); 13406 __ SetStackPointer(x28); 13407 VIXL_CHECK(x28.Is(__ StackPointer())); 13408 __ SetStackPointer(sp); 13409 VIXL_CHECK(sp.Is(__ StackPointer())); 13410 13411 END(); 13412 RUN(); 13413 TEARDOWN(); 13414 } 13415 13416 13417 TEST(peek_poke_simple) { 13418 SETUP(); 13419 START(); 13420 13421 static const RegList x0_to_x3 = 13422 x0.GetBit() | x1.GetBit() | x2.GetBit() | x3.GetBit(); 13423 static const RegList x10_to_x13 = 13424 x10.GetBit() | x11.GetBit() | x12.GetBit() | x13.GetBit(); 13425 13426 // The literal base is chosen to have two useful properties: 13427 // * When multiplied by small values (such as a register index), this value 13428 // is clearly readable in the result. 13429 // * The value is not formed from repeating fixed-size smaller values, so it 13430 // can be used to detect endianness-related errors. 13431 uint64_t literal_base = 0x0100001000100101; 13432 13433 // Initialize the registers. 13434 __ Mov(x0, literal_base); 13435 __ Add(x1, x0, x0); 13436 __ Add(x2, x1, x0); 13437 __ Add(x3, x2, x0); 13438 13439 __ Claim(32); 13440 13441 // Simple exchange. 13442 // After this test: 13443 // x0-x3 should be unchanged. 13444 // w10-w13 should contain the lower words of x0-x3.
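// (Informally, Poke stores a register at the given byte offset from the
// current stack pointer and Peek is the matching load, so Poke(x0, 0)
// behaves roughly like `str x0, [sp]`. This is a sketch of the intended
// semantics, not a guarantee about the exact instructions generated.)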
13445 __ Poke(x0, 0); 13446 __ Poke(x1, 8); 13447 __ Poke(x2, 16); 13448 __ Poke(x3, 24); 13449 Clobber(&masm, x0_to_x3); 13450 __ Peek(x0, 0); 13451 __ Peek(x1, 8); 13452 __ Peek(x2, 16); 13453 __ Peek(x3, 24); 13454 13455 __ Poke(w0, 0); 13456 __ Poke(w1, 4); 13457 __ Poke(w2, 8); 13458 __ Poke(w3, 12); 13459 Clobber(&masm, x10_to_x13); 13460 __ Peek(w10, 0); 13461 __ Peek(w11, 4); 13462 __ Peek(w12, 8); 13463 __ Peek(w13, 12); 13464 13465 __ Drop(32); 13466 13467 END(); 13468 RUN(); 13469 13470 ASSERT_EQUAL_64(literal_base * 1, x0); 13471 ASSERT_EQUAL_64(literal_base * 2, x1); 13472 ASSERT_EQUAL_64(literal_base * 3, x2); 13473 ASSERT_EQUAL_64(literal_base * 4, x3); 13474 13475 ASSERT_EQUAL_64((literal_base * 1) & 0xffffffff, x10); 13476 ASSERT_EQUAL_64((literal_base * 2) & 0xffffffff, x11); 13477 ASSERT_EQUAL_64((literal_base * 3) & 0xffffffff, x12); 13478 ASSERT_EQUAL_64((literal_base * 4) & 0xffffffff, x13); 13479 13480 TEARDOWN(); 13481 } 13482 13483 13484 TEST(peek_poke_unaligned) { 13485 SETUP(); 13486 START(); 13487 13488 // The literal base is chosen to have two useful properties: 13489 // * When multiplied by small values (such as a register index), this value 13490 // is clearly readable in the result. 13491 // * The value is not formed from repeating fixed-size smaller values, so it 13492 // can be used to detect endianness-related errors. 13493 uint64_t literal_base = 0x0100001000100101; 13494 13495 // Initialize the registers. 13496 __ Mov(x0, literal_base); 13497 __ Add(x1, x0, x0); 13498 __ Add(x2, x1, x0); 13499 __ Add(x3, x2, x0); 13500 __ Add(x4, x3, x0); 13501 __ Add(x5, x4, x0); 13502 __ Add(x6, x5, x0); 13503 13504 __ Claim(32); 13505 13506 // Unaligned exchanges. 13507 // After this test: 13508 // x0-x6 should be unchanged. 13509 // w10-w12 should contain the lower words of x0-x2. 13510 __ Poke(x0, 1); 13511 Clobber(&masm, x0.GetBit()); 13512 __ Peek(x0, 1); 13513 __ Poke(x1, 2); 13514 Clobber(&masm, x1.GetBit()); 13515 __ Peek(x1, 2); 13516 __ Poke(x2, 3); 13517 Clobber(&masm, x2.GetBit()); 13518 __ Peek(x2, 3); 13519 __ Poke(x3, 4); 13520 Clobber(&masm, x3.GetBit()); 13521 __ Peek(x3, 4); 13522 __ Poke(x4, 5); 13523 Clobber(&masm, x4.GetBit()); 13524 __ Peek(x4, 5); 13525 __ Poke(x5, 6); 13526 Clobber(&masm, x5.GetBit()); 13527 __ Peek(x5, 6); 13528 __ Poke(x6, 7); 13529 Clobber(&masm, x6.GetBit()); 13530 __ Peek(x6, 7); 13531 13532 __ Poke(w0, 1); 13533 Clobber(&masm, w10.GetBit()); 13534 __ Peek(w10, 1); 13535 __ Poke(w1, 2); 13536 Clobber(&masm, w11.GetBit()); 13537 __ Peek(w11, 2); 13538 __ Poke(w2, 3); 13539 Clobber(&masm, w12.GetBit()); 13540 __ Peek(w12, 3); 13541 13542 __ Drop(32); 13543 13544 END(); 13545 RUN(); 13546 13547 ASSERT_EQUAL_64(literal_base * 1, x0); 13548 ASSERT_EQUAL_64(literal_base * 2, x1); 13549 ASSERT_EQUAL_64(literal_base * 3, x2); 13550 ASSERT_EQUAL_64(literal_base * 4, x3); 13551 ASSERT_EQUAL_64(literal_base * 5, x4); 13552 ASSERT_EQUAL_64(literal_base * 6, x5); 13553 ASSERT_EQUAL_64(literal_base * 7, x6); 13554 13555 ASSERT_EQUAL_64((literal_base * 1) & 0xffffffff, x10); 13556 ASSERT_EQUAL_64((literal_base * 2) & 0xffffffff, x11); 13557 ASSERT_EQUAL_64((literal_base * 3) & 0xffffffff, x12); 13558 13559 TEARDOWN(); 13560 } 13561 13562 13563 TEST(peek_poke_endianness) { 13564 SETUP(); 13565 START(); 13566 13567 // The literal base is chosen to have two useful properties: 13568 // * When multiplied by small values (such as a register index), this value 13569 // is clearly readable in the result. 
13570 // * The value is not formed from repeating fixed-size smaller values, so it 13571 // can be used to detect endianness-related errors. 13572 uint64_t literal_base = 0x0100001000100101; 13573 13574 // Initialize the registers. 13575 __ Mov(x0, literal_base); 13576 __ Add(x1, x0, x0); 13577 13578 __ Claim(32); 13579 13580 // Endianness tests. 13581 // After this section: 13582 // x4 should match x0[31:0]:x0[63:32] 13583 // w5 should match w1[15:0]:w1[31:16] 13584 __ Poke(x0, 0); 13585 __ Poke(x0, 8); 13586 __ Peek(x4, 4); 13587 13588 __ Poke(w1, 0); 13589 __ Poke(w1, 4); 13590 __ Peek(w5, 2); 13591 13592 __ Drop(32); 13593 13594 END(); 13595 RUN(); 13596 13597 uint64_t x0_expected = literal_base * 1; 13598 uint64_t x1_expected = literal_base * 2; 13599 uint64_t x4_expected = (x0_expected << 32) | (x0_expected >> 32); 13600 uint64_t x5_expected = 13601 ((x1_expected << 16) & 0xffff0000) | ((x1_expected >> 16) & 0x0000ffff); 13602 13603 ASSERT_EQUAL_64(x0_expected, x0); 13604 ASSERT_EQUAL_64(x1_expected, x1); 13605 ASSERT_EQUAL_64(x4_expected, x4); 13606 ASSERT_EQUAL_64(x5_expected, x5); 13607 13608 TEARDOWN(); 13609 } 13610 13611 13612 TEST(peek_poke_mixed) { 13613 SETUP(); 13614 START(); 13615 13616 // Acquire all temps from the MacroAssembler. They are used arbitrarily below. 13617 UseScratchRegisterScope temps(&masm); 13618 temps.ExcludeAll(); 13619 13620 // The literal base is chosen to have two useful properties: 13621 // * When multiplied by small values (such as a register index), this value 13622 // is clearly readable in the result. 13623 // * The value is not formed from repeating fixed-size smaller values, so it 13624 // can be used to detect endianness-related errors. 13625 uint64_t literal_base = 0x0100001000100101; 13626 13627 // Initialize the registers. 13628 __ Mov(x0, literal_base); 13629 __ Add(x1, x0, x0); 13630 __ Add(x2, x1, x0); 13631 __ Add(x3, x2, x0); 13632 13633 __ Claim(32); 13634 13635 // Mix with other stack operations. 13636 // After this section: 13637 // x0-x3 should be unchanged. 13638 // x6 should match x1[31:0]:x0[63:32] 13639 // w7 should match x1[15:0]:x0[63:48] 13640 __ Poke(x1, 8); 13641 __ Poke(x0, 0); 13642 { 13643 VIXL_ASSERT(__ StackPointer().Is(sp)); 13644 __ Mov(x4, __ StackPointer()); 13645 __ SetStackPointer(x4); 13646 13647 __ Poke(wzr, 0); // Clobber the space we're about to drop. 13648 __ Drop(4); 13649 __ Peek(x6, 0); 13650 __ Claim(8); 13651 __ Peek(w7, 10); 13652 __ Poke(x3, 28); 13653 __ Poke(xzr, 0); // Clobber the space we're about to drop. 13654 __ Drop(8); 13655 __ Poke(x2, 12); 13656 __ Push(w0); 13657 13658 __ Mov(sp, __ StackPointer()); 13659 __ SetStackPointer(sp); 13660 } 13661 13662 __ Pop(x0, x1, x2, x3); 13663 13664 END(); 13665 RUN(); 13666 13667 uint64_t x0_expected = literal_base * 1; 13668 uint64_t x1_expected = literal_base * 2; 13669 uint64_t x2_expected = literal_base * 3; 13670 uint64_t x3_expected = literal_base * 4; 13671 uint64_t x6_expected = (x1_expected << 32) | (x0_expected >> 32); 13672 uint64_t x7_expected = 13673 ((x1_expected << 16) & 0xffff0000) | ((x0_expected >> 48) & 0x0000ffff); 13674 13675 ASSERT_EQUAL_64(x0_expected, x0); 13676 ASSERT_EQUAL_64(x1_expected, x1); 13677 ASSERT_EQUAL_64(x2_expected, x2); 13678 ASSERT_EQUAL_64(x3_expected, x3); 13679 ASSERT_EQUAL_64(x6_expected, x6); 13680 ASSERT_EQUAL_64(x7_expected, x7); 13681 13682 TEARDOWN(); 13683 } 13684 13685 13686 TEST(peek_poke_reglist) { 13687 SETUP(); 13688 START(); 13689 13690 // Acquire all temps from the MacroAssembler. 
They are used arbitrarily below. 13691 UseScratchRegisterScope temps(&masm); 13692 temps.ExcludeAll(); 13693 13694 // The literal base is chosen to have two useful properties: 13695 // * When multiplied by small values (such as a register index), this value 13696 // is clearly readable in the result. 13697 // * The value is not formed from repeating fixed-size smaller values, so it 13698 // can be used to detect endianness-related errors. 13699 uint64_t base = 0x0100001000100101; 13700 13701 // Initialize the registers. 13702 __ Mov(x1, base); 13703 __ Add(x2, x1, x1); 13704 __ Add(x3, x2, x1); 13705 __ Add(x4, x3, x1); 13706 13707 CPURegList list_1(x1, x2, x3, x4); 13708 CPURegList list_2(x11, x12, x13, x14); 13709 int list_1_size = list_1.GetTotalSizeInBytes(); 13710 13711 __ Claim(2 * list_1_size); 13712 13713 __ PokeCPURegList(list_1, 0); 13714 __ PokeXRegList(list_1.GetList(), list_1_size); 13715 __ PeekCPURegList(list_2, 2 * kXRegSizeInBytes); 13716 __ PeekXRegList(x15.GetBit(), kWRegSizeInBytes); 13717 __ PeekWRegList(w16.GetBit() | w17.GetBit(), 3 * kXRegSizeInBytes); 13718 13719 __ Drop(2 * list_1_size); 13720 13721 13722 uint64_t base_d = 0x1010010001000010; 13723 13724 // Initialize the registers. 13725 __ Mov(x1, base_d); 13726 __ Add(x2, x1, x1); 13727 __ Add(x3, x2, x1); 13728 __ Add(x4, x3, x1); 13729 __ Fmov(d1, x1); 13730 __ Fmov(d2, x2); 13731 __ Fmov(d3, x3); 13732 __ Fmov(d4, x4); 13733 13734 CPURegList list_d_1(d1, d2, d3, d4); 13735 CPURegList list_d_2(d11, d12, d13, d14); 13736 int list_d_1_size = list_d_1.GetTotalSizeInBytes(); 13737 13738 __ Claim(2 * list_d_1_size); 13739 13740 __ PokeCPURegList(list_d_1, 0); 13741 __ PokeDRegList(list_d_1.GetList(), list_d_1_size); 13742 __ PeekCPURegList(list_d_2, 2 * kDRegSizeInBytes); 13743 __ PeekDRegList(d15.GetBit(), kSRegSizeInBytes); 13744 __ PeekSRegList(s16.GetBit() | s17.GetBit(), 3 * kDRegSizeInBytes); 13745 13746 __ Drop(2 * list_d_1_size); 13747 13748 13749 END(); 13750 RUN(); 13751 13752 ASSERT_EQUAL_64(3 * base, x11); 13753 ASSERT_EQUAL_64(4 * base, x12); 13754 ASSERT_EQUAL_64(1 * base, x13); 13755 ASSERT_EQUAL_64(2 * base, x14); 13756 ASSERT_EQUAL_64(((1 * base) >> kWRegSize) | ((2 * base) << kWRegSize), x15); 13757 ASSERT_EQUAL_64(2 * base, x14); 13758 ASSERT_EQUAL_32((4 * base) & kWRegMask, w16); 13759 ASSERT_EQUAL_32((4 * base) >> kWRegSize, w17); 13760 13761 ASSERT_EQUAL_FP64(RawbitsToDouble(3 * base_d), d11); 13762 ASSERT_EQUAL_FP64(RawbitsToDouble(4 * base_d), d12); 13763 ASSERT_EQUAL_FP64(RawbitsToDouble(1 * base_d), d13); 13764 ASSERT_EQUAL_FP64(RawbitsToDouble(2 * base_d), d14); 13765 ASSERT_EQUAL_FP64(RawbitsToDouble((base_d >> kSRegSize) | 13766 ((2 * base_d) << kSRegSize)), 13767 d15); 13768 ASSERT_EQUAL_FP64(RawbitsToDouble(2 * base_d), d14); 13769 ASSERT_EQUAL_FP32(RawbitsToFloat((4 * base_d) & kSRegMask), s16); 13770 ASSERT_EQUAL_FP32(RawbitsToFloat((4 * base_d) >> kSRegSize), s17); 13771 13772 TEARDOWN(); 13773 } 13774 13775 13776 TEST(load_store_reglist) { 13777 SETUP(); 13778 START(); 13779 13780 // The literal base is chosen to have two useful properties: 13781 // * When multiplied by small values (such as a register index), this value 13782 // is clearly readable in the result. 13783 // * The value is not formed from repeating fixed-size smaller values, so it 13784 // can be used to detect endianness-related errors. 
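// For example, small multiples of this base remain clearly readable because
// no nibble ever carries:
//   1 * 0x0100001000100101 = 0x0100001000100101
//   2 * 0x0100001000100101 = 0x0200002000200202
//   3 * 0x0100001000100101 = 0x0300003000300303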
13785 uint64_t high_base = UINT32_C(0x01000010); 13786 uint64_t low_base = UINT32_C(0x00100101); 13787 uint64_t base = (high_base << 32) | low_base; 13788 uint64_t array[21]; 13789 memset(array, 0, sizeof(array)); 13790 13791 // Initialize the registers. 13792 __ Mov(x1, base); 13793 __ Add(x2, x1, x1); 13794 __ Add(x3, x2, x1); 13795 __ Add(x4, x3, x1); 13796 __ Fmov(d1, x1); 13797 __ Fmov(d2, x2); 13798 __ Fmov(d3, x3); 13799 __ Fmov(d4, x4); 13800 __ Fmov(d5, x1); 13801 __ Fmov(d6, x2); 13802 __ Fmov(d7, x3); 13803 __ Fmov(d8, x4); 13804 13805 Register reg_base = x20; 13806 Register reg_index = x21; 13807 int size_stored = 0; 13808 13809 __ Mov(reg_base, reinterpret_cast<uintptr_t>(&array)); 13810 13811 // Test aligned accesses. 13812 CPURegList list_src(w1, w2, w3, w4); 13813 CPURegList list_dst(w11, w12, w13, w14); 13814 CPURegList list_fp_src_1(d1, d2, d3, d4); 13815 CPURegList list_fp_dst_1(d11, d12, d13, d14); 13816 13817 __ StoreCPURegList(list_src, MemOperand(reg_base, 0 * sizeof(uint64_t))); 13818 __ LoadCPURegList(list_dst, MemOperand(reg_base, 0 * sizeof(uint64_t))); 13819 size_stored += 4 * kWRegSizeInBytes; 13820 13821 __ Mov(reg_index, size_stored); 13822 __ StoreCPURegList(list_src, MemOperand(reg_base, reg_index)); 13823 __ LoadCPURegList(list_dst, MemOperand(reg_base, reg_index)); 13824 size_stored += 4 * kWRegSizeInBytes; 13825 13826 __ StoreCPURegList(list_fp_src_1, MemOperand(reg_base, size_stored)); 13827 __ LoadCPURegList(list_fp_dst_1, MemOperand(reg_base, size_stored)); 13828 size_stored += 4 * kDRegSizeInBytes; 13829 13830 __ Mov(reg_index, size_stored); 13831 __ StoreCPURegList(list_fp_src_1, MemOperand(reg_base, reg_index)); 13832 __ LoadCPURegList(list_fp_dst_1, MemOperand(reg_base, reg_index)); 13833 size_stored += 4 * kDRegSizeInBytes; 13834 13835 // Test unaligned accesses. 
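// (The Str(wzr, ...) below deliberately advances the offset by one W-sized
// slot, so the following D-register stores and loads are only four-byte
// aligned rather than eight-byte aligned.)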
13836 CPURegList list_fp_src_2(d5, d6, d7, d8); 13837 CPURegList list_fp_dst_2(d15, d16, d17, d18); 13838 13839 __ Str(wzr, MemOperand(reg_base, size_stored)); 13840 size_stored += 1 * kWRegSizeInBytes; 13841 __ StoreCPURegList(list_fp_src_2, MemOperand(reg_base, size_stored)); 13842 __ LoadCPURegList(list_fp_dst_2, MemOperand(reg_base, size_stored)); 13843 size_stored += 4 * kDRegSizeInBytes; 13844 13845 __ Mov(reg_index, size_stored); 13846 __ StoreCPURegList(list_fp_src_2, MemOperand(reg_base, reg_index)); 13847 __ LoadCPURegList(list_fp_dst_2, MemOperand(reg_base, reg_index)); 13848 13849 END(); 13850 RUN(); 13851 13852 VIXL_CHECK(array[0] == (1 * low_base) + (2 * low_base << kWRegSize)); 13853 VIXL_CHECK(array[1] == (3 * low_base) + (4 * low_base << kWRegSize)); 13854 VIXL_CHECK(array[2] == (1 * low_base) + (2 * low_base << kWRegSize)); 13855 VIXL_CHECK(array[3] == (3 * low_base) + (4 * low_base << kWRegSize)); 13856 VIXL_CHECK(array[4] == 1 * base); 13857 VIXL_CHECK(array[5] == 2 * base); 13858 VIXL_CHECK(array[6] == 3 * base); 13859 VIXL_CHECK(array[7] == 4 * base); 13860 VIXL_CHECK(array[8] == 1 * base); 13861 VIXL_CHECK(array[9] == 2 * base); 13862 VIXL_CHECK(array[10] == 3 * base); 13863 VIXL_CHECK(array[11] == 4 * base); 13864 VIXL_CHECK(array[12] == ((1 * low_base) << kSRegSize)); 13865 VIXL_CHECK(array[13] == (((2 * low_base) << kSRegSize) | (1 * high_base))); 13866 VIXL_CHECK(array[14] == (((3 * low_base) << kSRegSize) | (2 * high_base))); 13867 VIXL_CHECK(array[15] == (((4 * low_base) << kSRegSize) | (3 * high_base))); 13868 VIXL_CHECK(array[16] == (((1 * low_base) << kSRegSize) | (4 * high_base))); 13869 VIXL_CHECK(array[17] == (((2 * low_base) << kSRegSize) | (1 * high_base))); 13870 VIXL_CHECK(array[18] == (((3 * low_base) << kSRegSize) | (2 * high_base))); 13871 VIXL_CHECK(array[19] == (((4 * low_base) << kSRegSize) | (3 * high_base))); 13872 VIXL_CHECK(array[20] == (4 * high_base)); 13873 13874 ASSERT_EQUAL_64(1 * low_base, x11); 13875 ASSERT_EQUAL_64(2 * low_base, x12); 13876 ASSERT_EQUAL_64(3 * low_base, x13); 13877 ASSERT_EQUAL_64(4 * low_base, x14); 13878 ASSERT_EQUAL_FP64(RawbitsToDouble(1 * base), d11); 13879 ASSERT_EQUAL_FP64(RawbitsToDouble(2 * base), d12); 13880 ASSERT_EQUAL_FP64(RawbitsToDouble(3 * base), d13); 13881 ASSERT_EQUAL_FP64(RawbitsToDouble(4 * base), d14); 13882 ASSERT_EQUAL_FP64(RawbitsToDouble(1 * base), d15); 13883 ASSERT_EQUAL_FP64(RawbitsToDouble(2 * base), d16); 13884 ASSERT_EQUAL_FP64(RawbitsToDouble(3 * base), d17); 13885 ASSERT_EQUAL_FP64(RawbitsToDouble(4 * base), d18); 13886 13887 TEARDOWN(); 13888 } 13889 13890 13891 // This enum is used only as an argument to the push-pop test helpers. 13892 enum PushPopMethod { 13893 // Push or Pop using the Push and Pop methods, with blocks of up to four 13894 // registers. (Smaller blocks will be used if necessary.) 13895 PushPopByFour, 13896 13897 // Use Push<Size>RegList and Pop<Size>RegList to transfer the registers. 13898 PushPopRegList 13899 }; 13900 13901 13902 // The maximum number of registers that can be used by the PushPopXReg* tests, 13903 // where a reg_count field is provided. 13904 static int const kPushPopXRegMaxRegCount = -1; 13905 13906 // Test a simple push-pop pattern: 13907 // * Claim <claim> bytes to set the stack alignment. 13908 // * Push <reg_count> registers with size <reg_size>. 13909 // * Clobber the register contents. 13910 // * Pop <reg_count> registers to restore the original contents. 13911 // * Drop <claim> bytes to restore the original stack pointer. 
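// For example (one illustrative combination, not the only one exercised):
//   PushPopXRegSimpleHelper(4, 8, kXRegSize, PushPopByFour, PushPopRegList)
// claims eight bytes, pushes four X registers with a single Push() call,
// clobbers them, restores them with one PopSizeRegList() call, and then
// drops the claimed eight bytes.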
13912 // 13913 // Different push and pop methods can be specified independently to test for 13914 // proper word-endian behaviour. 13915 static void PushPopXRegSimpleHelper(int reg_count, 13916 int claim, 13917 int reg_size, 13918 PushPopMethod push_method, 13919 PushPopMethod pop_method) { 13920 SETUP(); 13921 13922 START(); 13923 13924 // Arbitrarily pick a register to use as a stack pointer. 13925 const Register& stack_pointer = x20; 13926 const RegList allowed = ~stack_pointer.GetBit(); 13927 if (reg_count == kPushPopXRegMaxRegCount) { 13928 reg_count = CountSetBits(allowed, kNumberOfRegisters); 13929 } 13930 // Work out which registers to use, based on reg_size. 13931 Register r[kNumberOfRegisters]; 13932 Register x[kNumberOfRegisters]; 13933 RegList list = 13934 PopulateRegisterArray(NULL, x, r, reg_size, reg_count, allowed); 13935 13936 // Acquire all temps from the MacroAssembler. They are used arbitrarily below. 13937 UseScratchRegisterScope temps(&masm); 13938 temps.ExcludeAll(); 13939 13940 // The literal base is chosen to have two useful properties: 13941 // * When multiplied by small values (such as a register index), this value 13942 // is clearly readable in the result. 13943 // * The value is not formed from repeating fixed-size smaller values, so it 13944 // can be used to detect endianness-related errors. 13945 uint64_t literal_base = 0x0100001000100101; 13946 13947 { 13948 VIXL_ASSERT(__ StackPointer().Is(sp)); 13949 __ Mov(stack_pointer, __ StackPointer()); 13950 __ SetStackPointer(stack_pointer); 13951 13952 int i; 13953 13954 // Initialize the registers. 13955 for (i = 0; i < reg_count; i++) { 13956 // Always write into the X register, to ensure that the upper word is 13957 // properly ignored by Push when testing W registers. 13958 __ Mov(x[i], literal_base * i); 13959 } 13960 13961 // Claim memory first, as requested. 13962 __ Claim(claim); 13963 13964 switch (push_method) { 13965 case PushPopByFour: 13966 // Push high-numbered registers first (to the highest addresses). 13967 for (i = reg_count; i >= 4; i -= 4) { 13968 __ Push(r[i - 1], r[i - 2], r[i - 3], r[i - 4]); 13969 } 13970 // Finish off the leftovers. 13971 switch (i) { 13972 case 3: 13973 __ Push(r[2], r[1], r[0]); 13974 break; 13975 case 2: 13976 __ Push(r[1], r[0]); 13977 break; 13978 case 1: 13979 __ Push(r[0]); 13980 break; 13981 default: 13982 VIXL_ASSERT(i == 0); 13983 break; 13984 } 13985 break; 13986 case PushPopRegList: 13987 __ PushSizeRegList(list, reg_size); 13988 break; 13989 } 13990 13991 // Clobber all the registers, to ensure that they get repopulated by Pop. 13992 Clobber(&masm, list); 13993 13994 switch (pop_method) { 13995 case PushPopByFour: 13996 // Pop low-numbered registers first (from the lowest addresses). 13997 for (i = 0; i <= (reg_count - 4); i += 4) { 13998 __ Pop(r[i], r[i + 1], r[i + 2], r[i + 3]); 13999 } 14000 // Finish off the leftovers. 14001 switch (reg_count - i) { 14002 case 3: 14003 __ Pop(r[i], r[i + 1], r[i + 2]); 14004 break; 14005 case 2: 14006 __ Pop(r[i], r[i + 1]); 14007 break; 14008 case 1: 14009 __ Pop(r[i]); 14010 break; 14011 default: 14012 VIXL_ASSERT(i == reg_count); 14013 break; 14014 } 14015 break; 14016 case PushPopRegList: 14017 __ PopSizeRegList(list, reg_size); 14018 break; 14019 } 14020 14021 // Drop memory to restore stack_pointer. 14022 __ Drop(claim); 14023 14024 __ Mov(sp, __ StackPointer()); 14025 __ SetStackPointer(sp); 14026 } 14027 14028 END(); 14029 14030 RUN(); 14031 14032 // Check that the register contents were preserved. 
14033 // Always use ASSERT_EQUAL_64, even when testing W registers, so we can test 14034 // that the upper word was properly cleared by Pop. 14035 literal_base &= (0xffffffffffffffff >> (64 - reg_size)); 14036 for (int i = 0; i < reg_count; i++) { 14037 if (x[i].Is(xzr)) { 14038 ASSERT_EQUAL_64(0, x[i]); 14039 } else { 14040 ASSERT_EQUAL_64(literal_base * i, x[i]); 14041 } 14042 } 14043 14044 TEARDOWN(); 14045 } 14046 14047 14048 TEST(push_pop_xreg_simple_32) { 14049 for (int claim = 0; claim <= 8; claim++) { 14050 for (int count = 0; count <= 8; count++) { 14051 PushPopXRegSimpleHelper(count, 14052 claim, 14053 kWRegSize, 14054 PushPopByFour, 14055 PushPopByFour); 14056 PushPopXRegSimpleHelper(count, 14057 claim, 14058 kWRegSize, 14059 PushPopByFour, 14060 PushPopRegList); 14061 PushPopXRegSimpleHelper(count, 14062 claim, 14063 kWRegSize, 14064 PushPopRegList, 14065 PushPopByFour); 14066 PushPopXRegSimpleHelper(count, 14067 claim, 14068 kWRegSize, 14069 PushPopRegList, 14070 PushPopRegList); 14071 } 14072 // Test with the maximum number of registers. 14073 PushPopXRegSimpleHelper(kPushPopXRegMaxRegCount, 14074 claim, 14075 kWRegSize, 14076 PushPopByFour, 14077 PushPopByFour); 14078 PushPopXRegSimpleHelper(kPushPopXRegMaxRegCount, 14079 claim, 14080 kWRegSize, 14081 PushPopByFour, 14082 PushPopRegList); 14083 PushPopXRegSimpleHelper(kPushPopXRegMaxRegCount, 14084 claim, 14085 kWRegSize, 14086 PushPopRegList, 14087 PushPopByFour); 14088 PushPopXRegSimpleHelper(kPushPopXRegMaxRegCount, 14089 claim, 14090 kWRegSize, 14091 PushPopRegList, 14092 PushPopRegList); 14093 } 14094 } 14095 14096 14097 TEST(push_pop_xreg_simple_64) { 14098 for (int claim = 0; claim <= 8; claim++) { 14099 for (int count = 0; count <= 8; count++) { 14100 PushPopXRegSimpleHelper(count, 14101 claim, 14102 kXRegSize, 14103 PushPopByFour, 14104 PushPopByFour); 14105 PushPopXRegSimpleHelper(count, 14106 claim, 14107 kXRegSize, 14108 PushPopByFour, 14109 PushPopRegList); 14110 PushPopXRegSimpleHelper(count, 14111 claim, 14112 kXRegSize, 14113 PushPopRegList, 14114 PushPopByFour); 14115 PushPopXRegSimpleHelper(count, 14116 claim, 14117 kXRegSize, 14118 PushPopRegList, 14119 PushPopRegList); 14120 } 14121 // Test with the maximum number of registers. 14122 PushPopXRegSimpleHelper(kPushPopXRegMaxRegCount, 14123 claim, 14124 kXRegSize, 14125 PushPopByFour, 14126 PushPopByFour); 14127 PushPopXRegSimpleHelper(kPushPopXRegMaxRegCount, 14128 claim, 14129 kXRegSize, 14130 PushPopByFour, 14131 PushPopRegList); 14132 PushPopXRegSimpleHelper(kPushPopXRegMaxRegCount, 14133 claim, 14134 kXRegSize, 14135 PushPopRegList, 14136 PushPopByFour); 14137 PushPopXRegSimpleHelper(kPushPopXRegMaxRegCount, 14138 claim, 14139 kXRegSize, 14140 PushPopRegList, 14141 PushPopRegList); 14142 } 14143 } 14144 14145 14146 // The maximum number of registers that can be used by the PushPopFPXReg* tests, 14147 // where a reg_count field is provided. 14148 static int const kPushPopFPXRegMaxRegCount = -1; 14149 14150 // Test a simple push-pop pattern: 14151 // * Claim <claim> bytes to set the stack alignment. 14152 // * Push <reg_count> FP registers with size <reg_size>. 14153 // * Clobber the register contents. 14154 // * Pop <reg_count> FP registers to restore the original contents. 14155 // * Drop <claim> bytes to restore the original stack pointer. 14156 // 14157 // Different push and pop methods can be specified independently to test for 14158 // proper word-endian behaviour. 
14159 static void PushPopFPXRegSimpleHelper(int reg_count, 14160 int claim, 14161 int reg_size, 14162 PushPopMethod push_method, 14163 PushPopMethod pop_method) { 14164 SETUP(); 14165 14166 START(); 14167 14168 // We can use any floating-point register. None of them are reserved for 14169 // debug code, for example. 14170 static RegList const allowed = ~0; 14171 if (reg_count == kPushPopFPXRegMaxRegCount) { 14172 reg_count = CountSetBits(allowed, kNumberOfFPRegisters); 14173 } 14174 // Work out which registers to use, based on reg_size. 14175 FPRegister v[kNumberOfRegisters]; 14176 FPRegister d[kNumberOfRegisters]; 14177 RegList list = 14178 PopulateFPRegisterArray(NULL, d, v, reg_size, reg_count, allowed); 14179 14180 // Arbitrarily pick a register to use as a stack pointer. 14181 const Register& stack_pointer = x10; 14182 14183 // Acquire all temps from the MacroAssembler. They are used arbitrarily below. 14184 UseScratchRegisterScope temps(&masm); 14185 temps.ExcludeAll(); 14186 14187 // The literal base is chosen to have three useful properties: 14188 // * When multiplied (using an integer) by small values (such as a register 14189 // index), this value is clearly readable in the result. 14190 // * The value is not formed from repeating fixed-size smaller values, so it 14191 // can be used to detect endianness-related errors. 14192 // * It is never a floating-point NaN, and will therefore always compare 14193 // equal to itself. 14194 uint64_t literal_base = 0x0100001000100101; 14195 14196 { 14197 VIXL_ASSERT(__ StackPointer().Is(sp)); 14198 __ Mov(stack_pointer, __ StackPointer()); 14199 __ SetStackPointer(stack_pointer); 14200 14201 int i; 14202 14203 // Initialize the registers, using X registers to load the literal. 14204 __ Mov(x0, 0); 14205 __ Mov(x1, literal_base); 14206 for (i = 0; i < reg_count; i++) { 14207 // Always write into the D register, to ensure that the upper word is 14208 // properly ignored by Push when testing S registers. 14209 __ Fmov(d[i], x0); 14210 // Calculate the next literal. 14211 __ Add(x0, x0, x1); 14212 } 14213 14214 // Claim memory first, as requested. 14215 __ Claim(claim); 14216 14217 switch (push_method) { 14218 case PushPopByFour: 14219 // Push high-numbered registers first (to the highest addresses). 14220 for (i = reg_count; i >= 4; i -= 4) { 14221 __ Push(v[i - 1], v[i - 2], v[i - 3], v[i - 4]); 14222 } 14223 // Finish off the leftovers. 14224 switch (i) { 14225 case 3: 14226 __ Push(v[2], v[1], v[0]); 14227 break; 14228 case 2: 14229 __ Push(v[1], v[0]); 14230 break; 14231 case 1: 14232 __ Push(v[0]); 14233 break; 14234 default: 14235 VIXL_ASSERT(i == 0); 14236 break; 14237 } 14238 break; 14239 case PushPopRegList: 14240 __ PushSizeRegList(list, reg_size, CPURegister::kVRegister); 14241 break; 14242 } 14243 14244 // Clobber all the registers, to ensure that they get repopulated by Pop. 14245 ClobberFP(&masm, list); 14246 14247 switch (pop_method) { 14248 case PushPopByFour: 14249 // Pop low-numbered registers first (from the lowest addresses). 14250 for (i = 0; i <= (reg_count - 4); i += 4) { 14251 __ Pop(v[i], v[i + 1], v[i + 2], v[i + 3]); 14252 } 14253 // Finish off the leftovers.
14254 switch (reg_count - i) { 14255 case 3: 14256 __ Pop(v[i], v[i + 1], v[i + 2]); 14257 break; 14258 case 2: 14259 __ Pop(v[i], v[i + 1]); 14260 break; 14261 case 1: 14262 __ Pop(v[i]); 14263 break; 14264 default: 14265 VIXL_ASSERT(i == reg_count); 14266 break; 14267 } 14268 break; 14269 case PushPopRegList: 14270 __ PopSizeRegList(list, reg_size, CPURegister::kVRegister); 14271 break; 14272 } 14273 14274 // Drop memory to restore the stack pointer. 14275 __ Drop(claim); 14276 14277 __ Mov(sp, __ StackPointer()); 14278 __ SetStackPointer(sp); 14279 } 14280 14281 END(); 14282 14283 RUN(); 14284 14285 // Check that the register contents were preserved. 14286 // Always use ASSERT_EQUAL_FP64, even when testing S registers, so we can 14287 // test that the upper word was properly cleared by Pop. 14288 literal_base &= (0xffffffffffffffff >> (64 - reg_size)); 14289 for (int i = 0; i < reg_count; i++) { 14290 uint64_t literal = literal_base * i; 14291 double expected; 14292 memcpy(&expected, &literal, sizeof(expected)); 14293 ASSERT_EQUAL_FP64(expected, d[i]); 14294 } 14295 14296 TEARDOWN(); 14297 } 14298 14299 14300 TEST(push_pop_fp_xreg_simple_32) { 14301 for (int claim = 0; claim <= 8; claim++) { 14302 for (int count = 0; count <= 8; count++) { 14303 PushPopFPXRegSimpleHelper(count, 14304 claim, 14305 kSRegSize, 14306 PushPopByFour, 14307 PushPopByFour); 14308 PushPopFPXRegSimpleHelper(count, 14309 claim, 14310 kSRegSize, 14311 PushPopByFour, 14312 PushPopRegList); 14313 PushPopFPXRegSimpleHelper(count, 14314 claim, 14315 kSRegSize, 14316 PushPopRegList, 14317 PushPopByFour); 14318 PushPopFPXRegSimpleHelper(count, 14319 claim, 14320 kSRegSize, 14321 PushPopRegList, 14322 PushPopRegList); 14323 } 14324 // Test with the maximum number of registers. 14325 PushPopFPXRegSimpleHelper(kPushPopFPXRegMaxRegCount, 14326 claim, 14327 kSRegSize, 14328 PushPopByFour, 14329 PushPopByFour); 14330 PushPopFPXRegSimpleHelper(kPushPopFPXRegMaxRegCount, 14331 claim, 14332 kSRegSize, 14333 PushPopByFour, 14334 PushPopRegList); 14335 PushPopFPXRegSimpleHelper(kPushPopFPXRegMaxRegCount, 14336 claim, 14337 kSRegSize, 14338 PushPopRegList, 14339 PushPopByFour); 14340 PushPopFPXRegSimpleHelper(kPushPopFPXRegMaxRegCount, 14341 claim, 14342 kSRegSize, 14343 PushPopRegList, 14344 PushPopRegList); 14345 } 14346 } 14347 14348 14349 TEST(push_pop_fp_xreg_simple_64) { 14350 for (int claim = 0; claim <= 8; claim++) { 14351 for (int count = 0; count <= 8; count++) { 14352 PushPopFPXRegSimpleHelper(count, 14353 claim, 14354 kDRegSize, 14355 PushPopByFour, 14356 PushPopByFour); 14357 PushPopFPXRegSimpleHelper(count, 14358 claim, 14359 kDRegSize, 14360 PushPopByFour, 14361 PushPopRegList); 14362 PushPopFPXRegSimpleHelper(count, 14363 claim, 14364 kDRegSize, 14365 PushPopRegList, 14366 PushPopByFour); 14367 PushPopFPXRegSimpleHelper(count, 14368 claim, 14369 kDRegSize, 14370 PushPopRegList, 14371 PushPopRegList); 14372 } 14373 // Test with the maximum number of registers. 
14374 PushPopFPXRegSimpleHelper(kPushPopFPXRegMaxRegCount, 14375 claim, 14376 kDRegSize, 14377 PushPopByFour, 14378 PushPopByFour); 14379 PushPopFPXRegSimpleHelper(kPushPopFPXRegMaxRegCount, 14380 claim, 14381 kDRegSize, 14382 PushPopByFour, 14383 PushPopRegList); 14384 PushPopFPXRegSimpleHelper(kPushPopFPXRegMaxRegCount, 14385 claim, 14386 kDRegSize, 14387 PushPopRegList, 14388 PushPopByFour); 14389 PushPopFPXRegSimpleHelper(kPushPopFPXRegMaxRegCount, 14390 claim, 14391 kDRegSize, 14392 PushPopRegList, 14393 PushPopRegList); 14394 } 14395 } 14396 14397 14398 // Push and pop data using an overlapping combination of Push/Pop and 14399 // RegList-based methods. 14400 static void PushPopXRegMixedMethodsHelper(int claim, int reg_size) { 14401 SETUP(); 14402 14403 // Arbitrarily pick a register to use as a stack pointer. 14404 const Register& stack_pointer = x5; 14405 const RegList allowed = ~stack_pointer.GetBit(); 14406 // Work out which registers to use, based on reg_size. 14407 Register r[10]; 14408 Register x[10]; 14409 PopulateRegisterArray(NULL, x, r, reg_size, 10, allowed); 14410 14411 // Calculate some handy register lists. 14412 RegList r0_to_r3 = 0; 14413 for (int i = 0; i <= 3; i++) { 14414 r0_to_r3 |= x[i].GetBit(); 14415 } 14416 RegList r4_to_r5 = 0; 14417 for (int i = 4; i <= 5; i++) { 14418 r4_to_r5 |= x[i].GetBit(); 14419 } 14420 RegList r6_to_r9 = 0; 14421 for (int i = 6; i <= 9; i++) { 14422 r6_to_r9 |= x[i].GetBit(); 14423 } 14424 14425 // Acquire all temps from the MacroAssembler. They are used arbitrarily below. 14426 UseScratchRegisterScope temps(&masm); 14427 temps.ExcludeAll(); 14428 14429 // The literal base is chosen to have two useful properties: 14430 // * When multiplied by small values (such as a register index), this value 14431 // is clearly readable in the result. 14432 // * The value is not formed from repeating fixed-size smaller values, so it 14433 // can be used to detect endianness-related errors. 14434 uint64_t literal_base = 0x0100001000100101; 14435 14436 START(); 14437 { 14438 VIXL_ASSERT(__ StackPointer().Is(sp)); 14439 __ Mov(stack_pointer, __ StackPointer()); 14440 __ SetStackPointer(stack_pointer); 14441 14442 // Claim memory first, as requested. 14443 __ Claim(claim); 14444 14445 __ Mov(x[3], literal_base * 3); 14446 __ Mov(x[2], literal_base * 2); 14447 __ Mov(x[1], literal_base * 1); 14448 __ Mov(x[0], literal_base * 0); 14449 14450 __ PushSizeRegList(r0_to_r3, reg_size); 14451 __ Push(r[3], r[2]); 14452 14453 Clobber(&masm, r0_to_r3); 14454 __ PopSizeRegList(r0_to_r3, reg_size); 14455 14456 __ Push(r[2], r[1], r[3], r[0]); 14457 14458 Clobber(&masm, r4_to_r5); 14459 __ Pop(r[4], r[5]); 14460 Clobber(&masm, r6_to_r9); 14461 __ Pop(r[6], r[7], r[8], r[9]); 14462 14463 // Drop memory to restore stack_pointer. 14464 __ Drop(claim); 14465 14466 __ Mov(sp, __ StackPointer()); 14467 __ SetStackPointer(sp); 14468 } 14469 14470 END(); 14471 14472 RUN(); 14473 14474 // Always use ASSERT_EQUAL_64, even when testing W registers, so we can test 14475 // that the upper word was properly cleared by Pop. 
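// (On AArch64, a write to a W register zero-extends into the corresponding
// X register, so after popping into W registers the upper 32 bits of each X
// register must read as zero.)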
14476 literal_base &= (0xffffffffffffffff >> (64 - reg_size)); 14477 14478 ASSERT_EQUAL_64(literal_base * 3, x[9]); 14479 ASSERT_EQUAL_64(literal_base * 2, x[8]); 14480 ASSERT_EQUAL_64(literal_base * 0, x[7]); 14481 ASSERT_EQUAL_64(literal_base * 3, x[6]); 14482 ASSERT_EQUAL_64(literal_base * 1, x[5]); 14483 ASSERT_EQUAL_64(literal_base * 2, x[4]); 14484 14485 TEARDOWN(); 14486 } 14487 14488 14489 TEST(push_pop_xreg_mixed_methods_64) { 14490 for (int claim = 0; claim <= 8; claim++) { 14491 PushPopXRegMixedMethodsHelper(claim, kXRegSize); 14492 } 14493 } 14494 14495 14496 TEST(push_pop_xreg_mixed_methods_32) { 14497 for (int claim = 0; claim <= 8; claim++) { 14498 PushPopXRegMixedMethodsHelper(claim, kWRegSize); 14499 } 14500 } 14501 14502 14503 // Push and pop data using overlapping X- and W-sized quantities. 14504 static void PushPopXRegWXOverlapHelper(int reg_count, int claim) { 14505 SETUP(); 14506 14507 // Arbitrarily pick a register to use as a stack pointer. 14508 const Register& stack_pointer = x10; 14509 const RegList allowed = ~stack_pointer.GetBit(); 14510 if (reg_count == kPushPopXRegMaxRegCount) { 14511 reg_count = CountSetBits(allowed, kNumberOfRegisters); 14512 } 14513 // Work out which registers to use, based on reg_size. 14514 Register w[kNumberOfRegisters]; 14515 Register x[kNumberOfRegisters]; 14516 RegList list = PopulateRegisterArray(w, x, NULL, 0, reg_count, allowed); 14517 14518 // The number of W-sized slots we expect to pop. When we pop, we alternate 14519 // between W and X registers, so we need reg_count*1.5 W-sized slots. 14520 int const requested_w_slots = reg_count + reg_count / 2; 14521 14522 // Track what _should_ be on the stack, using W-sized slots. 14523 static int const kMaxWSlots = kNumberOfRegisters + kNumberOfRegisters / 2; 14524 uint32_t stack[kMaxWSlots]; 14525 for (int i = 0; i < kMaxWSlots; i++) { 14526 stack[i] = 0xdeadbeef; 14527 } 14528 14529 // Acquire all temps from the MacroAssembler. They are used arbitrarily below. 14530 UseScratchRegisterScope temps(&masm); 14531 temps.ExcludeAll(); 14532 14533 // The literal base is chosen to have two useful properties: 14534 // * When multiplied by small values (such as a register index), this value 14535 // is clearly readable in the result. 14536 // * The value is not formed from repeating fixed-size smaller values, so it 14537 // can be used to detect endianness-related errors. 14538 static uint64_t const literal_base = 0x0100001000100101; 14539 static uint64_t const literal_base_hi = literal_base >> 32; 14540 static uint64_t const literal_base_lo = literal_base & 0xffffffff; 14541 static uint64_t const literal_base_w = literal_base & 0xffffffff; 14542 14543 START(); 14544 { 14545 VIXL_ASSERT(__ StackPointer().Is(sp)); 14546 __ Mov(stack_pointer, __ StackPointer()); 14547 __ SetStackPointer(stack_pointer); 14548 14549 // Initialize the registers. 14550 for (int i = 0; i < reg_count; i++) { 14551 // Always write into the X register, to ensure that the upper word is 14552 // properly ignored by Push when testing W registers. 14553 __ Mov(x[i], literal_base * i); 14554 } 14555 14556 // Claim memory first, as requested. 
14557 __ Claim(claim); 14558 14559 // The push-pop pattern is as follows: 14560 // Push: Pop: 14561 // x[0](hi) -> w[0] 14562 // x[0](lo) -> x[1](hi) 14563 // w[1] -> x[1](lo) 14564 // w[1] -> w[2] 14565 // x[2](hi) -> x[2](hi) 14566 // x[2](lo) -> x[2](lo) 14567 // x[2](hi) -> w[3] 14568 // x[2](lo) -> x[4](hi) 14569 // x[2](hi) -> x[4](lo) 14570 // x[2](lo) -> w[5] 14571 // w[3] -> x[5](hi) 14572 // w[3] -> x[6](lo) 14573 // w[3] -> w[7] 14574 // w[3] -> x[8](hi) 14575 // x[4](hi) -> x[8](lo) 14576 // x[4](lo) -> w[9] 14577 // ... pattern continues ... 14578 // 14579 // That is, registers are pushed starting with the lower numbers, 14580 // alternating between x and w registers, and pushing i%4+1 copies of each, 14581 // where i is the register number. 14582 // Registers are popped starting with the higher numbers one-by-one, 14583 // alternating between x and w registers, but only popping one at a time. 14584 // 14585 // This pattern provides a wide variety of alignment effects and overlaps. 14586 14587 // ---- Push ---- 14588 14589 int active_w_slots = 0; 14590 for (int i = 0; active_w_slots < requested_w_slots; i++) { 14591 VIXL_ASSERT(i < reg_count); 14592 // In order to test various arguments to PushMultipleTimes, and to try to 14593 // exercise different alignment and overlap effects, we push each 14594 // register a different number of times. 14595 int times = i % 4 + 1; 14596 if (i & 1) { 14597 // Push odd-numbered registers as W registers. 14598 __ PushMultipleTimes(times, w[i]); 14599 // Fill in the expected stack slots. 14600 for (int j = 0; j < times; j++) { 14601 if (w[i].Is(wzr)) { 14602 // The zero register always writes zeroes. 14603 stack[active_w_slots++] = 0; 14604 } else { 14605 stack[active_w_slots++] = literal_base_w * i; 14606 } 14607 } 14608 } else { 14609 // Push even-numbered registers as X registers. 14610 __ PushMultipleTimes(times, x[i]); 14611 // Fill in the expected stack slots. 14612 for (int j = 0; j < times; j++) { 14613 if (x[i].Is(xzr)) { 14614 // The zero register always writes zeroes. 14615 stack[active_w_slots++] = 0; 14616 stack[active_w_slots++] = 0; 14617 } else { 14618 stack[active_w_slots++] = literal_base_hi * i; 14619 stack[active_w_slots++] = literal_base_lo * i; 14620 } 14621 } 14622 } 14623 } 14624 // Because we were pushing several registers at a time, we probably pushed 14625 // more than we needed to. 14626 if (active_w_slots > requested_w_slots) { 14627 __ Drop((active_w_slots - requested_w_slots) * kWRegSizeInBytes); 14628 // Bump the number of active W-sized slots back to where it should be, 14629 // and fill the empty space with a dummy value. 14630 do { 14631 stack[active_w_slots--] = 0xdeadbeef; 14632 } while (active_w_slots > requested_w_slots); 14633 } 14634 14635 // ---- Pop ---- 14636 14637 Clobber(&masm, list); 14638 14639 // If popping an even number of registers, the first one will be X-sized. 14640 // Otherwise, the first one will be W-sized. 14641 bool next_is_64 = !(reg_count & 1); 14642 for (int i = reg_count - 1; i >= 0; i--) { 14643 if (next_is_64) { 14644 __ Pop(x[i]); 14645 active_w_slots -= 2; 14646 } else { 14647 __ Pop(w[i]); 14648 active_w_slots -= 1; 14649 } 14650 next_is_64 = !next_is_64; 14651 } 14652 VIXL_ASSERT(active_w_slots == 0); 14653 14654 // Drop memory to restore stack_pointer. 
14655 __ Drop(claim); 14656 14657 __ Mov(sp, __ StackPointer()); 14658 __ SetStackPointer(sp); 14659 } 14660 14661 END(); 14662 14663 RUN(); 14664 14665 int slot = 0; 14666 for (int i = 0; i < reg_count; i++) { 14667 // Even-numbered registers were written as W registers. 14668 // Odd-numbered registers were written as X registers. 14669 bool expect_64 = (i & 1); 14670 uint64_t expected; 14671 14672 if (expect_64) { 14673 uint64_t hi = stack[slot++]; 14674 uint64_t lo = stack[slot++]; 14675 expected = (hi << 32) | lo; 14676 } else { 14677 expected = stack[slot++]; 14678 } 14679 14680 // Always use ASSERT_EQUAL_64, even when testing W registers, so we can 14681 // test that the upper word was properly cleared by Pop. 14682 if (x[i].Is(xzr)) { 14683 ASSERT_EQUAL_64(0, x[i]); 14684 } else { 14685 ASSERT_EQUAL_64(expected, x[i]); 14686 } 14687 } 14688 VIXL_ASSERT(slot == requested_w_slots); 14689 14690 TEARDOWN(); 14691 } 14692 14693 14694 TEST(push_pop_xreg_wx_overlap) { 14695 for (int claim = 0; claim <= 8; claim++) { 14696 for (int count = 1; count <= 8; count++) { 14697 PushPopXRegWXOverlapHelper(count, claim); 14698 } 14699 // Test with the maximum number of registers. 14700 PushPopXRegWXOverlapHelper(kPushPopXRegMaxRegCount, claim); 14701 } 14702 } 14703 14704 14705 TEST(push_pop_sp) { 14706 SETUP(); 14707 14708 START(); 14709 14710 VIXL_ASSERT(sp.Is(__ StackPointer())); 14711 14712 // Acquire all temps from the MacroAssembler. They are used arbitrarily below. 14713 UseScratchRegisterScope temps(&masm); 14714 temps.ExcludeAll(); 14715 14716 __ Mov(x3, 0x3333333333333333); 14717 __ Mov(x2, 0x2222222222222222); 14718 __ Mov(x1, 0x1111111111111111); 14719 __ Mov(x0, 0x0000000000000000); 14720 __ Claim(2 * kXRegSizeInBytes); 14721 __ PushXRegList(x0.GetBit() | x1.GetBit() | x2.GetBit() | x3.GetBit()); 14722 __ Push(x3, x2); 14723 __ PopXRegList(x0.GetBit() | x1.GetBit() | x2.GetBit() | x3.GetBit()); 14724 __ Push(x2, x1, x3, x0); 14725 __ Pop(x4, x5); 14726 __ Pop(x6, x7, x8, x9); 14727 14728 __ Claim(2 * kXRegSizeInBytes); 14729 __ PushWRegList(w0.GetBit() | w1.GetBit() | w2.GetBit() | w3.GetBit()); 14730 __ Push(w3, w1, w2, w0); 14731 __ PopWRegList(w10.GetBit() | w11.GetBit() | w12.GetBit() | w13.GetBit()); 14732 __ Pop(w14, w15, w16, w17); 14733 14734 __ Claim(2 * kXRegSizeInBytes); 14735 __ Push(w2, w2, w1, w1); 14736 __ Push(x3, x3); 14737 __ Pop(w18, w19, w20, w21); 14738 __ Pop(x22, x23); 14739 14740 __ Claim(2 * kXRegSizeInBytes); 14741 __ PushXRegList(x1.GetBit() | x22.GetBit()); 14742 __ PopXRegList(x24.GetBit() | x26.GetBit()); 14743 14744 __ Claim(2 * kXRegSizeInBytes); 14745 __ PushWRegList(w1.GetBit() | w2.GetBit() | w4.GetBit() | w22.GetBit()); 14746 __ PopWRegList(w25.GetBit() | w27.GetBit() | w28.GetBit() | w29.GetBit()); 14747 14748 __ Claim(2 * kXRegSizeInBytes); 14749 __ PushXRegList(0); 14750 __ PopXRegList(0); 14751 __ PushXRegList(0xffffffff); 14752 __ PopXRegList(0xffffffff); 14753 __ Drop(12 * kXRegSizeInBytes); 14754 END(); 14755 14756 RUN(); 14757 14758 ASSERT_EQUAL_64(0x1111111111111111, x3); 14759 ASSERT_EQUAL_64(0x0000000000000000, x2); 14760 ASSERT_EQUAL_64(0x3333333333333333, x1); 14761 ASSERT_EQUAL_64(0x2222222222222222, x0); 14762 ASSERT_EQUAL_64(0x3333333333333333, x9); 14763 ASSERT_EQUAL_64(0x2222222222222222, x8); 14764 ASSERT_EQUAL_64(0x0000000000000000, x7); 14765 ASSERT_EQUAL_64(0x3333333333333333, x6); 14766 ASSERT_EQUAL_64(0x1111111111111111, x5); 14767 ASSERT_EQUAL_64(0x2222222222222222, x4); 14768 14769 ASSERT_EQUAL_32(0x11111111U, w13); 14770 
ASSERT_EQUAL_32(0x33333333U, w12); 14771 ASSERT_EQUAL_32(0x00000000U, w11); 14772 ASSERT_EQUAL_32(0x22222222U, w10); 14773 ASSERT_EQUAL_32(0x11111111U, w17); 14774 ASSERT_EQUAL_32(0x00000000U, w16); 14775 ASSERT_EQUAL_32(0x33333333U, w15); 14776 ASSERT_EQUAL_32(0x22222222U, w14); 14777 14778 ASSERT_EQUAL_32(0x11111111U, w18); 14779 ASSERT_EQUAL_32(0x11111111U, w19); 14780 ASSERT_EQUAL_32(0x11111111U, w20); 14781 ASSERT_EQUAL_32(0x11111111U, w21); 14782 ASSERT_EQUAL_64(0x3333333333333333, x22); 14783 ASSERT_EQUAL_64(0x0000000000000000, x23); 14784 14785 ASSERT_EQUAL_64(0x3333333333333333, x24); 14786 ASSERT_EQUAL_64(0x3333333333333333, x26); 14787 14788 ASSERT_EQUAL_32(0x33333333U, w25); 14789 ASSERT_EQUAL_32(0x00000000U, w27); 14790 ASSERT_EQUAL_32(0x22222222U, w28); 14791 ASSERT_EQUAL_32(0x33333333U, w29); 14792 TEARDOWN(); 14793 } 14794 14795 14796 TEST(noreg) { 14797 // This test doesn't generate any code, but it verifies some invariants 14798 // related to NoReg. 14799 VIXL_CHECK(NoReg.Is(NoFPReg)); 14800 VIXL_CHECK(NoFPReg.Is(NoReg)); 14801 14802 VIXL_CHECK(NoVReg.Is(NoReg)); 14803 VIXL_CHECK(NoReg.Is(NoVReg)); 14804 14805 VIXL_CHECK(NoReg.Is(NoCPUReg)); 14806 VIXL_CHECK(NoCPUReg.Is(NoReg)); 14807 14808 VIXL_CHECK(NoFPReg.Is(NoCPUReg)); 14809 VIXL_CHECK(NoCPUReg.Is(NoFPReg)); 14810 14811 VIXL_CHECK(NoVReg.Is(NoCPUReg)); 14812 VIXL_CHECK(NoCPUReg.Is(NoVReg)); 14813 14814 VIXL_CHECK(NoReg.IsNone()); 14815 VIXL_CHECK(NoFPReg.IsNone()); 14816 VIXL_CHECK(NoVReg.IsNone()); 14817 VIXL_CHECK(NoCPUReg.IsNone()); 14818 } 14819 14820 14821 TEST(isvalid) { 14822 // This test doesn't generate any code, but it verifies some invariants 14823 // related to IsValid(). 14824 VIXL_CHECK(!NoReg.IsValid()); 14825 VIXL_CHECK(!NoFPReg.IsValid()); 14826 VIXL_CHECK(!NoVReg.IsValid()); 14827 VIXL_CHECK(!NoCPUReg.IsValid()); 14828 14829 VIXL_CHECK(x0.IsValid()); 14830 VIXL_CHECK(w0.IsValid()); 14831 VIXL_CHECK(x30.IsValid()); 14832 VIXL_CHECK(w30.IsValid()); 14833 VIXL_CHECK(xzr.IsValid()); 14834 VIXL_CHECK(wzr.IsValid()); 14835 14836 VIXL_CHECK(sp.IsValid()); 14837 VIXL_CHECK(wsp.IsValid()); 14838 14839 VIXL_CHECK(d0.IsValid()); 14840 VIXL_CHECK(s0.IsValid()); 14841 VIXL_CHECK(d31.IsValid()); 14842 VIXL_CHECK(s31.IsValid()); 14843 14844 VIXL_CHECK(x0.IsValidRegister()); 14845 VIXL_CHECK(w0.IsValidRegister()); 14846 VIXL_CHECK(xzr.IsValidRegister()); 14847 VIXL_CHECK(wzr.IsValidRegister()); 14848 VIXL_CHECK(sp.IsValidRegister()); 14849 VIXL_CHECK(wsp.IsValidRegister()); 14850 VIXL_CHECK(!x0.IsValidFPRegister()); 14851 VIXL_CHECK(!w0.IsValidFPRegister()); 14852 VIXL_CHECK(!xzr.IsValidFPRegister()); 14853 VIXL_CHECK(!wzr.IsValidFPRegister()); 14854 VIXL_CHECK(!sp.IsValidFPRegister()); 14855 VIXL_CHECK(!wsp.IsValidFPRegister()); 14856 14857 VIXL_CHECK(d0.IsValidFPRegister()); 14858 VIXL_CHECK(s0.IsValidFPRegister()); 14859 VIXL_CHECK(!d0.IsValidRegister()); 14860 VIXL_CHECK(!s0.IsValidRegister()); 14861 14862 // Test the same as before, but using CPURegister types. This shouldn't make 14863 // any difference. 
14864 VIXL_CHECK(static_cast<CPURegister>(x0).IsValid()); 14865 VIXL_CHECK(static_cast<CPURegister>(w0).IsValid()); 14866 VIXL_CHECK(static_cast<CPURegister>(x30).IsValid()); 14867 VIXL_CHECK(static_cast<CPURegister>(w30).IsValid()); 14868 VIXL_CHECK(static_cast<CPURegister>(xzr).IsValid()); 14869 VIXL_CHECK(static_cast<CPURegister>(wzr).IsValid()); 14870 14871 VIXL_CHECK(static_cast<CPURegister>(sp).IsValid()); 14872 VIXL_CHECK(static_cast<CPURegister>(wsp).IsValid()); 14873 14874 VIXL_CHECK(static_cast<CPURegister>(d0).IsValid()); 14875 VIXL_CHECK(static_cast<CPURegister>(s0).IsValid()); 14876 VIXL_CHECK(static_cast<CPURegister>(d31).IsValid()); 14877 VIXL_CHECK(static_cast<CPURegister>(s31).IsValid()); 14878 14879 VIXL_CHECK(static_cast<CPURegister>(x0).IsValidRegister()); 14880 VIXL_CHECK(static_cast<CPURegister>(w0).IsValidRegister()); 14881 VIXL_CHECK(static_cast<CPURegister>(xzr).IsValidRegister()); 14882 VIXL_CHECK(static_cast<CPURegister>(wzr).IsValidRegister()); 14883 VIXL_CHECK(static_cast<CPURegister>(sp).IsValidRegister()); 14884 VIXL_CHECK(static_cast<CPURegister>(wsp).IsValidRegister()); 14885 VIXL_CHECK(!static_cast<CPURegister>(x0).IsValidFPRegister()); 14886 VIXL_CHECK(!static_cast<CPURegister>(w0).IsValidFPRegister()); 14887 VIXL_CHECK(!static_cast<CPURegister>(xzr).IsValidFPRegister()); 14888 VIXL_CHECK(!static_cast<CPURegister>(wzr).IsValidFPRegister()); 14889 VIXL_CHECK(!static_cast<CPURegister>(sp).IsValidFPRegister()); 14890 VIXL_CHECK(!static_cast<CPURegister>(wsp).IsValidFPRegister()); 14891 14892 VIXL_CHECK(static_cast<CPURegister>(d0).IsValidFPRegister()); 14893 VIXL_CHECK(static_cast<CPURegister>(s0).IsValidFPRegister()); 14894 VIXL_CHECK(!static_cast<CPURegister>(d0).IsValidRegister()); 14895 VIXL_CHECK(!static_cast<CPURegister>(s0).IsValidRegister()); 14896 } 14897 14898 14899 TEST(areconsecutive) { 14900 // This test generates no code; it just checks that AreConsecutive works. 
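// (As the checks below pin down: consecutiveness wraps around the top of the
// register file, so d31 is followed by d0; registers of different sizes can
// be mixed; and arguments from the first NoVReg onwards are ignored.)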
14901 VIXL_CHECK(AreConsecutive(b0, NoVReg)); 14902 VIXL_CHECK(AreConsecutive(b1, b2)); 14903 VIXL_CHECK(AreConsecutive(b3, b4, b5)); 14904 VIXL_CHECK(AreConsecutive(b6, b7, b8, b9)); 14905 VIXL_CHECK(AreConsecutive(h10, NoVReg)); 14906 VIXL_CHECK(AreConsecutive(h11, h12)); 14907 VIXL_CHECK(AreConsecutive(h13, h14, h15)); 14908 VIXL_CHECK(AreConsecutive(h16, h17, h18, h19)); 14909 VIXL_CHECK(AreConsecutive(s20, NoVReg)); 14910 VIXL_CHECK(AreConsecutive(s21, s22)); 14911 VIXL_CHECK(AreConsecutive(s23, s24, s25)); 14912 VIXL_CHECK(AreConsecutive(s26, s27, s28, s29)); 14913 VIXL_CHECK(AreConsecutive(d30, NoVReg)); 14914 VIXL_CHECK(AreConsecutive(d31, d0)); 14915 VIXL_CHECK(AreConsecutive(d1, d2, d3)); 14916 VIXL_CHECK(AreConsecutive(d4, d5, d6, d7)); 14917 VIXL_CHECK(AreConsecutive(q8, NoVReg)); 14918 VIXL_CHECK(AreConsecutive(q9, q10)); 14919 VIXL_CHECK(AreConsecutive(q11, q12, q13)); 14920 VIXL_CHECK(AreConsecutive(q14, q15, q16, q17)); 14921 VIXL_CHECK(AreConsecutive(v18, NoVReg)); 14922 VIXL_CHECK(AreConsecutive(v19, v20)); 14923 VIXL_CHECK(AreConsecutive(v21, v22, v23)); 14924 VIXL_CHECK(AreConsecutive(v24, v25, v26, v27)); 14925 VIXL_CHECK(AreConsecutive(b29, h30)); 14926 VIXL_CHECK(AreConsecutive(s31, d0, q1)); 14927 VIXL_CHECK(AreConsecutive(v2, b3, h4, s5)); 14928 14929 VIXL_CHECK(!AreConsecutive(b0, b2)); 14930 VIXL_CHECK(!AreConsecutive(h1, h0)); 14931 VIXL_CHECK(!AreConsecutive(s31, s1)); 14932 VIXL_CHECK(!AreConsecutive(d12, d12)); 14933 VIXL_CHECK(!AreConsecutive(q31, q1)); 14934 14935 VIXL_CHECK(!AreConsecutive(b0, b1, b3)); 14936 VIXL_CHECK(!AreConsecutive(h4, h5, h6, h6)); 14937 VIXL_CHECK(!AreConsecutive(d11, d13, NoVReg, d14)); 14938 VIXL_CHECK(!AreConsecutive(d15, d16, d18, NoVReg)); 14939 VIXL_CHECK(!AreConsecutive(b26, b28, NoVReg, b29)); 14940 VIXL_CHECK(!AreConsecutive(s28, s30, NoVReg, NoVReg)); 14941 14942 VIXL_CHECK(AreConsecutive(q19, NoVReg, NoVReg, q22)); 14943 VIXL_CHECK(AreConsecutive(v23, NoVReg, v25, NoVReg)); 14944 VIXL_CHECK(AreConsecutive(b26, b27, NoVReg, NoVReg)); 14945 VIXL_CHECK(AreConsecutive(h28, NoVReg, NoVReg, NoVReg)); 14946 VIXL_CHECK(AreConsecutive(s30, s31, NoVReg, s2)); 14947 VIXL_CHECK(AreConsecutive(d3, NoVReg, d6, d7)); 14948 } 14949 14950 14951 TEST(printf) { 14952 SETUP(); 14953 START(); 14954 14955 char const* test_plain_string = "Printf with no arguments.\n"; 14956 char const* test_substring = "'This is a substring.'"; 14957 RegisterDump before; 14958 14959 // Initialize x29 to the value of the stack pointer. We will use x29 as a 14960 // temporary stack pointer later, and initializing it in this way allows the 14961 // RegisterDump check to pass. 14962 __ Mov(x29, __ StackPointer()); 14963 14964 // Test simple integer arguments. 14965 __ Mov(x0, 1234); 14966 __ Mov(x1, 0x1234); 14967 14968 // Test simple floating-point arguments. 14969 __ Fmov(d0, 1.234); 14970 14971 // Test pointer (string) arguments. 14972 __ Mov(x2, reinterpret_cast<uintptr_t>(test_substring)); 14973 14974 // Test the maximum number of arguments, and sign extension. 14975 __ Mov(w3, 0xffffffff); 14976 __ Mov(w4, 0xffffffff); 14977 __ Mov(x5, 0xffffffffffffffff); 14978 __ Mov(x6, 0xffffffffffffffff); 14979 __ Fmov(s1, 1.234); 14980 __ Fmov(s2, 2.345); 14981 __ Fmov(d3, 3.456); 14982 __ Fmov(d4, 4.567); 14983 14984 // Test printing callee-saved registers. 14985 __ Mov(x28, 0x123456789abcdef); 14986 __ Fmov(d10, 42.0); 14987 14988 // Test with three arguments. 14989 __ Mov(x10, 3); 14990 __ Mov(x11, 40); 14991 __ Mov(x12, 500); 14992 14993 // A single character. 
14994 __ Mov(w13, 'x'); 14995 14996 // Check that we don't clobber any registers. 14997 before.Dump(&masm); 14998 14999 __ Printf(test_plain_string); // NOLINT(runtime/printf) 15000 __ Printf("x0: %" PRId64 ", x1: 0x%08" PRIx64 "\n", x0, x1); 15001 __ Printf("w5: %" PRId32 ", x5: %" PRId64 "\n", w5, x5); 15002 __ Printf("d0: %f\n", d0); 15003 __ Printf("Test %%s: %s\n", x2); 15004 __ Printf("w3(uint32): %" PRIu32 "\nw4(int32): %" PRId32 15005 "\n" 15006 "x5(uint64): %" PRIu64 "\nx6(int64): %" PRId64 "\n", 15007 w3, 15008 w4, 15009 x5, 15010 x6); 15011 __ Printf("%%f: %f\n%%g: %g\n%%e: %e\n%%E: %E\n", s1, s2, d3, d4); 15012 __ Printf("0x%" PRIx32 ", 0x%" PRIx64 "\n", w28, x28); 15013 __ Printf("%g\n", d10); 15014 __ Printf("%%%%%s%%%c%%\n", x2, w13); 15015 15016 // Print the stack pointer (sp). 15017 __ Printf("StackPointer(sp): 0x%016" PRIx64 ", 0x%08" PRIx32 "\n", 15018 __ StackPointer(), 15019 __ StackPointer().W()); 15020 15021 // Test with a different stack pointer. 15022 const Register old_stack_pointer = __ StackPointer(); 15023 __ Mov(x29, old_stack_pointer); 15024 __ SetStackPointer(x29); 15025 // Print the stack pointer (not sp). 15026 __ Printf("StackPointer(not sp): 0x%016" PRIx64 ", 0x%08" PRIx32 "\n", 15027 __ StackPointer(), 15028 __ StackPointer().W()); 15029 __ Mov(old_stack_pointer, __ StackPointer()); 15030 __ SetStackPointer(old_stack_pointer); 15031 15032 // Test with three arguments. 15033 __ Printf("3=%u, 4=%u, 5=%u\n", x10, x11, x12); 15034 15035 // Mixed argument types. 15036 __ Printf("w3: %" PRIu32 ", s1: %f, x5: %" PRIu64 ", d3: %f\n", 15037 w3, 15038 s1, 15039 x5, 15040 d3); 15041 __ Printf("s1: %f, d3: %f, w3: %" PRId32 ", x5: %" PRId64 "\n", 15042 s1, 15043 d3, 15044 w3, 15045 x5); 15046 15047 END(); 15048 RUN(); 15049 15050 // We cannot easily test the output of the Printf sequences, and because 15051 // Printf preserves all registers by default, we can't look at the number of 15052 // bytes that were printed. However, the printf_no_preserve test should check 15053 // that, and here we just test that we didn't clobber any registers. 15054 ASSERT_EQUAL_REGISTERS(before); 15055 15056 TEARDOWN(); 15057 } 15058 15059 15060 TEST(printf_no_preserve) { 15061 SETUP(); 15062 START(); 15063 15064 char const* test_plain_string = "Printf with no arguments.\n"; 15065 char const* test_substring = "'This is a substring.'"; 15066 15067 __ PrintfNoPreserve(test_plain_string); 15068 __ Mov(x19, x0); 15069 15070 // Test simple integer arguments. 15071 __ Mov(x0, 1234); 15072 __ Mov(x1, 0x1234); 15073 __ PrintfNoPreserve("x0: %" PRId64 ", x1: 0x%08" PRIx64 "\n", x0, x1); 15074 __ Mov(x20, x0); 15075 15076 // Test simple floating-point arguments. 15077 __ Fmov(d0, 1.234); 15078 __ PrintfNoPreserve("d0: %f\n", d0); 15079 __ Mov(x21, x0); 15080 15081 // Test pointer (string) arguments. 15082 __ Mov(x2, reinterpret_cast<uintptr_t>(test_substring)); 15083 __ PrintfNoPreserve("Test %%s: %s\n", x2); 15084 __ Mov(x22, x0); 15085 15086 // Test the maximum number of arguments, and sign extension. 
15087 __ Mov(w3, 0xffffffff); 15088 __ Mov(w4, 0xffffffff); 15089 __ Mov(x5, 0xffffffffffffffff); 15090 __ Mov(x6, 0xffffffffffffffff); 15091 __ PrintfNoPreserve("w3(uint32): %" PRIu32 "\nw4(int32): %" PRId32 15092 "\n" 15093 "x5(uint64): %" PRIu64 "\nx6(int64): %" PRId64 "\n", 15094 w3, 15095 w4, 15096 x5, 15097 x6); 15098 __ Mov(x23, x0); 15099 15100 __ Fmov(s1, 1.234); 15101 __ Fmov(s2, 2.345); 15102 __ Fmov(d3, 3.456); 15103 __ Fmov(d4, 4.567); 15104 __ PrintfNoPreserve("%%f: %f\n%%g: %g\n%%e: %e\n%%E: %E\n", s1, s2, d3, d4); 15105 __ Mov(x24, x0); 15106 15107 // Test printing callee-saved registers. 15108 __ Mov(x28, 0x123456789abcdef); 15109 __ PrintfNoPreserve("0x%" PRIx32 ", 0x%" PRIx64 "\n", w28, x28); 15110 __ Mov(x25, x0); 15111 15112 __ Fmov(d10, 42.0); 15113 __ PrintfNoPreserve("%g\n", d10); 15114 __ Mov(x26, x0); 15115 15116 // Test with a different stack pointer. 15117 const Register old_stack_pointer = __ StackPointer(); 15118 __ Mov(x29, old_stack_pointer); 15119 __ SetStackPointer(x29); 15120 // Print the stack pointer (not sp). 15121 __ PrintfNoPreserve("StackPointer(not sp): 0x%016" PRIx64 ", 0x%08" PRIx32 15122 "\n", 15123 __ StackPointer(), 15124 __ StackPointer().W()); 15125 __ Mov(x27, x0); 15126 __ Mov(old_stack_pointer, __ StackPointer()); 15127 __ SetStackPointer(old_stack_pointer); 15128 15129 // Test with three arguments. 15130 __ Mov(x3, 3); 15131 __ Mov(x4, 40); 15132 __ Mov(x5, 500); 15133 __ PrintfNoPreserve("3=%u, 4=%u, 5=%u\n", x3, x4, x5); 15134 __ Mov(x28, x0); 15135 15136 // Mixed argument types. 15137 __ Mov(w3, 0xffffffff); 15138 __ Fmov(s1, 1.234); 15139 __ Mov(x5, 0xffffffffffffffff); 15140 __ Fmov(d3, 3.456); 15141 __ PrintfNoPreserve("w3: %" PRIu32 ", s1: %f, x5: %" PRIu64 ", d3: %f\n", 15142 w3, 15143 s1, 15144 x5, 15145 d3); 15146 __ Mov(x29, x0); 15147 15148 END(); 15149 RUN(); 15150 15151 // We cannot easily test the exact output of the Printf sequences, but we can 15152 // use the return code to check that the string length was correct. 15153 15154 // Printf with no arguments. 15155 ASSERT_EQUAL_64(strlen(test_plain_string), x19); 15156 // x0: 1234, x1: 0x00001234 15157 ASSERT_EQUAL_64(25, x20); 15158 // d0: 1.234000 15159 ASSERT_EQUAL_64(13, x21); 15160 // Test %s: 'This is a substring.' 15161 ASSERT_EQUAL_64(32, x22); 15162 // w3(uint32): 4294967295 15163 // w4(int32): -1 15164 // x5(uint64): 18446744073709551615 15165 // x6(int64): -1 15166 ASSERT_EQUAL_64(23 + 14 + 33 + 14, x23); 15167 // %f: 1.234000 15168 // %g: 2.345 15169 // %e: 3.456000e+00 15170 // %E: 4.567000E+00 15171 ASSERT_EQUAL_64(13 + 10 + 17 + 17, x24); 15172 // 0x89abcdef, 0x123456789abcdef 15173 ASSERT_EQUAL_64(30, x25); 15174 // 42 15175 ASSERT_EQUAL_64(3, x26); 15176 // StackPointer(not sp): 0x00007fb037ae2370, 0x37ae2370 15177 // Note: This is an example value, but the field width is fixed here so the 15178 // string length is still predictable. 15179 ASSERT_EQUAL_64(53, x27); 15180 // 3=3, 4=40, 5=500 15181 ASSERT_EQUAL_64(17, x28); 15182 // w3: 4294967295, s1: 1.234000, x5: 18446744073709551615, d3: 3.456000 15183 ASSERT_EQUAL_64(69, x29); 15184 15185 TEARDOWN(); 15186 } 15187 15188 15189 #ifndef VIXL_INCLUDE_SIMULATOR_AARCH64 15190 TEST(trace) { 15191 // The Trace helper should not generate any code unless the simulator (or 15192 // debugger) is being used. 
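  // (GetSizeOfCodeGeneratedSince reports the number of bytes emitted since
  // the label was bound, so the check below verifies that Trace generated
  // nothing at all.)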
  SETUP();
  START();

  Label start;
  __ Bind(&start);
  __ Trace(LOG_ALL, TRACE_ENABLE);
  __ Trace(LOG_ALL, TRACE_DISABLE);
  VIXL_CHECK(__ GetSizeOfCodeGeneratedSince(&start) == 0);

  END();
  TEARDOWN();
}
#endif


#ifndef VIXL_INCLUDE_SIMULATOR_AARCH64
TEST(log) {
  // The Log helper should not generate any code unless the simulator (or
  // debugger) is being used.
  SETUP();
  START();

  Label start;
  __ Bind(&start);
  __ Log(LOG_ALL);
  VIXL_CHECK(__ GetSizeOfCodeGeneratedSince(&start) == 0);

  END();
  TEARDOWN();
}
#endif


TEST(blr_lr) {
  // A simple test to check that the simulator correctly handles "blr lr".
  SETUP();

  START();
  Label target;
  Label end;

  __ Mov(x0, 0x0);
  __ Adr(lr, &target);

  __ Blr(lr);
  __ Mov(x0, 0xdeadbeef);
  __ B(&end);

  __ Bind(&target);
  __ Mov(x0, 0xc001c0de);

  __ Bind(&end);
  END();

  RUN();

  ASSERT_EQUAL_64(0xc001c0de, x0);

  TEARDOWN();
}


TEST(barriers) {
  // Generate all supported barriers; this is just a smoke test.
  SETUP();

  START();

  // DMB
  __ Dmb(FullSystem, BarrierAll);
  __ Dmb(FullSystem, BarrierReads);
  __ Dmb(FullSystem, BarrierWrites);
  __ Dmb(FullSystem, BarrierOther);

  __ Dmb(InnerShareable, BarrierAll);
  __ Dmb(InnerShareable, BarrierReads);
  __ Dmb(InnerShareable, BarrierWrites);
  __ Dmb(InnerShareable, BarrierOther);

  __ Dmb(NonShareable, BarrierAll);
  __ Dmb(NonShareable, BarrierReads);
  __ Dmb(NonShareable, BarrierWrites);
  __ Dmb(NonShareable, BarrierOther);

  __ Dmb(OuterShareable, BarrierAll);
  __ Dmb(OuterShareable, BarrierReads);
  __ Dmb(OuterShareable, BarrierWrites);
  __ Dmb(OuterShareable, BarrierOther);

  // DSB
  __ Dsb(FullSystem, BarrierAll);
  __ Dsb(FullSystem, BarrierReads);
  __ Dsb(FullSystem, BarrierWrites);
  __ Dsb(FullSystem, BarrierOther);

  __ Dsb(InnerShareable, BarrierAll);
  __ Dsb(InnerShareable, BarrierReads);
  __ Dsb(InnerShareable, BarrierWrites);
  __ Dsb(InnerShareable, BarrierOther);

  __ Dsb(NonShareable, BarrierAll);
  __ Dsb(NonShareable, BarrierReads);
  __ Dsb(NonShareable, BarrierWrites);
  __ Dsb(NonShareable, BarrierOther);

  __ Dsb(OuterShareable, BarrierAll);
  __ Dsb(OuterShareable, BarrierReads);
  __ Dsb(OuterShareable, BarrierWrites);
  __ Dsb(OuterShareable, BarrierOther);

  // ISB
  __ Isb();

  END();

  RUN();

  TEARDOWN();
}


TEST(process_nan_double) {
  // Make sure that NaN propagation works correctly.
  double sn = RawbitsToDouble(0x7ff5555511111111);
  double qn = RawbitsToDouble(0x7ffaaaaa11111111);
  VIXL_ASSERT(IsSignallingNaN(sn));
  VIXL_ASSERT(IsQuietNaN(qn));

  // The input NaNs after passing through ProcessNaN.
  double sn_proc = RawbitsToDouble(0x7ffd555511111111);
  double qn_proc = qn;
  VIXL_ASSERT(IsQuietNaN(sn_proc));
  VIXL_ASSERT(IsQuietNaN(qn_proc));

  SETUP();
  START();

  // Execute a number of instructions which all use ProcessNaN, and check that
  // they all handle the NaN correctly.
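  // For reference: a double NaN has all exponent bits set (0x7ff); the top
  // fraction bit (bit 51) is the quiet bit, set for quiet NaNs and clear for
  // signalling NaNs. ProcessNaN quietens a signalling NaN by setting that
  // bit, which is why sn (0x7ff5...) becomes sn_proc (0x7ffd...) above.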
15332 __ Fmov(d0, sn); 15333 __ Fmov(d10, qn); 15334 15335 // Operations that always propagate NaNs unchanged, even signalling NaNs. 15336 // - Signalling NaN 15337 __ Fmov(d1, d0); 15338 __ Fabs(d2, d0); 15339 __ Fneg(d3, d0); 15340 // - Quiet NaN 15341 __ Fmov(d11, d10); 15342 __ Fabs(d12, d10); 15343 __ Fneg(d13, d10); 15344 15345 // Operations that use ProcessNaN. 15346 // - Signalling NaN 15347 __ Fsqrt(d4, d0); 15348 __ Frinta(d5, d0); 15349 __ Frintn(d6, d0); 15350 __ Frintz(d7, d0); 15351 // - Quiet NaN 15352 __ Fsqrt(d14, d10); 15353 __ Frinta(d15, d10); 15354 __ Frintn(d16, d10); 15355 __ Frintz(d17, d10); 15356 15357 // The behaviour of fcvt is checked in TEST(fcvt_sd). 15358 15359 END(); 15360 RUN(); 15361 15362 uint64_t qn_raw = DoubleToRawbits(qn); 15363 uint64_t sn_raw = DoubleToRawbits(sn); 15364 15365 // - Signalling NaN 15366 ASSERT_EQUAL_FP64(sn, d1); 15367 ASSERT_EQUAL_FP64(RawbitsToDouble(sn_raw & ~kDSignMask), d2); 15368 ASSERT_EQUAL_FP64(RawbitsToDouble(sn_raw ^ kDSignMask), d3); 15369 // - Quiet NaN 15370 ASSERT_EQUAL_FP64(qn, d11); 15371 ASSERT_EQUAL_FP64(RawbitsToDouble(qn_raw & ~kDSignMask), d12); 15372 ASSERT_EQUAL_FP64(RawbitsToDouble(qn_raw ^ kDSignMask), d13); 15373 15374 // - Signalling NaN 15375 ASSERT_EQUAL_FP64(sn_proc, d4); 15376 ASSERT_EQUAL_FP64(sn_proc, d5); 15377 ASSERT_EQUAL_FP64(sn_proc, d6); 15378 ASSERT_EQUAL_FP64(sn_proc, d7); 15379 // - Quiet NaN 15380 ASSERT_EQUAL_FP64(qn_proc, d14); 15381 ASSERT_EQUAL_FP64(qn_proc, d15); 15382 ASSERT_EQUAL_FP64(qn_proc, d16); 15383 ASSERT_EQUAL_FP64(qn_proc, d17); 15384 15385 TEARDOWN(); 15386 } 15387 15388 15389 TEST(process_nan_float) { 15390 // Make sure that NaN propagation works correctly. 15391 float sn = RawbitsToFloat(0x7f951111); 15392 float qn = RawbitsToFloat(0x7fea1111); 15393 VIXL_ASSERT(IsSignallingNaN(sn)); 15394 VIXL_ASSERT(IsQuietNaN(qn)); 15395 15396 // The input NaNs after passing through ProcessNaN. 15397 float sn_proc = RawbitsToFloat(0x7fd51111); 15398 float qn_proc = qn; 15399 VIXL_ASSERT(IsQuietNaN(sn_proc)); 15400 VIXL_ASSERT(IsQuietNaN(qn_proc)); 15401 15402 SETUP(); 15403 START(); 15404 15405 // Execute a number of instructions which all use ProcessNaN, and check that 15406 // they all handle the NaN correctly. 15407 __ Fmov(s0, sn); 15408 __ Fmov(s10, qn); 15409 15410 // Operations that always propagate NaNs unchanged, even signalling NaNs. 15411 // - Signalling NaN 15412 __ Fmov(s1, s0); 15413 __ Fabs(s2, s0); 15414 __ Fneg(s3, s0); 15415 // - Quiet NaN 15416 __ Fmov(s11, s10); 15417 __ Fabs(s12, s10); 15418 __ Fneg(s13, s10); 15419 15420 // Operations that use ProcessNaN. 15421 // - Signalling NaN 15422 __ Fsqrt(s4, s0); 15423 __ Frinta(s5, s0); 15424 __ Frintn(s6, s0); 15425 __ Frintz(s7, s0); 15426 // - Quiet NaN 15427 __ Fsqrt(s14, s10); 15428 __ Frinta(s15, s10); 15429 __ Frintn(s16, s10); 15430 __ Frintz(s17, s10); 15431 15432 // The behaviour of fcvt is checked in TEST(fcvt_sd). 
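  // (For single precision the quiet bit is bit 22 of the fraction, so
  // ProcessNaN maps sn (0x7f951111) to sn_proc (0x7fd51111), as set up
  // above.)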
15433 15434 END(); 15435 RUN(); 15436 15437 uint32_t qn_raw = FloatToRawbits(qn); 15438 uint32_t sn_raw = FloatToRawbits(sn); 15439 15440 // - Signalling NaN 15441 ASSERT_EQUAL_FP32(sn, s1); 15442 ASSERT_EQUAL_FP32(RawbitsToFloat(sn_raw & ~kSSignMask), s2); 15443 ASSERT_EQUAL_FP32(RawbitsToFloat(sn_raw ^ kSSignMask), s3); 15444 // - Quiet NaN 15445 ASSERT_EQUAL_FP32(qn, s11); 15446 ASSERT_EQUAL_FP32(RawbitsToFloat(qn_raw & ~kSSignMask), s12); 15447 ASSERT_EQUAL_FP32(RawbitsToFloat(qn_raw ^ kSSignMask), s13); 15448 15449 // - Signalling NaN 15450 ASSERT_EQUAL_FP32(sn_proc, s4); 15451 ASSERT_EQUAL_FP32(sn_proc, s5); 15452 ASSERT_EQUAL_FP32(sn_proc, s6); 15453 ASSERT_EQUAL_FP32(sn_proc, s7); 15454 // - Quiet NaN 15455 ASSERT_EQUAL_FP32(qn_proc, s14); 15456 ASSERT_EQUAL_FP32(qn_proc, s15); 15457 ASSERT_EQUAL_FP32(qn_proc, s16); 15458 ASSERT_EQUAL_FP32(qn_proc, s17); 15459 15460 TEARDOWN(); 15461 } 15462 15463 15464 static void ProcessNaNsHelper(double n, double m, double expected) { 15465 VIXL_ASSERT(std::isnan(n) || std::isnan(m)); 15466 VIXL_ASSERT(std::isnan(expected)); 15467 15468 SETUP(); 15469 START(); 15470 15471 // Execute a number of instructions which all use ProcessNaNs, and check that 15472 // they all propagate NaNs correctly. 15473 __ Fmov(d0, n); 15474 __ Fmov(d1, m); 15475 15476 __ Fadd(d2, d0, d1); 15477 __ Fsub(d3, d0, d1); 15478 __ Fmul(d4, d0, d1); 15479 __ Fdiv(d5, d0, d1); 15480 __ Fmax(d6, d0, d1); 15481 __ Fmin(d7, d0, d1); 15482 15483 END(); 15484 RUN(); 15485 15486 ASSERT_EQUAL_FP64(expected, d2); 15487 ASSERT_EQUAL_FP64(expected, d3); 15488 ASSERT_EQUAL_FP64(expected, d4); 15489 ASSERT_EQUAL_FP64(expected, d5); 15490 ASSERT_EQUAL_FP64(expected, d6); 15491 ASSERT_EQUAL_FP64(expected, d7); 15492 15493 TEARDOWN(); 15494 } 15495 15496 15497 TEST(process_nans_double) { 15498 // Make sure that NaN propagation works correctly. 15499 double sn = RawbitsToDouble(0x7ff5555511111111); 15500 double sm = RawbitsToDouble(0x7ff5555522222222); 15501 double qn = RawbitsToDouble(0x7ffaaaaa11111111); 15502 double qm = RawbitsToDouble(0x7ffaaaaa22222222); 15503 VIXL_ASSERT(IsSignallingNaN(sn)); 15504 VIXL_ASSERT(IsSignallingNaN(sm)); 15505 VIXL_ASSERT(IsQuietNaN(qn)); 15506 VIXL_ASSERT(IsQuietNaN(qm)); 15507 15508 // The input NaNs after passing through ProcessNaN. 15509 double sn_proc = RawbitsToDouble(0x7ffd555511111111); 15510 double sm_proc = RawbitsToDouble(0x7ffd555522222222); 15511 double qn_proc = qn; 15512 double qm_proc = qm; 15513 VIXL_ASSERT(IsQuietNaN(sn_proc)); 15514 VIXL_ASSERT(IsQuietNaN(sm_proc)); 15515 VIXL_ASSERT(IsQuietNaN(qn_proc)); 15516 VIXL_ASSERT(IsQuietNaN(qm_proc)); 15517 15518 // Quiet NaNs are propagated. 15519 ProcessNaNsHelper(qn, 0, qn_proc); 15520 ProcessNaNsHelper(0, qm, qm_proc); 15521 ProcessNaNsHelper(qn, qm, qn_proc); 15522 15523 // Signalling NaNs are propagated, and made quiet. 15524 ProcessNaNsHelper(sn, 0, sn_proc); 15525 ProcessNaNsHelper(0, sm, sm_proc); 15526 ProcessNaNsHelper(sn, sm, sn_proc); 15527 15528 // Signalling NaNs take precedence over quiet NaNs. 15529 ProcessNaNsHelper(sn, qm, sn_proc); 15530 ProcessNaNsHelper(qn, sm, sm_proc); 15531 ProcessNaNsHelper(sn, sm, sn_proc); 15532 } 15533 15534 15535 static void ProcessNaNsHelper(float n, float m, float expected) { 15536 VIXL_ASSERT(std::isnan(n) || std::isnan(m)); 15537 VIXL_ASSERT(std::isnan(expected)); 15538 15539 SETUP(); 15540 START(); 15541 15542 // Execute a number of instructions which all use ProcessNaNs, and check that 15543 // they all propagate NaNs correctly. 
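  // Note: unlike the IEEE-754 maxNum/minNum operations (fmaxnm/fminnm), the
  // fmax and fmin instructions propagate NaN operands, so all six results
  // below can be checked against the same expected value.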
15544 __ Fmov(s0, n); 15545 __ Fmov(s1, m); 15546 15547 __ Fadd(s2, s0, s1); 15548 __ Fsub(s3, s0, s1); 15549 __ Fmul(s4, s0, s1); 15550 __ Fdiv(s5, s0, s1); 15551 __ Fmax(s6, s0, s1); 15552 __ Fmin(s7, s0, s1); 15553 15554 END(); 15555 RUN(); 15556 15557 ASSERT_EQUAL_FP32(expected, s2); 15558 ASSERT_EQUAL_FP32(expected, s3); 15559 ASSERT_EQUAL_FP32(expected, s4); 15560 ASSERT_EQUAL_FP32(expected, s5); 15561 ASSERT_EQUAL_FP32(expected, s6); 15562 ASSERT_EQUAL_FP32(expected, s7); 15563 15564 TEARDOWN(); 15565 } 15566 15567 15568 TEST(process_nans_float) { 15569 // Make sure that NaN propagation works correctly. 15570 float sn = RawbitsToFloat(0x7f951111); 15571 float sm = RawbitsToFloat(0x7f952222); 15572 float qn = RawbitsToFloat(0x7fea1111); 15573 float qm = RawbitsToFloat(0x7fea2222); 15574 VIXL_ASSERT(IsSignallingNaN(sn)); 15575 VIXL_ASSERT(IsSignallingNaN(sm)); 15576 VIXL_ASSERT(IsQuietNaN(qn)); 15577 VIXL_ASSERT(IsQuietNaN(qm)); 15578 15579 // The input NaNs after passing through ProcessNaN. 15580 float sn_proc = RawbitsToFloat(0x7fd51111); 15581 float sm_proc = RawbitsToFloat(0x7fd52222); 15582 float qn_proc = qn; 15583 float qm_proc = qm; 15584 VIXL_ASSERT(IsQuietNaN(sn_proc)); 15585 VIXL_ASSERT(IsQuietNaN(sm_proc)); 15586 VIXL_ASSERT(IsQuietNaN(qn_proc)); 15587 VIXL_ASSERT(IsQuietNaN(qm_proc)); 15588 15589 // Quiet NaNs are propagated. 15590 ProcessNaNsHelper(qn, 0, qn_proc); 15591 ProcessNaNsHelper(0, qm, qm_proc); 15592 ProcessNaNsHelper(qn, qm, qn_proc); 15593 15594 // Signalling NaNs are propagated, and made quiet. 15595 ProcessNaNsHelper(sn, 0, sn_proc); 15596 ProcessNaNsHelper(0, sm, sm_proc); 15597 ProcessNaNsHelper(sn, sm, sn_proc); 15598 15599 // Signalling NaNs take precedence over quiet NaNs. 15600 ProcessNaNsHelper(sn, qm, sn_proc); 15601 ProcessNaNsHelper(qn, sm, sm_proc); 15602 ProcessNaNsHelper(sn, sm, sn_proc); 15603 } 15604 15605 15606 static void DefaultNaNHelper(float n, float m, float a) { 15607 VIXL_ASSERT(std::isnan(n) || std::isnan(m) || std::isnan(a)); 15608 15609 bool test_1op = std::isnan(n); 15610 bool test_2op = std::isnan(n) || std::isnan(m); 15611 15612 SETUP(); 15613 START(); 15614 15615 // Enable Default-NaN mode in the FPCR. 15616 __ Mrs(x0, FPCR); 15617 __ Orr(x1, x0, DN_mask); 15618 __ Msr(FPCR, x1); 15619 15620 // Execute a number of instructions which all use ProcessNaNs, and check that 15621 // they all produce the default NaN. 15622 __ Fmov(s0, n); 15623 __ Fmov(s1, m); 15624 __ Fmov(s2, a); 15625 15626 if (test_1op) { 15627 // Operations that always propagate NaNs unchanged, even signalling NaNs. 15628 __ Fmov(s10, s0); 15629 __ Fabs(s11, s0); 15630 __ Fneg(s12, s0); 15631 15632 // Operations that use ProcessNaN. 15633 __ Fsqrt(s13, s0); 15634 __ Frinta(s14, s0); 15635 __ Frintn(s15, s0); 15636 __ Frintz(s16, s0); 15637 15638 // Fcvt usually has special NaN handling, but it respects default-NaN mode. 15639 __ Fcvt(d17, s0); 15640 } 15641 15642 if (test_2op) { 15643 __ Fadd(s18, s0, s1); 15644 __ Fsub(s19, s0, s1); 15645 __ Fmul(s20, s0, s1); 15646 __ Fdiv(s21, s0, s1); 15647 __ Fmax(s22, s0, s1); 15648 __ Fmin(s23, s0, s1); 15649 } 15650 15651 __ Fmadd(s24, s0, s1, s2); 15652 __ Fmsub(s25, s0, s1, s2); 15653 __ Fnmadd(s26, s0, s1, s2); 15654 __ Fnmsub(s27, s0, s1, s2); 15655 15656 // Restore FPCR. 
15657 __ Msr(FPCR, x0); 15658 15659 END(); 15660 RUN(); 15661 15662 if (test_1op) { 15663 uint32_t n_raw = FloatToRawbits(n); 15664 ASSERT_EQUAL_FP32(n, s10); 15665 ASSERT_EQUAL_FP32(RawbitsToFloat(n_raw & ~kSSignMask), s11); 15666 ASSERT_EQUAL_FP32(RawbitsToFloat(n_raw ^ kSSignMask), s12); 15667 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s13); 15668 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s14); 15669 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s15); 15670 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s16); 15671 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d17); 15672 } 15673 15674 if (test_2op) { 15675 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s18); 15676 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s19); 15677 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s20); 15678 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s21); 15679 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s22); 15680 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s23); 15681 } 15682 15683 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s24); 15684 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s25); 15685 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s26); 15686 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s27); 15687 15688 TEARDOWN(); 15689 } 15690 15691 15692 TEST(default_nan_float) { 15693 float sn = RawbitsToFloat(0x7f951111); 15694 float sm = RawbitsToFloat(0x7f952222); 15695 float sa = RawbitsToFloat(0x7f95aaaa); 15696 float qn = RawbitsToFloat(0x7fea1111); 15697 float qm = RawbitsToFloat(0x7fea2222); 15698 float qa = RawbitsToFloat(0x7feaaaaa); 15699 VIXL_ASSERT(IsSignallingNaN(sn)); 15700 VIXL_ASSERT(IsSignallingNaN(sm)); 15701 VIXL_ASSERT(IsSignallingNaN(sa)); 15702 VIXL_ASSERT(IsQuietNaN(qn)); 15703 VIXL_ASSERT(IsQuietNaN(qm)); 15704 VIXL_ASSERT(IsQuietNaN(qa)); 15705 15706 // - Signalling NaNs 15707 DefaultNaNHelper(sn, 0.0f, 0.0f); 15708 DefaultNaNHelper(0.0f, sm, 0.0f); 15709 DefaultNaNHelper(0.0f, 0.0f, sa); 15710 DefaultNaNHelper(sn, sm, 0.0f); 15711 DefaultNaNHelper(0.0f, sm, sa); 15712 DefaultNaNHelper(sn, 0.0f, sa); 15713 DefaultNaNHelper(sn, sm, sa); 15714 // - Quiet NaNs 15715 DefaultNaNHelper(qn, 0.0f, 0.0f); 15716 DefaultNaNHelper(0.0f, qm, 0.0f); 15717 DefaultNaNHelper(0.0f, 0.0f, qa); 15718 DefaultNaNHelper(qn, qm, 0.0f); 15719 DefaultNaNHelper(0.0f, qm, qa); 15720 DefaultNaNHelper(qn, 0.0f, qa); 15721 DefaultNaNHelper(qn, qm, qa); 15722 // - Mixed NaNs 15723 DefaultNaNHelper(qn, sm, sa); 15724 DefaultNaNHelper(sn, qm, sa); 15725 DefaultNaNHelper(sn, sm, qa); 15726 DefaultNaNHelper(qn, qm, sa); 15727 DefaultNaNHelper(sn, qm, qa); 15728 DefaultNaNHelper(qn, sm, qa); 15729 DefaultNaNHelper(qn, qm, qa); 15730 } 15731 15732 15733 static void DefaultNaNHelper(double n, double m, double a) { 15734 VIXL_ASSERT(std::isnan(n) || std::isnan(m) || std::isnan(a)); 15735 15736 bool test_1op = std::isnan(n); 15737 bool test_2op = std::isnan(n) || std::isnan(m); 15738 15739 SETUP(); 15740 START(); 15741 15742 // Enable Default-NaN mode in the FPCR. 15743 __ Mrs(x0, FPCR); 15744 __ Orr(x1, x0, DN_mask); 15745 __ Msr(FPCR, x1); 15746 15747 // Execute a number of instructions which all use ProcessNaNs, and check that 15748 // they all produce the default NaN. 15749 __ Fmov(d0, n); 15750 __ Fmov(d1, m); 15751 __ Fmov(d2, a); 15752 15753 if (test_1op) { 15754 // Operations that always propagate NaNs unchanged, even signalling NaNs. 15755 __ Fmov(d10, d0); 15756 __ Fabs(d11, d0); 15757 __ Fneg(d12, d0); 15758 15759 // Operations that use ProcessNaN. 15760 __ Fsqrt(d13, d0); 15761 __ Frinta(d14, d0); 15762 __ Frintn(d15, d0); 15763 __ Frintz(d16, d0); 15764 15765 // Fcvt usually has special NaN handling, but it respects default-NaN mode. 
15766 __ Fcvt(s17, d0); 15767 } 15768 15769 if (test_2op) { 15770 __ Fadd(d18, d0, d1); 15771 __ Fsub(d19, d0, d1); 15772 __ Fmul(d20, d0, d1); 15773 __ Fdiv(d21, d0, d1); 15774 __ Fmax(d22, d0, d1); 15775 __ Fmin(d23, d0, d1); 15776 } 15777 15778 __ Fmadd(d24, d0, d1, d2); 15779 __ Fmsub(d25, d0, d1, d2); 15780 __ Fnmadd(d26, d0, d1, d2); 15781 __ Fnmsub(d27, d0, d1, d2); 15782 15783 // Restore FPCR. 15784 __ Msr(FPCR, x0); 15785 15786 END(); 15787 RUN(); 15788 15789 if (test_1op) { 15790 uint64_t n_raw = DoubleToRawbits(n); 15791 ASSERT_EQUAL_FP64(n, d10); 15792 ASSERT_EQUAL_FP64(RawbitsToDouble(n_raw & ~kDSignMask), d11); 15793 ASSERT_EQUAL_FP64(RawbitsToDouble(n_raw ^ kDSignMask), d12); 15794 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d13); 15795 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d14); 15796 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d15); 15797 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d16); 15798 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s17); 15799 } 15800 15801 if (test_2op) { 15802 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d18); 15803 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d19); 15804 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d20); 15805 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d21); 15806 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d22); 15807 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d23); 15808 } 15809 15810 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d24); 15811 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d25); 15812 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d26); 15813 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d27); 15814 15815 TEARDOWN(); 15816 } 15817 15818 15819 TEST(default_nan_double) { 15820 double sn = RawbitsToDouble(0x7ff5555511111111); 15821 double sm = RawbitsToDouble(0x7ff5555522222222); 15822 double sa = RawbitsToDouble(0x7ff55555aaaaaaaa); 15823 double qn = RawbitsToDouble(0x7ffaaaaa11111111); 15824 double qm = RawbitsToDouble(0x7ffaaaaa22222222); 15825 double qa = RawbitsToDouble(0x7ffaaaaaaaaaaaaa); 15826 VIXL_ASSERT(IsSignallingNaN(sn)); 15827 VIXL_ASSERT(IsSignallingNaN(sm)); 15828 VIXL_ASSERT(IsSignallingNaN(sa)); 15829 VIXL_ASSERT(IsQuietNaN(qn)); 15830 VIXL_ASSERT(IsQuietNaN(qm)); 15831 VIXL_ASSERT(IsQuietNaN(qa)); 15832 15833 // - Signalling NaNs 15834 DefaultNaNHelper(sn, 0.0, 0.0); 15835 DefaultNaNHelper(0.0, sm, 0.0); 15836 DefaultNaNHelper(0.0, 0.0, sa); 15837 DefaultNaNHelper(sn, sm, 0.0); 15838 DefaultNaNHelper(0.0, sm, sa); 15839 DefaultNaNHelper(sn, 0.0, sa); 15840 DefaultNaNHelper(sn, sm, sa); 15841 // - Quiet NaNs 15842 DefaultNaNHelper(qn, 0.0, 0.0); 15843 DefaultNaNHelper(0.0, qm, 0.0); 15844 DefaultNaNHelper(0.0, 0.0, qa); 15845 DefaultNaNHelper(qn, qm, 0.0); 15846 DefaultNaNHelper(0.0, qm, qa); 15847 DefaultNaNHelper(qn, 0.0, qa); 15848 DefaultNaNHelper(qn, qm, qa); 15849 // - Mixed NaNs 15850 DefaultNaNHelper(qn, sm, sa); 15851 DefaultNaNHelper(sn, qm, sa); 15852 DefaultNaNHelper(sn, sm, qa); 15853 DefaultNaNHelper(qn, qm, sa); 15854 DefaultNaNHelper(sn, qm, qa); 15855 DefaultNaNHelper(qn, sm, qa); 15856 DefaultNaNHelper(qn, qm, qa); 15857 } 15858 15859 15860 TEST(ldar_stlr) { 15861 // The middle value is read, modified, and written. The padding exists only to 15862 // check for over-write. 
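  // Note: Ldar and Stlr provide acquire/release ordering but, unlike the
  // exclusive-access instructions tested below, they do not use the exclusive
  // monitor, so the stores always succeed and no retry loop is needed.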
15863 uint8_t b[] = {0, 0x12, 0}; 15864 uint16_t h[] = {0, 0x1234, 0}; 15865 uint32_t w[] = {0, 0x12345678, 0}; 15866 uint64_t x[] = {0, 0x123456789abcdef0, 0}; 15867 15868 SETUP(); 15869 START(); 15870 15871 __ Mov(x10, reinterpret_cast<uintptr_t>(&b[1])); 15872 __ Ldarb(w0, MemOperand(x10)); 15873 __ Add(w0, w0, 1); 15874 __ Stlrb(w0, MemOperand(x10)); 15875 15876 __ Mov(x10, reinterpret_cast<uintptr_t>(&h[1])); 15877 __ Ldarh(w0, MemOperand(x10)); 15878 __ Add(w0, w0, 1); 15879 __ Stlrh(w0, MemOperand(x10)); 15880 15881 __ Mov(x10, reinterpret_cast<uintptr_t>(&w[1])); 15882 __ Ldar(w0, MemOperand(x10)); 15883 __ Add(w0, w0, 1); 15884 __ Stlr(w0, MemOperand(x10)); 15885 15886 __ Mov(x10, reinterpret_cast<uintptr_t>(&x[1])); 15887 __ Ldar(x0, MemOperand(x10)); 15888 __ Add(x0, x0, 1); 15889 __ Stlr(x0, MemOperand(x10)); 15890 15891 END(); 15892 RUN(); 15893 15894 ASSERT_EQUAL_32(0x13, b[1]); 15895 ASSERT_EQUAL_32(0x1235, h[1]); 15896 ASSERT_EQUAL_32(0x12345679, w[1]); 15897 ASSERT_EQUAL_64(0x123456789abcdef1, x[1]); 15898 15899 // Check for over-write. 15900 ASSERT_EQUAL_32(0, b[0]); 15901 ASSERT_EQUAL_32(0, b[2]); 15902 ASSERT_EQUAL_32(0, h[0]); 15903 ASSERT_EQUAL_32(0, h[2]); 15904 ASSERT_EQUAL_32(0, w[0]); 15905 ASSERT_EQUAL_32(0, w[2]); 15906 ASSERT_EQUAL_64(0, x[0]); 15907 ASSERT_EQUAL_64(0, x[2]); 15908 15909 TEARDOWN(); 15910 } 15911 15912 15913 TEST(ldxr_stxr) { 15914 // The middle value is read, modified, and written. The padding exists only to 15915 // check for over-write. 15916 uint8_t b[] = {0, 0x12, 0}; 15917 uint16_t h[] = {0, 0x1234, 0}; 15918 uint32_t w[] = {0, 0x12345678, 0}; 15919 uint64_t x[] = {0, 0x123456789abcdef0, 0}; 15920 15921 // As above, but get suitably-aligned values for ldxp and stxp. 15922 uint32_t wp_data[] = {0, 0, 0, 0, 0}; 15923 uint32_t* wp = AlignUp(wp_data + 1, kWRegSizeInBytes * 2) - 1; 15924 wp[1] = 0x12345678; // wp[1] is 64-bit-aligned. 15925 wp[2] = 0x87654321; 15926 uint64_t xp_data[] = {0, 0, 0, 0, 0}; 15927 uint64_t* xp = AlignUp(xp_data + 1, kXRegSizeInBytes * 2) - 1; 15928 xp[1] = 0x123456789abcdef0; // xp[1] is 128-bit-aligned. 
15929 xp[2] = 0x0fedcba987654321; 15930 15931 SETUP(); 15932 START(); 15933 15934 __ Mov(x10, reinterpret_cast<uintptr_t>(&b[1])); 15935 Label try_b; 15936 __ Bind(&try_b); 15937 __ Ldxrb(w0, MemOperand(x10)); 15938 __ Add(w0, w0, 1); 15939 __ Stxrb(w5, w0, MemOperand(x10)); 15940 __ Cbnz(w5, &try_b); 15941 15942 __ Mov(x10, reinterpret_cast<uintptr_t>(&h[1])); 15943 Label try_h; 15944 __ Bind(&try_h); 15945 __ Ldxrh(w0, MemOperand(x10)); 15946 __ Add(w0, w0, 1); 15947 __ Stxrh(w5, w0, MemOperand(x10)); 15948 __ Cbnz(w5, &try_h); 15949 15950 __ Mov(x10, reinterpret_cast<uintptr_t>(&w[1])); 15951 Label try_w; 15952 __ Bind(&try_w); 15953 __ Ldxr(w0, MemOperand(x10)); 15954 __ Add(w0, w0, 1); 15955 __ Stxr(w5, w0, MemOperand(x10)); 15956 __ Cbnz(w5, &try_w); 15957 15958 __ Mov(x10, reinterpret_cast<uintptr_t>(&x[1])); 15959 Label try_x; 15960 __ Bind(&try_x); 15961 __ Ldxr(x0, MemOperand(x10)); 15962 __ Add(x0, x0, 1); 15963 __ Stxr(w5, x0, MemOperand(x10)); 15964 __ Cbnz(w5, &try_x); 15965 15966 __ Mov(x10, reinterpret_cast<uintptr_t>(&wp[1])); 15967 Label try_wp; 15968 __ Bind(&try_wp); 15969 __ Ldxp(w0, w1, MemOperand(x10)); 15970 __ Add(w0, w0, 1); 15971 __ Add(w1, w1, 1); 15972 __ Stxp(w5, w0, w1, MemOperand(x10)); 15973 __ Cbnz(w5, &try_wp); 15974 15975 __ Mov(x10, reinterpret_cast<uintptr_t>(&xp[1])); 15976 Label try_xp; 15977 __ Bind(&try_xp); 15978 __ Ldxp(x0, x1, MemOperand(x10)); 15979 __ Add(x0, x0, 1); 15980 __ Add(x1, x1, 1); 15981 __ Stxp(w5, x0, x1, MemOperand(x10)); 15982 __ Cbnz(w5, &try_xp); 15983 15984 END(); 15985 RUN(); 15986 15987 ASSERT_EQUAL_32(0x13, b[1]); 15988 ASSERT_EQUAL_32(0x1235, h[1]); 15989 ASSERT_EQUAL_32(0x12345679, w[1]); 15990 ASSERT_EQUAL_64(0x123456789abcdef1, x[1]); 15991 ASSERT_EQUAL_32(0x12345679, wp[1]); 15992 ASSERT_EQUAL_32(0x87654322, wp[2]); 15993 ASSERT_EQUAL_64(0x123456789abcdef1, xp[1]); 15994 ASSERT_EQUAL_64(0x0fedcba987654322, xp[2]); 15995 15996 // Check for over-write. 15997 ASSERT_EQUAL_32(0, b[0]); 15998 ASSERT_EQUAL_32(0, b[2]); 15999 ASSERT_EQUAL_32(0, h[0]); 16000 ASSERT_EQUAL_32(0, h[2]); 16001 ASSERT_EQUAL_32(0, w[0]); 16002 ASSERT_EQUAL_32(0, w[2]); 16003 ASSERT_EQUAL_64(0, x[0]); 16004 ASSERT_EQUAL_64(0, x[2]); 16005 ASSERT_EQUAL_32(0, wp[0]); 16006 ASSERT_EQUAL_32(0, wp[3]); 16007 ASSERT_EQUAL_64(0, xp[0]); 16008 ASSERT_EQUAL_64(0, xp[3]); 16009 16010 TEARDOWN(); 16011 } 16012 16013 16014 TEST(ldaxr_stlxr) { 16015 // The middle value is read, modified, and written. The padding exists only to 16016 // check for over-write. 16017 uint8_t b[] = {0, 0x12, 0}; 16018 uint16_t h[] = {0, 0x1234, 0}; 16019 uint32_t w[] = {0, 0x12345678, 0}; 16020 uint64_t x[] = {0, 0x123456789abcdef0, 0}; 16021 16022 // As above, but get suitably-aligned values for ldxp and stxp. 16023 uint32_t wp_data[] = {0, 0, 0, 0, 0}; 16024 uint32_t* wp = AlignUp(wp_data + 1, kWRegSizeInBytes * 2) - 1; 16025 wp[1] = 0x12345678; // wp[1] is 64-bit-aligned. 16026 wp[2] = 0x87654321; 16027 uint64_t xp_data[] = {0, 0, 0, 0, 0}; 16028 uint64_t* xp = AlignUp(xp_data + 1, kXRegSizeInBytes * 2) - 1; 16029 xp[1] = 0x123456789abcdef0; // xp[1] is 128-bit-aligned. 
16030 xp[2] = 0x0fedcba987654321; 16031 16032 SETUP(); 16033 START(); 16034 16035 __ Mov(x10, reinterpret_cast<uintptr_t>(&b[1])); 16036 Label try_b; 16037 __ Bind(&try_b); 16038 __ Ldaxrb(w0, MemOperand(x10)); 16039 __ Add(w0, w0, 1); 16040 __ Stlxrb(w5, w0, MemOperand(x10)); 16041 __ Cbnz(w5, &try_b); 16042 16043 __ Mov(x10, reinterpret_cast<uintptr_t>(&h[1])); 16044 Label try_h; 16045 __ Bind(&try_h); 16046 __ Ldaxrh(w0, MemOperand(x10)); 16047 __ Add(w0, w0, 1); 16048 __ Stlxrh(w5, w0, MemOperand(x10)); 16049 __ Cbnz(w5, &try_h); 16050 16051 __ Mov(x10, reinterpret_cast<uintptr_t>(&w[1])); 16052 Label try_w; 16053 __ Bind(&try_w); 16054 __ Ldaxr(w0, MemOperand(x10)); 16055 __ Add(w0, w0, 1); 16056 __ Stlxr(w5, w0, MemOperand(x10)); 16057 __ Cbnz(w5, &try_w); 16058 16059 __ Mov(x10, reinterpret_cast<uintptr_t>(&x[1])); 16060 Label try_x; 16061 __ Bind(&try_x); 16062 __ Ldaxr(x0, MemOperand(x10)); 16063 __ Add(x0, x0, 1); 16064 __ Stlxr(w5, x0, MemOperand(x10)); 16065 __ Cbnz(w5, &try_x); 16066 16067 __ Mov(x10, reinterpret_cast<uintptr_t>(&wp[1])); 16068 Label try_wp; 16069 __ Bind(&try_wp); 16070 __ Ldaxp(w0, w1, MemOperand(x10)); 16071 __ Add(w0, w0, 1); 16072 __ Add(w1, w1, 1); 16073 __ Stlxp(w5, w0, w1, MemOperand(x10)); 16074 __ Cbnz(w5, &try_wp); 16075 16076 __ Mov(x10, reinterpret_cast<uintptr_t>(&xp[1])); 16077 Label try_xp; 16078 __ Bind(&try_xp); 16079 __ Ldaxp(x0, x1, MemOperand(x10)); 16080 __ Add(x0, x0, 1); 16081 __ Add(x1, x1, 1); 16082 __ Stlxp(w5, x0, x1, MemOperand(x10)); 16083 __ Cbnz(w5, &try_xp); 16084 16085 END(); 16086 RUN(); 16087 16088 ASSERT_EQUAL_32(0x13, b[1]); 16089 ASSERT_EQUAL_32(0x1235, h[1]); 16090 ASSERT_EQUAL_32(0x12345679, w[1]); 16091 ASSERT_EQUAL_64(0x123456789abcdef1, x[1]); 16092 ASSERT_EQUAL_32(0x12345679, wp[1]); 16093 ASSERT_EQUAL_32(0x87654322, wp[2]); 16094 ASSERT_EQUAL_64(0x123456789abcdef1, xp[1]); 16095 ASSERT_EQUAL_64(0x0fedcba987654322, xp[2]); 16096 16097 // Check for over-write. 16098 ASSERT_EQUAL_32(0, b[0]); 16099 ASSERT_EQUAL_32(0, b[2]); 16100 ASSERT_EQUAL_32(0, h[0]); 16101 ASSERT_EQUAL_32(0, h[2]); 16102 ASSERT_EQUAL_32(0, w[0]); 16103 ASSERT_EQUAL_32(0, w[2]); 16104 ASSERT_EQUAL_64(0, x[0]); 16105 ASSERT_EQUAL_64(0, x[2]); 16106 ASSERT_EQUAL_32(0, wp[0]); 16107 ASSERT_EQUAL_32(0, wp[3]); 16108 ASSERT_EQUAL_64(0, xp[0]); 16109 ASSERT_EQUAL_64(0, xp[3]); 16110 16111 TEARDOWN(); 16112 } 16113 16114 16115 TEST(clrex) { 16116 // This data should never be written. 
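  // Each sequence below performs a load-exclusive, then uses Clrex to clear
  // the local exclusive monitor, so the matching store-exclusive must fail
  // and write 1 to its status register (accumulated in w6).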
16117 uint64_t data[] = {0, 0, 0}; 16118 uint64_t* data_aligned = AlignUp(data, kXRegSizeInBytes * 2); 16119 16120 SETUP(); 16121 START(); 16122 16123 __ Mov(x10, reinterpret_cast<uintptr_t>(data_aligned)); 16124 __ Mov(w6, 0); 16125 16126 __ Ldxrb(w0, MemOperand(x10)); 16127 __ Clrex(); 16128 __ Add(w0, w0, 1); 16129 __ Stxrb(w5, w0, MemOperand(x10)); 16130 __ Add(w6, w6, w5); 16131 16132 __ Ldxrh(w0, MemOperand(x10)); 16133 __ Clrex(); 16134 __ Add(w0, w0, 1); 16135 __ Stxrh(w5, w0, MemOperand(x10)); 16136 __ Add(w6, w6, w5); 16137 16138 __ Ldxr(w0, MemOperand(x10)); 16139 __ Clrex(); 16140 __ Add(w0, w0, 1); 16141 __ Stxr(w5, w0, MemOperand(x10)); 16142 __ Add(w6, w6, w5); 16143 16144 __ Ldxr(x0, MemOperand(x10)); 16145 __ Clrex(); 16146 __ Add(x0, x0, 1); 16147 __ Stxr(w5, x0, MemOperand(x10)); 16148 __ Add(w6, w6, w5); 16149 16150 __ Ldxp(w0, w1, MemOperand(x10)); 16151 __ Clrex(); 16152 __ Add(w0, w0, 1); 16153 __ Add(w1, w1, 1); 16154 __ Stxp(w5, w0, w1, MemOperand(x10)); 16155 __ Add(w6, w6, w5); 16156 16157 __ Ldxp(x0, x1, MemOperand(x10)); 16158 __ Clrex(); 16159 __ Add(x0, x0, 1); 16160 __ Add(x1, x1, 1); 16161 __ Stxp(w5, x0, x1, MemOperand(x10)); 16162 __ Add(w6, w6, w5); 16163 16164 // Acquire-release variants. 16165 16166 __ Ldaxrb(w0, MemOperand(x10)); 16167 __ Clrex(); 16168 __ Add(w0, w0, 1); 16169 __ Stlxrb(w5, w0, MemOperand(x10)); 16170 __ Add(w6, w6, w5); 16171 16172 __ Ldaxrh(w0, MemOperand(x10)); 16173 __ Clrex(); 16174 __ Add(w0, w0, 1); 16175 __ Stlxrh(w5, w0, MemOperand(x10)); 16176 __ Add(w6, w6, w5); 16177 16178 __ Ldaxr(w0, MemOperand(x10)); 16179 __ Clrex(); 16180 __ Add(w0, w0, 1); 16181 __ Stlxr(w5, w0, MemOperand(x10)); 16182 __ Add(w6, w6, w5); 16183 16184 __ Ldaxr(x0, MemOperand(x10)); 16185 __ Clrex(); 16186 __ Add(x0, x0, 1); 16187 __ Stlxr(w5, x0, MemOperand(x10)); 16188 __ Add(w6, w6, w5); 16189 16190 __ Ldaxp(w0, w1, MemOperand(x10)); 16191 __ Clrex(); 16192 __ Add(w0, w0, 1); 16193 __ Add(w1, w1, 1); 16194 __ Stlxp(w5, w0, w1, MemOperand(x10)); 16195 __ Add(w6, w6, w5); 16196 16197 __ Ldaxp(x0, x1, MemOperand(x10)); 16198 __ Clrex(); 16199 __ Add(x0, x0, 1); 16200 __ Add(x1, x1, 1); 16201 __ Stlxp(w5, x0, x1, MemOperand(x10)); 16202 __ Add(w6, w6, w5); 16203 16204 END(); 16205 RUN(); 16206 16207 // None of the 12 store-exclusives should have succeeded. 16208 ASSERT_EQUAL_32(12, w6); 16209 16210 ASSERT_EQUAL_64(0, data[0]); 16211 ASSERT_EQUAL_64(0, data[1]); 16212 ASSERT_EQUAL_64(0, data[2]); 16213 16214 TEARDOWN(); 16215 } 16216 16217 16218 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64 16219 // Check that the simulator occasionally makes store-exclusive fail. 16220 TEST(ldxr_stxr_fail) { 16221 uint64_t data[] = {0, 0, 0}; 16222 uint64_t* data_aligned = AlignUp(data, kXRegSizeInBytes * 2); 16223 16224 // Impose a hard limit on the number of attempts, so the test cannot hang. 16225 static const uint64_t kWatchdog = 10000; 16226 Label done; 16227 16228 SETUP(); 16229 START(); 16230 16231 __ Mov(x10, reinterpret_cast<uintptr_t>(data_aligned)); 16232 __ Mov(x11, kWatchdog); 16233 16234 // This loop is the opposite of what we normally do with ldxr and stxr; we 16235 // keep trying until we fail (or the watchdog counter runs out). 16236 Label try_b; 16237 __ Bind(&try_b); 16238 __ Ldxrb(w0, MemOperand(x10)); 16239 __ Stxrb(w5, w0, MemOperand(x10)); 16240 // Check the watchdog counter. 16241 __ Sub(x11, x11, 1); 16242 __ Cbz(x11, &done); 16243 // Check the exclusive-store result. 
  __ Cbz(w5, &try_b);

  Label try_h;
  __ Bind(&try_h);
  __ Ldxrh(w0, MemOperand(x10));
  __ Stxrh(w5, w0, MemOperand(x10));
  __ Sub(x11, x11, 1);
  __ Cbz(x11, &done);
  __ Cbz(w5, &try_h);

  Label try_w;
  __ Bind(&try_w);
  __ Ldxr(w0, MemOperand(x10));
  __ Stxr(w5, w0, MemOperand(x10));
  __ Sub(x11, x11, 1);
  __ Cbz(x11, &done);
  __ Cbz(w5, &try_w);

  Label try_x;
  __ Bind(&try_x);
  __ Ldxr(x0, MemOperand(x10));
  __ Stxr(w5, x0, MemOperand(x10));
  __ Sub(x11, x11, 1);
  __ Cbz(x11, &done);
  __ Cbz(w5, &try_x);

  Label try_wp;
  __ Bind(&try_wp);
  __ Ldxp(w0, w1, MemOperand(x10));
  __ Stxp(w5, w0, w1, MemOperand(x10));
  __ Sub(x11, x11, 1);
  __ Cbz(x11, &done);
  __ Cbz(w5, &try_wp);

  Label try_xp;
  __ Bind(&try_xp);
  __ Ldxp(x0, x1, MemOperand(x10));
  __ Stxp(w5, x0, x1, MemOperand(x10));
  __ Sub(x11, x11, 1);
  __ Cbz(x11, &done);
  __ Cbz(w5, &try_xp);

  __ Bind(&done);
  // Trigger an error if x11 (watchdog) is zero.
  __ Cmp(x11, 0);
  __ Cset(x12, eq);

  END();
  RUN();

  // Check that the watchdog counter didn't run out.
  ASSERT_EQUAL_64(0, x12);

  TEARDOWN();
}
#endif


#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
// Check that the simulator occasionally makes store-exclusive fail.
TEST(ldaxr_stlxr_fail) {
  uint64_t data[] = {0, 0, 0};
  uint64_t* data_aligned = AlignUp(data, kXRegSizeInBytes * 2);

  // Impose a hard limit on the number of attempts, so the test cannot hang.
  static const uint64_t kWatchdog = 10000;
  Label done;

  SETUP();
  START();

  __ Mov(x10, reinterpret_cast<uintptr_t>(data_aligned));
  __ Mov(x11, kWatchdog);

  // This loop is the opposite of what we normally do with ldaxr and stlxr; we
  // keep trying until we fail (or the watchdog counter runs out).
  Label try_b;
  __ Bind(&try_b);
  __ Ldaxrb(w0, MemOperand(x10));
  __ Stlxrb(w5, w0, MemOperand(x10));
  // Check the watchdog counter.
  __ Sub(x11, x11, 1);
  __ Cbz(x11, &done);
  // Check the exclusive-store result.
  __ Cbz(w5, &try_b);

  Label try_h;
  __ Bind(&try_h);
  __ Ldaxrh(w0, MemOperand(x10));
  __ Stlxrh(w5, w0, MemOperand(x10));
  __ Sub(x11, x11, 1);
  __ Cbz(x11, &done);
  __ Cbz(w5, &try_h);

  Label try_w;
  __ Bind(&try_w);
  __ Ldaxr(w0, MemOperand(x10));
  __ Stlxr(w5, w0, MemOperand(x10));
  __ Sub(x11, x11, 1);
  __ Cbz(x11, &done);
  __ Cbz(w5, &try_w);

  Label try_x;
  __ Bind(&try_x);
  __ Ldaxr(x0, MemOperand(x10));
  __ Stlxr(w5, x0, MemOperand(x10));
  __ Sub(x11, x11, 1);
  __ Cbz(x11, &done);
  __ Cbz(w5, &try_x);

  Label try_wp;
  __ Bind(&try_wp);
  __ Ldaxp(w0, w1, MemOperand(x10));
  __ Stlxp(w5, w0, w1, MemOperand(x10));
  __ Sub(x11, x11, 1);
  __ Cbz(x11, &done);
  __ Cbz(w5, &try_wp);

  Label try_xp;
  __ Bind(&try_xp);
  __ Ldaxp(x0, x1, MemOperand(x10));
  __ Stlxp(w5, x0, x1, MemOperand(x10));
  __ Sub(x11, x11, 1);
  __ Cbz(x11, &done);
  __ Cbz(w5, &try_xp);

  __ Bind(&done);
  // Trigger an error if x11 (watchdog) is zero.
16372 __ Cmp(x11, 0); 16373 __ Cset(x12, eq); 16374 16375 END(); 16376 RUN(); 16377 16378 // Check that the watchdog counter didn't run out. 16379 ASSERT_EQUAL_64(0, x12); 16380 16381 TEARDOWN(); 16382 } 16383 #endif 16384 16385 16386 TEST(load_store_tagged_immediate_offset) { 16387 uint64_t tags[] = {0x00, 0x1, 0x55, 0xff}; 16388 int tag_count = sizeof(tags) / sizeof(tags[0]); 16389 16390 const int kMaxDataLength = 160; 16391 16392 for (int i = 0; i < tag_count; i++) { 16393 unsigned char src[kMaxDataLength]; 16394 uint64_t src_raw = reinterpret_cast<uint64_t>(src); 16395 uint64_t src_tag = tags[i]; 16396 uint64_t src_tagged = CPU::SetPointerTag(src_raw, src_tag); 16397 16398 for (int k = 0; k < kMaxDataLength; k++) { 16399 src[k] = k + 1; 16400 } 16401 16402 for (int j = 0; j < tag_count; j++) { 16403 unsigned char dst[kMaxDataLength]; 16404 uint64_t dst_raw = reinterpret_cast<uint64_t>(dst); 16405 uint64_t dst_tag = tags[j]; 16406 uint64_t dst_tagged = CPU::SetPointerTag(dst_raw, dst_tag); 16407 16408 memset(dst, 0, kMaxDataLength); 16409 16410 SETUP(); 16411 START(); 16412 16413 __ Mov(x0, src_tagged); 16414 __ Mov(x1, dst_tagged); 16415 16416 int offset = 0; 16417 16418 // Scaled-immediate offsets. 16419 { 16420 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16421 __ ldp(q0, q1, MemOperand(x0, offset)); 16422 __ stp(q0, q1, MemOperand(x1, offset)); 16423 } 16424 offset += 2 * kQRegSizeInBytes; 16425 16426 { 16427 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16428 __ ldp(x2, x3, MemOperand(x0, offset)); 16429 __ stp(x2, x3, MemOperand(x1, offset)); 16430 } 16431 offset += 2 * kXRegSizeInBytes; 16432 16433 { 16434 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16435 __ ldpsw(x2, x3, MemOperand(x0, offset)); 16436 __ stp(w2, w3, MemOperand(x1, offset)); 16437 } 16438 offset += 2 * kWRegSizeInBytes; 16439 16440 { 16441 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16442 __ ldp(d0, d1, MemOperand(x0, offset)); 16443 __ stp(d0, d1, MemOperand(x1, offset)); 16444 } 16445 offset += 2 * kDRegSizeInBytes; 16446 16447 { 16448 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16449 __ ldp(w2, w3, MemOperand(x0, offset)); 16450 __ stp(w2, w3, MemOperand(x1, offset)); 16451 } 16452 offset += 2 * kWRegSizeInBytes; 16453 16454 { 16455 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16456 __ ldp(s0, s1, MemOperand(x0, offset)); 16457 __ stp(s0, s1, MemOperand(x1, offset)); 16458 } 16459 offset += 2 * kSRegSizeInBytes; 16460 16461 { 16462 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16463 __ ldr(x2, MemOperand(x0, offset), RequireScaledOffset); 16464 __ str(x2, MemOperand(x1, offset), RequireScaledOffset); 16465 } 16466 offset += kXRegSizeInBytes; 16467 16468 { 16469 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16470 __ ldr(d0, MemOperand(x0, offset), RequireScaledOffset); 16471 __ str(d0, MemOperand(x1, offset), RequireScaledOffset); 16472 } 16473 offset += kDRegSizeInBytes; 16474 16475 { 16476 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16477 __ ldr(w2, MemOperand(x0, offset), RequireScaledOffset); 16478 __ str(w2, MemOperand(x1, offset), RequireScaledOffset); 16479 } 16480 offset += kWRegSizeInBytes; 16481 16482 { 16483 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16484 __ ldr(s0, MemOperand(x0, offset), RequireScaledOffset); 16485 __ str(s0, MemOperand(x1, offset), RequireScaledOffset); 16486 } 16487 offset += kSRegSizeInBytes; 16488 16489 { 16490 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16491 __ 
ldrh(w2, MemOperand(x0, offset), RequireScaledOffset); 16492 __ strh(w2, MemOperand(x1, offset), RequireScaledOffset); 16493 } 16494 offset += 2; 16495 16496 { 16497 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16498 __ ldrsh(w2, MemOperand(x0, offset), RequireScaledOffset); 16499 __ strh(w2, MemOperand(x1, offset), RequireScaledOffset); 16500 } 16501 offset += 2; 16502 16503 { 16504 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16505 __ ldrb(w2, MemOperand(x0, offset), RequireScaledOffset); 16506 __ strb(w2, MemOperand(x1, offset), RequireScaledOffset); 16507 } 16508 offset += 1; 16509 16510 { 16511 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16512 __ ldrsb(w2, MemOperand(x0, offset), RequireScaledOffset); 16513 __ strb(w2, MemOperand(x1, offset), RequireScaledOffset); 16514 } 16515 offset += 1; 16516 16517 // Unscaled-immediate offsets. 16518 16519 { 16520 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16521 __ ldur(x2, MemOperand(x0, offset), RequireUnscaledOffset); 16522 __ stur(x2, MemOperand(x1, offset), RequireUnscaledOffset); 16523 } 16524 offset += kXRegSizeInBytes; 16525 16526 { 16527 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16528 __ ldur(d0, MemOperand(x0, offset), RequireUnscaledOffset); 16529 __ stur(d0, MemOperand(x1, offset), RequireUnscaledOffset); 16530 } 16531 offset += kDRegSizeInBytes; 16532 16533 { 16534 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16535 __ ldur(w2, MemOperand(x0, offset), RequireUnscaledOffset); 16536 __ stur(w2, MemOperand(x1, offset), RequireUnscaledOffset); 16537 } 16538 offset += kWRegSizeInBytes; 16539 16540 { 16541 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16542 __ ldur(s0, MemOperand(x0, offset), RequireUnscaledOffset); 16543 __ stur(s0, MemOperand(x1, offset), RequireUnscaledOffset); 16544 } 16545 offset += kSRegSizeInBytes; 16546 16547 { 16548 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16549 __ ldurh(w2, MemOperand(x0, offset), RequireUnscaledOffset); 16550 __ sturh(w2, MemOperand(x1, offset), RequireUnscaledOffset); 16551 } 16552 offset += 2; 16553 16554 { 16555 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16556 __ ldursh(w2, MemOperand(x0, offset), RequireUnscaledOffset); 16557 __ sturh(w2, MemOperand(x1, offset), RequireUnscaledOffset); 16558 } 16559 offset += 2; 16560 16561 { 16562 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16563 __ ldurb(w2, MemOperand(x0, offset), RequireUnscaledOffset); 16564 __ sturb(w2, MemOperand(x1, offset), RequireUnscaledOffset); 16565 } 16566 offset += 1; 16567 16568 { 16569 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16570 __ ldursb(w2, MemOperand(x0, offset), RequireUnscaledOffset); 16571 __ sturb(w2, MemOperand(x1, offset), RequireUnscaledOffset); 16572 } 16573 offset += 1; 16574 16575 // Extract the tag (so we can test that it was preserved correctly). 
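      // (The tag is the top byte of the pointer: kAddressTagOffset is 56 and
      // kAddressTagWidth is 8, so Ubfx recovers the value that was passed to
      // CPU::SetPointerTag.)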
16576 __ Ubfx(x0, x0, kAddressTagOffset, kAddressTagWidth); 16577 __ Ubfx(x1, x1, kAddressTagOffset, kAddressTagWidth); 16578 16579 VIXL_ASSERT(kMaxDataLength >= offset); 16580 16581 END(); 16582 RUN(); 16583 16584 ASSERT_EQUAL_64(src_tag, x0); 16585 ASSERT_EQUAL_64(dst_tag, x1); 16586 16587 for (int k = 0; k < offset; k++) { 16588 VIXL_CHECK(src[k] == dst[k]); 16589 } 16590 16591 TEARDOWN(); 16592 } 16593 } 16594 } 16595 16596 16597 TEST(load_store_tagged_immediate_preindex) { 16598 uint64_t tags[] = {0x00, 0x1, 0x55, 0xff}; 16599 int tag_count = sizeof(tags) / sizeof(tags[0]); 16600 16601 const int kMaxDataLength = 128; 16602 16603 for (int i = 0; i < tag_count; i++) { 16604 unsigned char src[kMaxDataLength]; 16605 uint64_t src_raw = reinterpret_cast<uint64_t>(src); 16606 uint64_t src_tag = tags[i]; 16607 uint64_t src_tagged = CPU::SetPointerTag(src_raw, src_tag); 16608 16609 for (int k = 0; k < kMaxDataLength; k++) { 16610 src[k] = k + 1; 16611 } 16612 16613 for (int j = 0; j < tag_count; j++) { 16614 unsigned char dst[kMaxDataLength]; 16615 uint64_t dst_raw = reinterpret_cast<uint64_t>(dst); 16616 uint64_t dst_tag = tags[j]; 16617 uint64_t dst_tagged = CPU::SetPointerTag(dst_raw, dst_tag); 16618 16619 for (int k = 0; k < kMaxDataLength; k++) { 16620 dst[k] = 0; 16621 } 16622 16623 SETUP(); 16624 START(); 16625 16626 // Each MemOperand must apply a pre-index equal to the size of the 16627 // previous access. 16628 16629 // Start with a non-zero preindex. 16630 int preindex = 62 * kXRegSizeInBytes; 16631 int data_length = 0; 16632 16633 __ Mov(x0, src_tagged - preindex); 16634 __ Mov(x1, dst_tagged - preindex); 16635 16636 { 16637 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16638 __ ldp(q0, q1, MemOperand(x0, preindex, PreIndex)); 16639 __ stp(q0, q1, MemOperand(x1, preindex, PreIndex)); 16640 } 16641 preindex = 2 * kQRegSizeInBytes; 16642 data_length = preindex; 16643 16644 { 16645 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16646 __ ldp(x2, x3, MemOperand(x0, preindex, PreIndex)); 16647 __ stp(x2, x3, MemOperand(x1, preindex, PreIndex)); 16648 } 16649 preindex = 2 * kXRegSizeInBytes; 16650 data_length += preindex; 16651 16652 { 16653 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16654 __ ldpsw(x2, x3, MemOperand(x0, preindex, PreIndex)); 16655 __ stp(w2, w3, MemOperand(x1, preindex, PreIndex)); 16656 } 16657 preindex = 2 * kWRegSizeInBytes; 16658 data_length += preindex; 16659 16660 { 16661 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16662 __ ldp(d0, d1, MemOperand(x0, preindex, PreIndex)); 16663 __ stp(d0, d1, MemOperand(x1, preindex, PreIndex)); 16664 } 16665 preindex = 2 * kDRegSizeInBytes; 16666 data_length += preindex; 16667 16668 { 16669 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16670 __ ldp(w2, w3, MemOperand(x0, preindex, PreIndex)); 16671 __ stp(w2, w3, MemOperand(x1, preindex, PreIndex)); 16672 } 16673 preindex = 2 * kWRegSizeInBytes; 16674 data_length += preindex; 16675 16676 { 16677 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16678 __ ldp(s0, s1, MemOperand(x0, preindex, PreIndex)); 16679 __ stp(s0, s1, MemOperand(x1, preindex, PreIndex)); 16680 } 16681 preindex = 2 * kSRegSizeInBytes; 16682 data_length += preindex; 16683 16684 { 16685 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16686 __ ldr(x2, MemOperand(x0, preindex, PreIndex)); 16687 __ str(x2, MemOperand(x1, preindex, PreIndex)); 16688 } 16689 preindex = kXRegSizeInBytes; 16690 data_length += preindex; 16691 16692 { 16693 ExactAssemblyScope 
scope(&masm, 2 * kInstructionSize); 16694 __ ldr(d0, MemOperand(x0, preindex, PreIndex)); 16695 __ str(d0, MemOperand(x1, preindex, PreIndex)); 16696 } 16697 preindex = kDRegSizeInBytes; 16698 data_length += preindex; 16699 16700 { 16701 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16702 __ ldr(w2, MemOperand(x0, preindex, PreIndex)); 16703 __ str(w2, MemOperand(x1, preindex, PreIndex)); 16704 } 16705 preindex = kWRegSizeInBytes; 16706 data_length += preindex; 16707 16708 { 16709 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16710 __ ldr(s0, MemOperand(x0, preindex, PreIndex)); 16711 __ str(s0, MemOperand(x1, preindex, PreIndex)); 16712 } 16713 preindex = kSRegSizeInBytes; 16714 data_length += preindex; 16715 16716 { 16717 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16718 __ ldrh(w2, MemOperand(x0, preindex, PreIndex)); 16719 __ strh(w2, MemOperand(x1, preindex, PreIndex)); 16720 } 16721 preindex = 2; 16722 data_length += preindex; 16723 16724 { 16725 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16726 __ ldrsh(w2, MemOperand(x0, preindex, PreIndex)); 16727 __ strh(w2, MemOperand(x1, preindex, PreIndex)); 16728 } 16729 preindex = 2; 16730 data_length += preindex; 16731 16732 { 16733 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16734 __ ldrb(w2, MemOperand(x0, preindex, PreIndex)); 16735 __ strb(w2, MemOperand(x1, preindex, PreIndex)); 16736 } 16737 preindex = 1; 16738 data_length += preindex; 16739 16740 { 16741 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16742 __ ldrsb(w2, MemOperand(x0, preindex, PreIndex)); 16743 __ strb(w2, MemOperand(x1, preindex, PreIndex)); 16744 } 16745 preindex = 1; 16746 data_length += preindex; 16747 16748 VIXL_ASSERT(kMaxDataLength >= data_length); 16749 16750 END(); 16751 RUN(); 16752 16753 // Check that the preindex was correctly applied in each operation, and 16754 // that the tag was preserved. 
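      // (Pre-indexing writes the updated address back before the access, so
      // after the final one-byte access x0 and x1 point at the last byte
      // copied, hence the "- preindex" term.)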
16755 ASSERT_EQUAL_64(src_tagged + data_length - preindex, x0); 16756 ASSERT_EQUAL_64(dst_tagged + data_length - preindex, x1); 16757 16758 for (int k = 0; k < data_length; k++) { 16759 VIXL_CHECK(src[k] == dst[k]); 16760 } 16761 16762 TEARDOWN(); 16763 } 16764 } 16765 } 16766 16767 16768 TEST(load_store_tagged_immediate_postindex) { 16769 uint64_t tags[] = {0x00, 0x1, 0x55, 0xff}; 16770 int tag_count = sizeof(tags) / sizeof(tags[0]); 16771 16772 const int kMaxDataLength = 128; 16773 16774 for (int i = 0; i < tag_count; i++) { 16775 unsigned char src[kMaxDataLength]; 16776 uint64_t src_raw = reinterpret_cast<uint64_t>(src); 16777 uint64_t src_tag = tags[i]; 16778 uint64_t src_tagged = CPU::SetPointerTag(src_raw, src_tag); 16779 16780 for (int k = 0; k < kMaxDataLength; k++) { 16781 src[k] = k + 1; 16782 } 16783 16784 for (int j = 0; j < tag_count; j++) { 16785 unsigned char dst[kMaxDataLength]; 16786 uint64_t dst_raw = reinterpret_cast<uint64_t>(dst); 16787 uint64_t dst_tag = tags[j]; 16788 uint64_t dst_tagged = CPU::SetPointerTag(dst_raw, dst_tag); 16789 16790 for (int k = 0; k < kMaxDataLength; k++) { 16791 dst[k] = 0; 16792 } 16793 16794 SETUP(); 16795 START(); 16796 16797 int postindex = 2 * kXRegSizeInBytes; 16798 int data_length = 0; 16799 16800 __ Mov(x0, src_tagged); 16801 __ Mov(x1, dst_tagged); 16802 16803 { 16804 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16805 __ ldp(x2, x3, MemOperand(x0, postindex, PostIndex)); 16806 __ stp(x2, x3, MemOperand(x1, postindex, PostIndex)); 16807 } 16808 data_length = postindex; 16809 16810 postindex = 2 * kQRegSizeInBytes; 16811 { 16812 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16813 __ ldp(q0, q1, MemOperand(x0, postindex, PostIndex)); 16814 __ stp(q0, q1, MemOperand(x1, postindex, PostIndex)); 16815 } 16816 data_length += postindex; 16817 16818 postindex = 2 * kWRegSizeInBytes; 16819 { 16820 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16821 __ ldpsw(x2, x3, MemOperand(x0, postindex, PostIndex)); 16822 __ stp(w2, w3, MemOperand(x1, postindex, PostIndex)); 16823 } 16824 data_length += postindex; 16825 16826 postindex = 2 * kDRegSizeInBytes; 16827 { 16828 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16829 __ ldp(d0, d1, MemOperand(x0, postindex, PostIndex)); 16830 __ stp(d0, d1, MemOperand(x1, postindex, PostIndex)); 16831 } 16832 data_length += postindex; 16833 16834 postindex = 2 * kWRegSizeInBytes; 16835 { 16836 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16837 __ ldp(w2, w3, MemOperand(x0, postindex, PostIndex)); 16838 __ stp(w2, w3, MemOperand(x1, postindex, PostIndex)); 16839 } 16840 data_length += postindex; 16841 16842 postindex = 2 * kSRegSizeInBytes; 16843 { 16844 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16845 __ ldp(s0, s1, MemOperand(x0, postindex, PostIndex)); 16846 __ stp(s0, s1, MemOperand(x1, postindex, PostIndex)); 16847 } 16848 data_length += postindex; 16849 16850 postindex = kXRegSizeInBytes; 16851 { 16852 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16853 __ ldr(x2, MemOperand(x0, postindex, PostIndex)); 16854 __ str(x2, MemOperand(x1, postindex, PostIndex)); 16855 } 16856 data_length += postindex; 16857 16858 postindex = kDRegSizeInBytes; 16859 { 16860 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16861 __ ldr(d0, MemOperand(x0, postindex, PostIndex)); 16862 __ str(d0, MemOperand(x1, postindex, PostIndex)); 16863 } 16864 data_length += postindex; 16865 16866 postindex = kWRegSizeInBytes; 16867 { 16868 ExactAssemblyScope scope(&masm, 2 * 
kInstructionSize); 16869 __ ldr(w2, MemOperand(x0, postindex, PostIndex)); 16870 __ str(w2, MemOperand(x1, postindex, PostIndex)); 16871 } 16872 data_length += postindex; 16873 16874 postindex = kSRegSizeInBytes; 16875 { 16876 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16877 __ ldr(s0, MemOperand(x0, postindex, PostIndex)); 16878 __ str(s0, MemOperand(x1, postindex, PostIndex)); 16879 } 16880 data_length += postindex; 16881 16882 postindex = 2; 16883 { 16884 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16885 __ ldrh(w2, MemOperand(x0, postindex, PostIndex)); 16886 __ strh(w2, MemOperand(x1, postindex, PostIndex)); 16887 } 16888 data_length += postindex; 16889 16890 postindex = 2; 16891 { 16892 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16893 __ ldrsh(w2, MemOperand(x0, postindex, PostIndex)); 16894 __ strh(w2, MemOperand(x1, postindex, PostIndex)); 16895 } 16896 data_length += postindex; 16897 16898 postindex = 1; 16899 { 16900 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16901 __ ldrb(w2, MemOperand(x0, postindex, PostIndex)); 16902 __ strb(w2, MemOperand(x1, postindex, PostIndex)); 16903 } 16904 data_length += postindex; 16905 16906 postindex = 1; 16907 { 16908 ExactAssemblyScope scope(&masm, 2 * kInstructionSize); 16909 __ ldrsb(w2, MemOperand(x0, postindex, PostIndex)); 16910 __ strb(w2, MemOperand(x1, postindex, PostIndex)); 16911 } 16912 data_length += postindex; 16913 16914 VIXL_ASSERT(kMaxDataLength >= data_length); 16915 16916 END(); 16917 RUN(); 16918 16919 // Check that the postindex was correctly applied in each operation, and 16920 // that the tag was preserved. 16921 ASSERT_EQUAL_64(src_tagged + data_length, x0); 16922 ASSERT_EQUAL_64(dst_tagged + data_length, x1); 16923 16924 for (int k = 0; k < data_length; k++) { 16925 VIXL_CHECK(src[k] == dst[k]); 16926 } 16927 16928 TEARDOWN(); 16929 } 16930 } 16931 } 16932 16933 16934 TEST(load_store_tagged_register_offset) { 16935 uint64_t tags[] = {0x00, 0x1, 0x55, 0xff}; 16936 int tag_count = sizeof(tags) / sizeof(tags[0]); 16937 16938 const int kMaxDataLength = 128; 16939 16940 for (int i = 0; i < tag_count; i++) { 16941 unsigned char src[kMaxDataLength]; 16942 uint64_t src_raw = reinterpret_cast<uint64_t>(src); 16943 uint64_t src_tag = tags[i]; 16944 uint64_t src_tagged = CPU::SetPointerTag(src_raw, src_tag); 16945 16946 for (int k = 0; k < kMaxDataLength; k++) { 16947 src[k] = k + 1; 16948 } 16949 16950 for (int j = 0; j < tag_count; j++) { 16951 unsigned char dst[kMaxDataLength]; 16952 uint64_t dst_raw = reinterpret_cast<uint64_t>(dst); 16953 uint64_t dst_tag = tags[j]; 16954 uint64_t dst_tagged = CPU::SetPointerTag(dst_raw, dst_tag); 16955 16956 // Also tag the offset register; the operation should still succeed. 
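      // (This relies on AArch64 address tagging: the top byte of a load/store
      // address is ignored, so tag bits in the base or offset register only
      // affect that ignored byte of the computed address.)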
      for (int o = 0; o < tag_count; o++) {
        uint64_t offset_base = CPU::SetPointerTag(UINT64_C(0), tags[o]);
        int data_length = 0;

        for (int k = 0; k < kMaxDataLength; k++) {
          dst[k] = 0;
        }

        SETUP();
        START();

        __ Mov(x0, src_tagged);
        __ Mov(x1, dst_tagged);

        __ Mov(x10, offset_base + data_length);
        {
          ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
          __ ldr(x2, MemOperand(x0, x10));
          __ str(x2, MemOperand(x1, x10));
        }
        data_length += kXRegSizeInBytes;

        __ Mov(x10, offset_base + data_length);
        {
          ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
          __ ldr(d0, MemOperand(x0, x10));
          __ str(d0, MemOperand(x1, x10));
        }
        data_length += kDRegSizeInBytes;

        __ Mov(x10, offset_base + data_length);
        {
          ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
          __ ldr(w2, MemOperand(x0, x10));
          __ str(w2, MemOperand(x1, x10));
        }
        data_length += kWRegSizeInBytes;

        __ Mov(x10, offset_base + data_length);
        {
          ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
          __ ldr(s0, MemOperand(x0, x10));
          __ str(s0, MemOperand(x1, x10));
        }
        data_length += kSRegSizeInBytes;

        __ Mov(x10, offset_base + data_length);
        {
          ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
          __ ldrh(w2, MemOperand(x0, x10));
          __ strh(w2, MemOperand(x1, x10));
        }
        data_length += 2;

        __ Mov(x10, offset_base + data_length);
        {
          ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
          __ ldrsh(w2, MemOperand(x0, x10));
          __ strh(w2, MemOperand(x1, x10));
        }
        data_length += 2;

        __ Mov(x10, offset_base + data_length);
        {
          ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
          __ ldrb(w2, MemOperand(x0, x10));
          __ strb(w2, MemOperand(x1, x10));
        }
        data_length += 1;

        __ Mov(x10, offset_base + data_length);
        {
          ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
          __ ldrsb(w2, MemOperand(x0, x10));
          __ strb(w2, MemOperand(x1, x10));
        }
        data_length += 1;

        VIXL_ASSERT(kMaxDataLength >= data_length);

        END();
        RUN();

        // Check that the base and offset registers were preserved by each
        // operation (register-offset addressing performs no writeback), and
        // that the pointer tags survived.
        ASSERT_EQUAL_64(src_tagged, x0);
        ASSERT_EQUAL_64(dst_tagged, x1);
        ASSERT_EQUAL_64(offset_base + data_length - 1, x10);

        for (int k = 0; k < data_length; k++) {
          VIXL_CHECK(src[k] == dst[k]);
        }

        TEARDOWN();
      }
    }
  }
}


TEST(load_store_tagged_register_postindex) {
  uint64_t src[] = {0x0706050403020100, 0x0f0e0d0c0b0a0908};
  uint64_t tags[] = {0x00, 0x1, 0x55, 0xff};
  int tag_count = sizeof(tags) / sizeof(tags[0]);

  for (int j = 0; j < tag_count; j++) {
    for (int i = 0; i < tag_count; i++) {
      SETUP();
      uint64_t src_base = reinterpret_cast<uint64_t>(src);
      uint64_t src_tagged = CPU::SetPointerTag(src_base, tags[i]);
      uint64_t offset_tagged = CPU::SetPointerTag(UINT64_C(0), tags[j]);

      START();
      __ Mov(x10, src_tagged);
      __ Mov(x11, offset_tagged);
      __ Ld1(v0.V16B(), MemOperand(x10, x11, PostIndex));
      // TODO: add other instructions (ld2-4, st1-4) as they become available.
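
      // Note that the register form of post-index writeback adds the full
      // 64-bit value of x11 to the base, tag bits included; the check on x10
      // after RUN() relies on this (src_tagged + offset_tagged).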
17074 END(); 17075 17076 RUN(); 17077 17078 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q0); 17079 ASSERT_EQUAL_64(src_tagged + offset_tagged, x10); 17080 17081 TEARDOWN(); 17082 } 17083 } 17084 } 17085 17086 17087 TEST(branch_tagged) { 17088 SETUP(); 17089 START(); 17090 17091 Label loop, loop_entry, done; 17092 __ Adr(x0, &loop); 17093 __ Mov(x1, 0); 17094 __ B(&loop_entry); 17095 17096 __ Bind(&loop); 17097 __ Add(x1, x1, 1); // Count successful jumps. 17098 17099 // Advance to the next tag, then bail out if we've come back around to tag 0. 17100 __ Add(x0, x0, UINT64_C(1) << kAddressTagOffset); 17101 __ Tst(x0, kAddressTagMask); 17102 __ B(eq, &done); 17103 17104 __ Bind(&loop_entry); 17105 __ Br(x0); 17106 17107 __ Bind(&done); 17108 17109 END(); 17110 RUN(); 17111 17112 ASSERT_EQUAL_64(1 << kAddressTagWidth, x1); 17113 17114 TEARDOWN(); 17115 } 17116 17117 17118 TEST(branch_and_link_tagged) { 17119 SETUP(); 17120 START(); 17121 17122 Label loop, loop_entry, done; 17123 __ Adr(x0, &loop); 17124 __ Mov(x1, 0); 17125 __ B(&loop_entry); 17126 17127 __ Bind(&loop); 17128 17129 // Bail out (before counting a successful jump) if lr appears to be tagged. 17130 __ Tst(lr, kAddressTagMask); 17131 __ B(ne, &done); 17132 17133 __ Add(x1, x1, 1); // Count successful jumps. 17134 17135 // Advance to the next tag, then bail out if we've come back around to tag 0. 17136 __ Add(x0, x0, UINT64_C(1) << kAddressTagOffset); 17137 __ Tst(x0, kAddressTagMask); 17138 __ B(eq, &done); 17139 17140 __ Bind(&loop_entry); 17141 __ Blr(x0); 17142 17143 __ Bind(&done); 17144 17145 END(); 17146 RUN(); 17147 17148 ASSERT_EQUAL_64(1 << kAddressTagWidth, x1); 17149 17150 TEARDOWN(); 17151 } 17152 17153 17154 TEST(branch_tagged_and_adr_adrp) { 17155 SETUP_CUSTOM(kPageSize, PageOffsetDependentCode); 17156 START(); 17157 17158 Label loop, loop_entry, done; 17159 __ Adr(x0, &loop); 17160 __ Mov(x1, 0); 17161 __ B(&loop_entry); 17162 17163 __ Bind(&loop); 17164 17165 // Bail out (before counting a successful jump) if `adr x10, ...` is tagged. 17166 __ Adr(x10, &done); 17167 __ Tst(x10, kAddressTagMask); 17168 __ B(ne, &done); 17169 17170 // Bail out (before counting a successful jump) if `adrp x11, ...` is tagged. 17171 __ Adrp(x11, &done); 17172 __ Tst(x11, kAddressTagMask); 17173 __ B(ne, &done); 17174 17175 __ Add(x1, x1, 1); // Count successful iterations. 17176 17177 // Advance to the next tag, then bail out if we've come back around to tag 0. 
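  // (Adding 1 << kAddressTagOffset increments the tag field directly, so x1
  // should reach 1 << kAddressTagWidth successful iterations, 256 assuming an
  // eight-bit tag field, before the field wraps to zero and the Tst/B(eq)
  // pair below exits.)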
17178 __ Add(x0, x0, UINT64_C(1) << kAddressTagOffset); 17179 __ Tst(x0, kAddressTagMask); 17180 __ B(eq, &done); 17181 17182 __ Bind(&loop_entry); 17183 __ Br(x0); 17184 17185 __ Bind(&done); 17186 17187 END(); 17188 RUN_CUSTOM(); 17189 17190 ASSERT_EQUAL_64(1 << kAddressTagWidth, x1); 17191 17192 TEARDOWN_CUSTOM(); 17193 } 17194 17195 TEST(neon_3same_addp) { 17196 SETUP(); 17197 17198 START(); 17199 17200 __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa); 17201 __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff); 17202 __ Addp(v16.V16B(), v0.V16B(), v1.V16B()); 17203 17204 END(); 17205 17206 RUN(); 17207 ASSERT_EQUAL_128(0x00ff54ffff54aaff, 0xffffffffffffffff, q16); 17208 TEARDOWN(); 17209 } 17210 17211 TEST(neon_3same_sqdmulh_sqrdmulh) { 17212 SETUP(); 17213 17214 START(); 17215 17216 __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000); 17217 __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000); 17218 __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000); 17219 __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000); 17220 17221 __ Sqdmulh(v16.V4H(), v0.V4H(), v1.V4H()); 17222 __ Sqdmulh(v17.V4S(), v2.V4S(), v3.V4S()); 17223 __ Sqdmulh(h18, h0, h1); 17224 __ Sqdmulh(s19, s2, s3); 17225 17226 __ Sqrdmulh(v20.V4H(), v0.V4H(), v1.V4H()); 17227 __ Sqrdmulh(v21.V4S(), v2.V4S(), v3.V4S()); 17228 __ Sqrdmulh(h22, h0, h1); 17229 __ Sqrdmulh(s23, s2, s3); 17230 17231 END(); 17232 17233 RUN(); 17234 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000100007fff, q16); 17235 ASSERT_EQUAL_128(0x000000017fffffff, 0x000000007fffffff, q17); 17236 ASSERT_EQUAL_128(0, 0x7fff, q18); 17237 ASSERT_EQUAL_128(0, 0x7fffffff, q19); 17238 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000100017fff, q20); 17239 ASSERT_EQUAL_128(0x000000017fffffff, 0x000000017fffffff, q21); 17240 ASSERT_EQUAL_128(0, 0x7fff, q22); 17241 ASSERT_EQUAL_128(0, 0x7fffffff, q23); 17242 TEARDOWN(); 17243 } 17244 17245 TEST(neon_byelement_sqdmulh_sqrdmulh) { 17246 SETUP(); 17247 17248 START(); 17249 17250 __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000); 17251 __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000); 17252 __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000); 17253 __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000); 17254 17255 __ Sqdmulh(v16.V4H(), v0.V4H(), v1.H(), 1); 17256 __ Sqdmulh(v17.V4S(), v2.V4S(), v3.S(), 1); 17257 __ Sqdmulh(h18, h0, v1.H(), 0); 17258 __ Sqdmulh(s19, s2, v3.S(), 0); 17259 17260 __ Sqrdmulh(v20.V4H(), v0.V4H(), v1.H(), 1); 17261 __ Sqrdmulh(v21.V4S(), v2.V4S(), v3.S(), 1); 17262 __ Sqrdmulh(h22, h0, v1.H(), 0); 17263 __ Sqrdmulh(s23, s2, v3.S(), 0); 17264 17265 END(); 17266 17267 RUN(); 17268 ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000fff0, q16); 17269 ASSERT_EQUAL_128(0x00000000fffffff0, 0x00000000fffffff0, q17); 17270 ASSERT_EQUAL_128(0, 0x7fff, q18); 17271 ASSERT_EQUAL_128(0, 0x7fffffff, q19); 17272 ASSERT_EQUAL_128(0x0000000000000000, 0x000000010001fff0, q20); 17273 ASSERT_EQUAL_128(0x00000001fffffff0, 0x00000001fffffff0, q21); 17274 ASSERT_EQUAL_128(0, 0x7fff, q22); 17275 ASSERT_EQUAL_128(0, 0x7fffffff, q23); 17276 TEARDOWN(); 17277 } 17278 17279 17280 TEST(neon_2regmisc_saddlp) { 17281 SETUP(); 17282 17283 START(); 17284 17285 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081); 17286 17287 __ Saddlp(v16.V8H(), v0.V16B()); 17288 __ Saddlp(v17.V4H(), v0.V8B()); 17289 17290 __ Saddlp(v18.V4S(), v0.V8H()); 17291 __ Saddlp(v19.V2S(), v0.V4H()); 17292 17293 __ Saddlp(v20.V2D(), v0.V4S()); 17294 __ Saddlp(v21.V1D(), v0.V2S()); 17295 17296 
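
  // Saddlp ("signed add long pairwise") sums adjacent pairs of source lanes
  // into destination lanes of twice the width; the V1D form above folds the
  // two S lanes of the low half of v0 into a single D result. As a worked
  // pair from this input: the top two bytes 0x7f (+127) and 0x01 (+1) sum to
  // 0x0080 in the corresponding H lane.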
END(); 17297 17298 RUN(); 17299 ASSERT_EQUAL_128(0x0080ffffff010080, 0xff01ffff0080ff01, q16); 17300 ASSERT_EQUAL_128(0x0000000000000000, 0xff01ffff0080ff01, q17); 17301 ASSERT_EQUAL_128(0x0000800000000081, 0xffff7f81ffff8200, q18); 17302 ASSERT_EQUAL_128(0x0000000000000000, 0xffff7f81ffff8200, q19); 17303 ASSERT_EQUAL_128(0x0000000000818000, 0xffffffff82017f81, q20); 17304 ASSERT_EQUAL_128(0x0000000000000000, 0xffffffff82017f81, q21); 17305 TEARDOWN(); 17306 } 17307 17308 TEST(neon_2regmisc_uaddlp) { 17309 SETUP(); 17310 17311 START(); 17312 17313 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081); 17314 17315 __ Uaddlp(v16.V8H(), v0.V16B()); 17316 __ Uaddlp(v17.V4H(), v0.V8B()); 17317 17318 __ Uaddlp(v18.V4S(), v0.V8H()); 17319 __ Uaddlp(v19.V2S(), v0.V4H()); 17320 17321 __ Uaddlp(v20.V2D(), v0.V4S()); 17322 __ Uaddlp(v21.V1D(), v0.V2S()); 17323 17324 END(); 17325 17326 RUN(); 17327 ASSERT_EQUAL_128(0x008000ff01010080, 0x010100ff00800101, q16); 17328 ASSERT_EQUAL_128(0x0000000000000000, 0x010100ff00800101, q17); 17329 ASSERT_EQUAL_128(0x0000800000010081, 0x00017f8100008200, q18); 17330 ASSERT_EQUAL_128(0x0000000000000000, 0x00017f8100008200, q19); 17331 ASSERT_EQUAL_128(0x0000000100818000, 0x0000000082017f81, q20); 17332 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000082017f81, q21); 17333 TEARDOWN(); 17334 } 17335 17336 TEST(neon_2regmisc_sadalp) { 17337 SETUP(); 17338 17339 START(); 17340 17341 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081); 17342 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff); 17343 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff); 17344 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff); 17345 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000); 17346 17347 __ Mov(v16.V16B(), v1.V16B()); 17348 __ Mov(v17.V16B(), v1.V16B()); 17349 __ Sadalp(v16.V8H(), v0.V16B()); 17350 __ Sadalp(v17.V4H(), v0.V8B()); 17351 17352 __ Mov(v18.V16B(), v2.V16B()); 17353 __ Mov(v19.V16B(), v2.V16B()); 17354 __ Sadalp(v18.V4S(), v1.V8H()); 17355 __ Sadalp(v19.V2S(), v1.V4H()); 17356 17357 __ Mov(v20.V16B(), v3.V16B()); 17358 __ Mov(v21.V16B(), v4.V16B()); 17359 __ Sadalp(v20.V2D(), v2.V4S()); 17360 __ Sadalp(v21.V1D(), v2.V2S()); 17361 17362 END(); 17363 17364 RUN(); 17365 ASSERT_EQUAL_128(0x80808000ff000080, 0xff00ffff00817f00, q16); 17366 ASSERT_EQUAL_128(0x0000000000000000, 0xff00ffff00817f00, q17); 17367 ASSERT_EQUAL_128(0x7fff0001fffffffe, 0xffffffff80007fff, q18); 17368 ASSERT_EQUAL_128(0x0000000000000000, 0xffffffff80007fff, q19); 17369 ASSERT_EQUAL_128(0x7fffffff80000000, 0x800000007ffffffe, q20); 17370 ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21); 17371 TEARDOWN(); 17372 } 17373 17374 TEST(neon_2regmisc_uadalp) { 17375 SETUP(); 17376 17377 START(); 17378 17379 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081); 17380 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff); 17381 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff); 17382 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff); 17383 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000); 17384 17385 __ Mov(v16.V16B(), v1.V16B()); 17386 __ Mov(v17.V16B(), v1.V16B()); 17387 __ Uadalp(v16.V8H(), v0.V16B()); 17388 __ Uadalp(v17.V4H(), v0.V8B()); 17389 17390 __ Mov(v18.V16B(), v2.V16B()); 17391 __ Mov(v19.V16B(), v2.V16B()); 17392 __ Uadalp(v18.V4S(), v1.V8H()); 17393 __ Uadalp(v19.V2S(), v1.V4H()); 17394 17395 __ Mov(v20.V16B(), v3.V16B()); 17396 __ Mov(v21.V16B(), v4.V16B()); 17397 __ Uadalp(v20.V2D(), v2.V4S()); 17398 __ 
Uadalp(v21.V1D(), v2.V2S()); 17399 17400 END(); 17401 17402 RUN(); 17403 ASSERT_EQUAL_128(0x8080810001000080, 0x010000ff00818100, q16); 17404 ASSERT_EQUAL_128(0x0000000000000000, 0x010000ff00818100, q17); 17405 ASSERT_EQUAL_128(0x800100010000fffe, 0x0000ffff80007fff, q18); 17406 ASSERT_EQUAL_128(0x0000000000000000, 0x0000ffff80007fff, q19); 17407 ASSERT_EQUAL_128(0x8000000180000000, 0x800000007ffffffe, q20); 17408 ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21); 17409 TEARDOWN(); 17410 } 17411 17412 TEST(neon_3same_mul) { 17413 SETUP(); 17414 17415 START(); 17416 17417 __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa); 17418 __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff); 17419 __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); 17420 __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); 17421 17422 __ Mla(v16.V16B(), v0.V16B(), v1.V16B()); 17423 __ Mls(v17.V16B(), v0.V16B(), v1.V16B()); 17424 __ Mul(v18.V16B(), v0.V16B(), v1.V16B()); 17425 17426 END(); 17427 17428 RUN(); 17429 ASSERT_EQUAL_128(0x0102757605b1b208, 0x5f0a61450db90f56, q16); 17430 ASSERT_EQUAL_128(0x01029192055b5c08, 0xb30ab5d30d630faa, q17); 17431 ASSERT_EQUAL_128(0x0000727200abab00, 0x5600563900ab0056, q18); 17432 TEARDOWN(); 17433 } 17434 17435 17436 TEST(neon_3same_absdiff) { 17437 SETUP(); 17438 17439 START(); 17440 17441 __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa); 17442 __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff); 17443 __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); 17444 __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); 17445 17446 __ Saba(v16.V16B(), v0.V16B(), v1.V16B()); 17447 __ Uaba(v17.V16B(), v0.V16B(), v1.V16B()); 17448 __ Sabd(v18.V16B(), v0.V16B(), v1.V16B()); 17449 __ Uabd(v19.V16B(), v0.V16B(), v1.V16B()); 17450 17451 END(); 17452 17453 RUN(); 17454 ASSERT_EQUAL_128(0x0202aeaf065c5d5e, 0x5e5f600c62646455, q16); 17455 ASSERT_EQUAL_128(0x0002585904b0b1b2, 0x5e5f600c62b86455, q17); 17456 ASSERT_EQUAL_128(0x0100abab01565656, 0x5555550055565555, q18); 17457 ASSERT_EQUAL_128(0xff005555ffaaaaaa, 0x5555550055aa5555, q19); 17458 TEARDOWN(); 17459 } 17460 17461 17462 TEST(neon_byelement_mul) { 17463 SETUP(); 17464 17465 START(); 17466 17467 __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa); 17468 __ Movi(v1.V2D(), 0x000155aaff55ff00, 0xaa55ff55555500ff); 17469 17470 17471 __ Mul(v16.V4H(), v0.V4H(), v1.H(), 0); 17472 __ Mul(v17.V8H(), v0.V8H(), v1.H(), 7); 17473 __ Mul(v18.V2S(), v0.V2S(), v1.S(), 0); 17474 __ Mul(v19.V4S(), v0.V4S(), v1.S(), 3); 17475 17476 __ Movi(v20.V2D(), 0x0000000000000000, 0x0001000200030004); 17477 __ Movi(v21.V2D(), 0x0005000600070008, 0x0001000200030004); 17478 __ Mla(v20.V4H(), v0.V4H(), v1.H(), 0); 17479 __ Mla(v21.V8H(), v0.V8H(), v1.H(), 7); 17480 17481 __ Movi(v22.V2D(), 0x0000000000000000, 0x0000000200000004); 17482 __ Movi(v23.V2D(), 0x0000000600000008, 0x0000000200000004); 17483 __ Mla(v22.V2S(), v0.V2S(), v1.S(), 0); 17484 __ Mla(v23.V4S(), v0.V4S(), v1.S(), 3); 17485 17486 __ Movi(v24.V2D(), 0x0000000000000000, 0x0100aaabfe015456); 17487 __ Movi(v25.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa); 17488 __ Mls(v24.V4H(), v0.V4H(), v1.H(), 0); 17489 __ Mls(v25.V8H(), v0.V8H(), v1.H(), 7); 17490 17491 __ Movi(v26.V2D(), 0x0000000000000000, 0xc8e2aaabe1c85456); 17492 __ Movi(v27.V2D(), 0x39545572c6aa54e4, 0x39545572c6aa54e4); 17493 __ Mls(v26.V2S(), v0.V2S(), v1.S(), 0); 17494 __ Mls(v27.V4S(), v0.V4S(), v1.S(), 3); 17495 17496 END(); 17497 17498 RUN(); 17499 
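
  // Each by-element Mul/Mla/Mls above multiplies every lane of the first
  // source by the single selected lane of the second. The Mls destinations
  // (v24-v27) were preloaded with exactly the products being subtracted, so
  // they are expected to come back as zero.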
ASSERT_EQUAL_128(0x0000000000000000, 0x0100aaabfe015456, q16); 17500 ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q17); 17501 ASSERT_EQUAL_128(0x0000000000000000, 0xc8e2aaabe1c85456, q18); 17502 ASSERT_EQUAL_128(0x39545572c6aa54e4, 0x39545572c6aa54e4, q19); 17503 17504 ASSERT_EQUAL_128(0x0000000000000000, 0x0101aaadfe04545a, q20); 17505 ASSERT_EQUAL_128(0xff05aa5b010655b2, 0xff01aa57010255ae, q21); 17506 ASSERT_EQUAL_128(0x0000000000000000, 0xc8e2aaade1c8545a, q22); 17507 ASSERT_EQUAL_128(0x39545578c6aa54ec, 0x39545574c6aa54e8, q23); 17508 17509 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24); 17510 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25); 17511 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q26); 17512 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q27); 17513 TEARDOWN(); 17514 } 17515 17516 17517 TEST(neon_byelement_mull) { 17518 SETUP(); 17519 17520 START(); 17521 17522 __ Movi(v0.V2D(), 0xaa55ff55555500ff, 0xff00aa5500ff55aa); 17523 __ Movi(v1.V2D(), 0x000155aaff55ff00, 0xaa55ff55555500ff); 17524 17525 17526 __ Smull(v16.V4S(), v0.V4H(), v1.H(), 7); 17527 __ Smull2(v17.V4S(), v0.V8H(), v1.H(), 0); 17528 __ Umull(v18.V4S(), v0.V4H(), v1.H(), 7); 17529 __ Umull2(v19.V4S(), v0.V8H(), v1.H(), 0); 17530 17531 __ Movi(v20.V2D(), 0x0000000100000002, 0x0000000200000001); 17532 __ Movi(v21.V2D(), 0x0000000100000002, 0x0000000200000001); 17533 __ Movi(v22.V2D(), 0x0000000100000002, 0x0000000200000001); 17534 __ Movi(v23.V2D(), 0x0000000100000002, 0x0000000200000001); 17535 17536 __ Smlal(v20.V4S(), v0.V4H(), v1.H(), 7); 17537 __ Smlal2(v21.V4S(), v0.V8H(), v1.H(), 0); 17538 __ Umlal(v22.V4S(), v0.V4H(), v1.H(), 7); 17539 __ Umlal2(v23.V4S(), v0.V8H(), v1.H(), 0); 17540 17541 __ Movi(v24.V2D(), 0xffffff00ffffaa55, 0x000000ff000055aa); 17542 __ Movi(v25.V2D(), 0xffaaaaabffff55ab, 0x0054ffab0000fe01); 17543 __ Movi(v26.V2D(), 0x0000ff000000aa55, 0x000000ff000055aa); 17544 __ Movi(v27.V2D(), 0x00a9aaab00fe55ab, 0x0054ffab0000fe01); 17545 17546 __ Smlsl(v24.V4S(), v0.V4H(), v1.H(), 7); 17547 __ Smlsl2(v25.V4S(), v0.V8H(), v1.H(), 0); 17548 __ Umlsl(v26.V4S(), v0.V4H(), v1.H(), 7); 17549 __ Umlsl2(v27.V4S(), v0.V8H(), v1.H(), 0); 17550 17551 END(); 17552 17553 RUN(); 17554 17555 ASSERT_EQUAL_128(0xffffff00ffffaa55, 0x000000ff000055aa, q16); 17556 ASSERT_EQUAL_128(0xffaaaaabffff55ab, 0x0054ffab0000fe01, q17); 17557 ASSERT_EQUAL_128(0x0000ff000000aa55, 0x000000ff000055aa, q18); 17558 ASSERT_EQUAL_128(0x00a9aaab00fe55ab, 0x0054ffab0000fe01, q19); 17559 17560 ASSERT_EQUAL_128(0xffffff01ffffaa57, 0x00000101000055ab, q20); 17561 ASSERT_EQUAL_128(0xffaaaaacffff55ad, 0x0054ffad0000fe02, q21); 17562 ASSERT_EQUAL_128(0x0000ff010000aa57, 0x00000101000055ab, q22); 17563 ASSERT_EQUAL_128(0x00a9aaac00fe55ad, 0x0054ffad0000fe02, q23); 17564 17565 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24); 17566 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25); 17567 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q26); 17568 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q27); 17569 17570 TEARDOWN(); 17571 } 17572 17573 17574 TEST(neon_byelement_sqdmull) { 17575 SETUP(); 17576 17577 START(); 17578 17579 __ Movi(v0.V2D(), 0xaa55ff55555500ff, 0xff00aa5500ff55aa); 17580 __ Movi(v1.V2D(), 0x000155aaff55ff00, 0xaa55ff55555500ff); 17581 17582 __ Sqdmull(v16.V4S(), v0.V4H(), v1.H(), 7); 17583 __ Sqdmull2(v17.V4S(), v0.V8H(), v1.H(), 0); 17584 __ Sqdmull(s18, h0, v1.H(), 7); 17585 17586 __ Movi(v20.V2D(), 
0x0000000100000002, 0x0000000200000001); 17587 __ Movi(v21.V2D(), 0x0000000100000002, 0x0000000200000001); 17588 __ Movi(v22.V2D(), 0x0000000100000002, 0x0000000200000001); 17589 17590 __ Sqdmlal(v20.V4S(), v0.V4H(), v1.H(), 7); 17591 __ Sqdmlal2(v21.V4S(), v0.V8H(), v1.H(), 0); 17592 __ Sqdmlal(s22, h0, v1.H(), 7); 17593 17594 __ Movi(v24.V2D(), 0xfffffe00ffff54aa, 0x000001fe0000ab54); 17595 __ Movi(v25.V2D(), 0xff555556fffeab56, 0x00a9ff560001fc02); 17596 __ Movi(v26.V2D(), 0x0000000000000000, 0x000000000000ab54); 17597 17598 __ Sqdmlsl(v24.V4S(), v0.V4H(), v1.H(), 7); 17599 __ Sqdmlsl2(v25.V4S(), v0.V8H(), v1.H(), 0); 17600 __ Sqdmlsl(s26, h0, v1.H(), 7); 17601 17602 END(); 17603 17604 RUN(); 17605 17606 ASSERT_EQUAL_128(0xfffffe00ffff54aa, 0x000001fe0000ab54, q16); 17607 ASSERT_EQUAL_128(0xff555556fffeab56, 0x00a9ff560001fc02, q17); 17608 ASSERT_EQUAL_128(0, 0x0000ab54, q18); 17609 17610 ASSERT_EQUAL_128(0xfffffe01ffff54ac, 0x000002000000ab55, q20); 17611 ASSERT_EQUAL_128(0xff555557fffeab58, 0x00a9ff580001fc03, q21); 17612 ASSERT_EQUAL_128(0, 0x0000ab55, q22); 17613 17614 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24); 17615 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25); 17616 ASSERT_EQUAL_128(0, 0x00000000, q26); 17617 17618 TEARDOWN(); 17619 } 17620 17621 17622 TEST(neon_3diff_absdiff) { 17623 SETUP(); 17624 17625 START(); 17626 17627 __ Movi(v0.V2D(), 0xff00aa5500ff55ab, 0xff00aa5500ff55aa); 17628 __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff); 17629 __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); 17630 __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); 17631 __ Movi(v18.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); 17632 __ Movi(v19.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); 17633 17634 __ Sabal(v16.V8H(), v0.V8B(), v1.V8B()); 17635 __ Uabal(v17.V8H(), v0.V8B(), v1.V8B()); 17636 __ Sabal2(v18.V8H(), v0.V16B(), v1.V16B()); 17637 __ Uabal2(v19.V8H(), v0.V16B(), v1.V16B()); 17638 17639 END(); 17640 17641 RUN(); 17642 ASSERT_EQUAL_128(0x01570359055b0708, 0x095f0b620d630f55, q16); 17643 ASSERT_EQUAL_128(0x01570359055b0708, 0x095f0bb60d630f55, q17); 17644 ASSERT_EQUAL_128(0x0103030405b107b3, 0x090b0b620d640f55, q18); 17645 ASSERT_EQUAL_128(0x02010304055b075d, 0x0a090bb60db80fab, q19); 17646 TEARDOWN(); 17647 } 17648 17649 17650 TEST(neon_3diff_sqdmull) { 17651 SETUP(); 17652 17653 START(); 17654 17655 __ Movi(v0.V2D(), 0x7fff7fff80008000, 0x80007fff7fff8000); 17656 __ Movi(v1.V2D(), 0x80007fff7fff8000, 0x7fff7fff80008000); 17657 __ Movi(v2.V2D(), 0x800000007fffffff, 0x7fffffff80000000); 17658 __ Movi(v3.V2D(), 0x8000000080000000, 0x8000000080000000); 17659 17660 __ Sqdmull(v16.V4S(), v0.V4H(), v1.V4H()); 17661 __ Sqdmull2(v17.V4S(), v0.V8H(), v1.V8H()); 17662 __ Sqdmull(v18.V2D(), v2.V2S(), v3.V2S()); 17663 __ Sqdmull2(v19.V2D(), v2.V4S(), v3.V4S()); 17664 __ Sqdmull(s20, h0, h1); 17665 __ Sqdmull(d21, s2, s3); 17666 17667 END(); 17668 17669 RUN(); 17670 ASSERT_EQUAL_128(0x800100007ffe0002, 0x800100007fffffff, q16); 17671 ASSERT_EQUAL_128(0x800100007ffe0002, 0x800100007fffffff, q17); 17672 ASSERT_EQUAL_128(0x8000000100000000, 0x7fffffffffffffff, q18); 17673 ASSERT_EQUAL_128(0x7fffffffffffffff, 0x8000000100000000, q19); 17674 ASSERT_EQUAL_128(0, 0x7fffffff, q20); 17675 ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q21); 17676 TEARDOWN(); 17677 } 17678 17679 17680 TEST(neon_3diff_sqdmlal) { 17681 SETUP(); 17682 17683 START(); 17684 17685 __ Movi(v0.V2D(), 0x7fff7fff80008000, 0x80007fff7fff8000); 17686 __ Movi(v1.V2D(), 
0x80007fff7fff8000, 0x7fff7fff80008000); 17687 __ Movi(v2.V2D(), 0x800000007fffffff, 0x7fffffff80000000); 17688 __ Movi(v3.V2D(), 0x8000000080000000, 0x8000000080000000); 17689 17690 __ Movi(v16.V2D(), 0xffffffff00000001, 0x8fffffff00000001); 17691 __ Movi(v17.V2D(), 0x00000001ffffffff, 0x00000001ffffffff); 17692 __ Movi(v18.V2D(), 0x8000000000000001, 0x0000000000000001); 17693 __ Movi(v19.V2D(), 0xffffffffffffffff, 0x7fffffffffffffff); 17694 __ Movi(v20.V2D(), 0, 0x00000001); 17695 __ Movi(v21.V2D(), 0, 0x00000001); 17696 17697 __ Sqdmlal(v16.V4S(), v0.V4H(), v1.V4H()); 17698 __ Sqdmlal2(v17.V4S(), v0.V8H(), v1.V8H()); 17699 __ Sqdmlal(v18.V2D(), v2.V2S(), v3.V2S()); 17700 __ Sqdmlal2(v19.V2D(), v2.V4S(), v3.V4S()); 17701 __ Sqdmlal(s20, h0, h1); 17702 __ Sqdmlal(d21, s2, s3); 17703 17704 END(); 17705 17706 RUN(); 17707 ASSERT_EQUAL_128(0x8000ffff7ffe0003, 0x800000007fffffff, q16); 17708 ASSERT_EQUAL_128(0x800100017ffe0001, 0x800100017ffffffe, q17); 17709 ASSERT_EQUAL_128(0x8000000000000000, 0x7fffffffffffffff, q18); 17710 ASSERT_EQUAL_128(0x7ffffffffffffffe, 0x00000000ffffffff, q19); 17711 ASSERT_EQUAL_128(0, 0x7fffffff, q20); 17712 ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q21); 17713 TEARDOWN(); 17714 } 17715 17716 17717 TEST(neon_3diff_sqdmlsl) { 17718 SETUP(); 17719 17720 START(); 17721 17722 __ Movi(v0.V2D(), 0x7fff7fff80008000, 0x80007fff7fff8000); 17723 __ Movi(v1.V2D(), 0x80007fff7fff8000, 0x7fff7fff80008000); 17724 __ Movi(v2.V2D(), 0x800000007fffffff, 0x7fffffff80000000); 17725 __ Movi(v3.V2D(), 0x8000000080000000, 0x8000000080000000); 17726 17727 __ Movi(v16.V2D(), 0xffffffff00000001, 0x7ffffffe80000001); 17728 __ Movi(v17.V2D(), 0x00000001ffffffff, 0x7ffffffe00000001); 17729 __ Movi(v18.V2D(), 0x8000000000000001, 0x8000000000000001); 17730 __ Movi(v19.V2D(), 0xfffffffffffffffe, 0x7fffffffffffffff); 17731 __ Movi(v20.V2D(), 0, 0x00000001); 17732 __ Movi(v21.V2D(), 0, 0x00000001); 17733 17734 __ Sqdmlsl(v16.V4S(), v0.V4H(), v1.V4H()); 17735 __ Sqdmlsl2(v17.V4S(), v0.V8H(), v1.V8H()); 17736 __ Sqdmlsl(v18.V2D(), v2.V2S(), v3.V2S()); 17737 __ Sqdmlsl2(v19.V2D(), v2.V4S(), v3.V4S()); 17738 __ Sqdmlsl(s20, h0, h1); 17739 __ Sqdmlsl(d21, s2, s3); 17740 17741 END(); 17742 17743 RUN(); 17744 ASSERT_EQUAL_128(0x7ffeffff8001ffff, 0x7fffffff80000000, q16); 17745 ASSERT_EQUAL_128(0x7fff00018001fffd, 0x7fffffff80000002, q17); 17746 ASSERT_EQUAL_128(0xffffffff00000001, 0x8000000000000000, q18); 17747 ASSERT_EQUAL_128(0x8000000000000000, 0x7fffffffffffffff, q19); 17748 ASSERT_EQUAL_128(0, 0x80000002, q20); 17749 ASSERT_EQUAL_128(0, 0x8000000000000002, q21); 17750 17751 TEARDOWN(); 17752 } 17753 17754 17755 TEST(neon_3diff_mla) { 17756 SETUP(); 17757 17758 START(); 17759 17760 __ Movi(v0.V2D(), 0xff00aa5500ff55ab, 0xff00aa5500ff55aa); 17761 __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff); 17762 __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); 17763 __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); 17764 __ Movi(v18.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); 17765 __ Movi(v19.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); 17766 17767 __ Smlal(v16.V8H(), v0.V8B(), v1.V8B()); 17768 __ Umlal(v17.V8H(), v0.V8B(), v1.V8B()); 17769 __ Smlal2(v18.V8H(), v0.V16B(), v1.V16B()); 17770 __ Umlal2(v19.V8H(), v0.V16B(), v1.V16B()); 17771 17772 END(); 17773 17774 RUN(); 17775 ASSERT_EQUAL_128(0x01580304055c2341, 0x090a0ab70d0e0f56, q16); 17776 ASSERT_EQUAL_128(0xaa580304ae5c2341, 0x090a5fb70d0eb856, q17); 17777 ASSERT_EQUAL_128(0x01020304e878ea7a, 0x090a0ab70cb90f00, q18); 
17778 ASSERT_EQUAL_128(0x010203043d783f7a, 0x090a5fb761b90f00, q19); 17779 TEARDOWN(); 17780 } 17781 17782 17783 TEST(neon_3diff_mls) { 17784 SETUP(); 17785 17786 START(); 17787 17788 __ Movi(v0.V2D(), 0xff00aa5500ff55ab, 0xff00aa5500ff55aa); 17789 __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff); 17790 __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); 17791 __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); 17792 __ Movi(v18.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); 17793 __ Movi(v19.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); 17794 17795 __ Smlsl(v16.V8H(), v0.V8B(), v1.V8B()); 17796 __ Umlsl(v17.V8H(), v0.V8B(), v1.V8B()); 17797 __ Smlsl2(v18.V8H(), v0.V16B(), v1.V16B()); 17798 __ Umlsl2(v19.V8H(), v0.V16B(), v1.V16B()); 17799 17800 END(); 17801 17802 RUN(); 17803 ASSERT_EQUAL_128(0x00ac030404b0eacf, 0x090a0b610d0e0eaa, q16); 17804 ASSERT_EQUAL_128(0x57ac03045bb0eacf, 0x090ab6610d0e65aa, q17); 17805 ASSERT_EQUAL_128(0x0102030421942396, 0x090a0b610d630f00, q18); 17806 ASSERT_EQUAL_128(0x01020304cc94ce96, 0x090ab661b8630f00, q19); 17807 TEARDOWN(); 17808 } 17809 17810 17811 TEST(neon_3same_compare) { 17812 SETUP(); 17813 17814 START(); 17815 17816 __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa); 17817 __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff); 17818 17819 __ Cmeq(v16.V16B(), v0.V16B(), v0.V16B()); 17820 __ Cmeq(v17.V16B(), v0.V16B(), v1.V16B()); 17821 __ Cmge(v18.V16B(), v0.V16B(), v0.V16B()); 17822 __ Cmge(v19.V16B(), v0.V16B(), v1.V16B()); 17823 __ Cmgt(v20.V16B(), v0.V16B(), v0.V16B()); 17824 __ Cmgt(v21.V16B(), v0.V16B(), v1.V16B()); 17825 __ Cmhi(v22.V16B(), v0.V16B(), v0.V16B()); 17826 __ Cmhi(v23.V16B(), v0.V16B(), v1.V16B()); 17827 __ Cmhs(v24.V16B(), v0.V16B(), v0.V16B()); 17828 __ Cmhs(v25.V16B(), v0.V16B(), v1.V16B()); 17829 17830 END(); 17831 17832 RUN(); 17833 ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q16); 17834 ASSERT_EQUAL_128(0x00ff000000000000, 0x000000ff00000000, q17); 17835 ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q18); 17836 ASSERT_EQUAL_128(0x00ff00ffff00ff00, 0xff0000ff0000ff00, q19); 17837 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20); 17838 ASSERT_EQUAL_128(0x000000ffff00ff00, 0xff0000000000ff00, q21); 17839 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q22); 17840 ASSERT_EQUAL_128(0xff00ff0000ff00ff, 0xff00000000ffff00, q23); 17841 ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q24); 17842 ASSERT_EQUAL_128(0xffffff0000ff00ff, 0xff0000ff00ffff00, q25); 17843 TEARDOWN(); 17844 } 17845 17846 17847 TEST(neon_3same_scalar_compare) { 17848 SETUP(); 17849 17850 START(); 17851 17852 __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa); 17853 __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff); 17854 17855 __ Cmeq(d16, d0, d0); 17856 __ Cmeq(d17, d0, d1); 17857 __ Cmeq(d18, d1, d0); 17858 __ Cmge(d19, d0, d0); 17859 __ Cmge(d20, d0, d1); 17860 __ Cmge(d21, d1, d0); 17861 __ Cmgt(d22, d0, d0); 17862 __ Cmgt(d23, d0, d1); 17863 __ Cmhi(d24, d0, d0); 17864 __ Cmhi(d25, d0, d1); 17865 __ Cmhs(d26, d0, d0); 17866 __ Cmhs(d27, d0, d1); 17867 __ Cmhs(d28, d1, d0); 17868 17869 END(); 17870 17871 RUN(); 17872 17873 ASSERT_EQUAL_128(0, 0xffffffffffffffff, q16); 17874 ASSERT_EQUAL_128(0, 0x0000000000000000, q17); 17875 ASSERT_EQUAL_128(0, 0x0000000000000000, q18); 17876 ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19); 17877 ASSERT_EQUAL_128(0, 0xffffffffffffffff, q20); 17878 ASSERT_EQUAL_128(0, 0x0000000000000000, q21); 17879 ASSERT_EQUAL_128(0, 
0x0000000000000000, q22); 17880 ASSERT_EQUAL_128(0, 0xffffffffffffffff, q23); 17881 ASSERT_EQUAL_128(0, 0x0000000000000000, q24); 17882 ASSERT_EQUAL_128(0, 0xffffffffffffffff, q25); 17883 ASSERT_EQUAL_128(0, 0xffffffffffffffff, q26); 17884 ASSERT_EQUAL_128(0, 0xffffffffffffffff, q27); 17885 ASSERT_EQUAL_128(0, 0x0000000000000000, q28); 17886 17887 TEARDOWN(); 17888 } 17889 17890 TEST(neon_2regmisc_fcmeq) { 17891 SETUP(); 17892 17893 START(); 17894 17895 __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000); // Zero. 17896 __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff); // Nan. 17897 __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000); // < 0. 17898 __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000); // > 0. 17899 17900 __ Fcmeq(s16, s0, 0.0); 17901 __ Fcmeq(s17, s1, 0.0); 17902 __ Fcmeq(s18, s2, 0.0); 17903 __ Fcmeq(d19, d0, 0.0); 17904 __ Fcmeq(d20, d1, 0.0); 17905 __ Fcmeq(d21, d2, 0.0); 17906 __ Fcmeq(v22.V2S(), v0.V2S(), 0.0); 17907 __ Fcmeq(v23.V4S(), v1.V4S(), 0.0); 17908 __ Fcmeq(v24.V2D(), v1.V2D(), 0.0); 17909 __ Fcmeq(v25.V2D(), v2.V2D(), 0.0); 17910 17911 END(); 17912 17913 RUN(); 17914 ASSERT_EQUAL_128(0, 0xffffffff, q16); 17915 ASSERT_EQUAL_128(0, 0x00000000, q17); 17916 ASSERT_EQUAL_128(0, 0x00000000, q18); 17917 ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19); 17918 ASSERT_EQUAL_128(0, 0x0000000000000000, q20); 17919 ASSERT_EQUAL_128(0, 0x0000000000000000, q21); 17920 ASSERT_EQUAL_128(0, 0xffffffffffffffff, q22); 17921 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23); 17922 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24); 17923 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25); 17924 TEARDOWN(); 17925 } 17926 17927 TEST(neon_2regmisc_fcmge) { 17928 SETUP(); 17929 17930 START(); 17931 17932 __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000); // Zero. 17933 __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff); // Nan. 17934 __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000); // < 0. 17935 __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000); // > 0. 17936 17937 __ Fcmge(s16, s0, 0.0); 17938 __ Fcmge(s17, s1, 0.0); 17939 __ Fcmge(s18, s2, 0.0); 17940 __ Fcmge(d19, d0, 0.0); 17941 __ Fcmge(d20, d1, 0.0); 17942 __ Fcmge(d21, d3, 0.0); 17943 __ Fcmge(v22.V2S(), v0.V2S(), 0.0); 17944 __ Fcmge(v23.V4S(), v1.V4S(), 0.0); 17945 __ Fcmge(v24.V2D(), v1.V2D(), 0.0); 17946 __ Fcmge(v25.V2D(), v3.V2D(), 0.0); 17947 17948 END(); 17949 17950 RUN(); 17951 ASSERT_EQUAL_128(0, 0xffffffff, q16); 17952 ASSERT_EQUAL_128(0, 0x00000000, q17); 17953 ASSERT_EQUAL_128(0, 0x00000000, q18); 17954 ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19); 17955 ASSERT_EQUAL_128(0, 0x0000000000000000, q20); 17956 ASSERT_EQUAL_128(0, 0xffffffffffffffff, q21); 17957 ASSERT_EQUAL_128(0, 0xffffffffffffffff, q22); 17958 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23); 17959 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24); 17960 ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25); 17961 TEARDOWN(); 17962 } 17963 17964 17965 TEST(neon_2regmisc_fcmgt) { 17966 SETUP(); 17967 17968 START(); 17969 17970 __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000); // Zero. 17971 __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff); // Nan. 17972 __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000); // < 0. 17973 __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000); // > 0. 
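
  // All IEEE comparisons involving NaN are false, so every lane compared
  // against the NaN input is expected to come back as zero.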
17974 17975 __ Fcmgt(s16, s0, 0.0); 17976 __ Fcmgt(s17, s1, 0.0); 17977 __ Fcmgt(s18, s2, 0.0); 17978 __ Fcmgt(d19, d0, 0.0); 17979 __ Fcmgt(d20, d1, 0.0); 17980 __ Fcmgt(d21, d3, 0.0); 17981 __ Fcmgt(v22.V2S(), v0.V2S(), 0.0); 17982 __ Fcmgt(v23.V4S(), v1.V4S(), 0.0); 17983 __ Fcmgt(v24.V2D(), v1.V2D(), 0.0); 17984 __ Fcmgt(v25.V2D(), v3.V2D(), 0.0); 17985 17986 END(); 17987 17988 RUN(); 17989 ASSERT_EQUAL_128(0, 0x00000000, q16); 17990 ASSERT_EQUAL_128(0, 0x00000000, q17); 17991 ASSERT_EQUAL_128(0, 0x00000000, q18); 17992 ASSERT_EQUAL_128(0, 0x0000000000000000, q19); 17993 ASSERT_EQUAL_128(0, 0x0000000000000000, q20); 17994 ASSERT_EQUAL_128(0, 0xffffffffffffffff, q21); 17995 ASSERT_EQUAL_128(0, 0x0000000000000000, q22); 17996 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23); 17997 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24); 17998 ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25); 17999 TEARDOWN(); 18000 } 18001 18002 TEST(neon_2regmisc_fcmle) { 18003 SETUP(); 18004 18005 START(); 18006 18007 __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000); // Zero. 18008 __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff); // Nan. 18009 __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000); // < 0. 18010 __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000); // > 0. 18011 18012 __ Fcmle(s16, s0, 0.0); 18013 __ Fcmle(s17, s1, 0.0); 18014 __ Fcmle(s18, s3, 0.0); 18015 __ Fcmle(d19, d0, 0.0); 18016 __ Fcmle(d20, d1, 0.0); 18017 __ Fcmle(d21, d2, 0.0); 18018 __ Fcmle(v22.V2S(), v0.V2S(), 0.0); 18019 __ Fcmle(v23.V4S(), v1.V4S(), 0.0); 18020 __ Fcmle(v24.V2D(), v1.V2D(), 0.0); 18021 __ Fcmle(v25.V2D(), v2.V2D(), 0.0); 18022 18023 END(); 18024 18025 RUN(); 18026 ASSERT_EQUAL_128(0, 0xffffffff, q16); 18027 ASSERT_EQUAL_128(0, 0x00000000, q17); 18028 ASSERT_EQUAL_128(0, 0x00000000, q18); 18029 ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19); 18030 ASSERT_EQUAL_128(0, 0x0000000000000000, q20); 18031 ASSERT_EQUAL_128(0, 0xffffffffffffffff, q21); 18032 ASSERT_EQUAL_128(0, 0xffffffffffffffff, q22); 18033 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23); 18034 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24); 18035 ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25); 18036 TEARDOWN(); 18037 } 18038 18039 18040 TEST(neon_2regmisc_fcmlt) { 18041 SETUP(); 18042 18043 START(); 18044 18045 __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000); // Zero. 18046 __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff); // Nan. 18047 __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000); // < 0. 18048 __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000); // > 0. 
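
  // Unlike the "greater-than" forms, fcmlt and fcmle have no
  // register-register encoding; only these compare-against-zero forms exist,
  // so a less-than comparison of two registers is normally done by swapping
  // the operands of fcmgt/fcmge.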
18049 18050 __ Fcmlt(s16, s0, 0.0); 18051 __ Fcmlt(s17, s1, 0.0); 18052 __ Fcmlt(s18, s3, 0.0); 18053 __ Fcmlt(d19, d0, 0.0); 18054 __ Fcmlt(d20, d1, 0.0); 18055 __ Fcmlt(d21, d2, 0.0); 18056 __ Fcmlt(v22.V2S(), v0.V2S(), 0.0); 18057 __ Fcmlt(v23.V4S(), v1.V4S(), 0.0); 18058 __ Fcmlt(v24.V2D(), v1.V2D(), 0.0); 18059 __ Fcmlt(v25.V2D(), v2.V2D(), 0.0); 18060 18061 END(); 18062 18063 RUN(); 18064 ASSERT_EQUAL_128(0, 0x00000000, q16); 18065 ASSERT_EQUAL_128(0, 0x00000000, q17); 18066 ASSERT_EQUAL_128(0, 0x00000000, q18); 18067 ASSERT_EQUAL_128(0, 0x0000000000000000, q19); 18068 ASSERT_EQUAL_128(0, 0x0000000000000000, q20); 18069 ASSERT_EQUAL_128(0, 0xffffffffffffffff, q21); 18070 ASSERT_EQUAL_128(0, 0x0000000000000000, q22); 18071 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23); 18072 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24); 18073 ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25); 18074 TEARDOWN(); 18075 } 18076 18077 TEST(neon_2regmisc_cmeq) { 18078 SETUP(); 18079 18080 START(); 18081 18082 __ Movi(v0.V2D(), 0x0001000200030004, 0x0000000000000000); 18083 __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff); 18084 18085 __ Cmeq(v16.V8B(), v1.V8B(), 0); 18086 __ Cmeq(v17.V16B(), v1.V16B(), 0); 18087 __ Cmeq(v18.V4H(), v1.V4H(), 0); 18088 __ Cmeq(v19.V8H(), v1.V8H(), 0); 18089 __ Cmeq(v20.V2S(), v0.V2S(), 0); 18090 __ Cmeq(v21.V4S(), v0.V4S(), 0); 18091 __ Cmeq(d22, d0, 0); 18092 __ Cmeq(d23, d1, 0); 18093 __ Cmeq(v24.V2D(), v0.V2D(), 0); 18094 18095 END(); 18096 18097 RUN(); 18098 ASSERT_EQUAL_128(0x0000000000000000, 0xffff00000000ff00, q16); 18099 ASSERT_EQUAL_128(0xffff0000000000ff, 0xffff00000000ff00, q17); 18100 ASSERT_EQUAL_128(0x0000000000000000, 0xffff000000000000, q18); 18101 ASSERT_EQUAL_128(0xffff000000000000, 0xffff000000000000, q19); 18102 ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q20); 18103 ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q21); 18104 ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22); 18105 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23); 18106 ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24); 18107 TEARDOWN(); 18108 } 18109 18110 18111 TEST(neon_2regmisc_cmge) { 18112 SETUP(); 18113 18114 START(); 18115 18116 __ Movi(v0.V2D(), 0xff01000200030004, 0x0000000000000000); 18117 __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff); 18118 18119 __ Cmge(v16.V8B(), v1.V8B(), 0); 18120 __ Cmge(v17.V16B(), v1.V16B(), 0); 18121 __ Cmge(v18.V4H(), v1.V4H(), 0); 18122 __ Cmge(v19.V8H(), v1.V8H(), 0); 18123 __ Cmge(v20.V2S(), v0.V2S(), 0); 18124 __ Cmge(v21.V4S(), v0.V4S(), 0); 18125 __ Cmge(d22, d0, 0); 18126 __ Cmge(d23, d1, 0); 18127 __ Cmge(v24.V2D(), v0.V2D(), 0); 18128 18129 END(); 18130 18131 RUN(); 18132 ASSERT_EQUAL_128(0x0000000000000000, 0xffff00ffffffff00, q16); 18133 ASSERT_EQUAL_128(0xffffff0000ff00ff, 0xffff00ffffffff00, q17); 18134 ASSERT_EQUAL_128(0x0000000000000000, 0xffff0000ffffffff, q18); 18135 ASSERT_EQUAL_128(0xffffffff00000000, 0xffff0000ffffffff, q19); 18136 ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q20); 18137 ASSERT_EQUAL_128(0x00000000ffffffff, 0xffffffffffffffff, q21); 18138 ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22); 18139 ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q23); 18140 ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24); 18141 TEARDOWN(); 18142 } 18143 18144 18145 TEST(neon_2regmisc_cmlt) { 18146 SETUP(); 18147 18148 START(); 18149 18150 __ Movi(v0.V2D(), 
0x0001000200030004, 0xff00000000000000); 18151 __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff); 18152 18153 __ Cmlt(v16.V8B(), v1.V8B(), 0); 18154 __ Cmlt(v17.V16B(), v1.V16B(), 0); 18155 __ Cmlt(v18.V4H(), v1.V4H(), 0); 18156 __ Cmlt(v19.V8H(), v1.V8H(), 0); 18157 __ Cmlt(v20.V2S(), v1.V2S(), 0); 18158 __ Cmlt(v21.V4S(), v1.V4S(), 0); 18159 __ Cmlt(d22, d0, 0); 18160 __ Cmlt(d23, d1, 0); 18161 __ Cmlt(v24.V2D(), v0.V2D(), 0); 18162 18163 END(); 18164 18165 RUN(); 18166 ASSERT_EQUAL_128(0x0000000000000000, 0x0000ff00000000ff, q16); 18167 ASSERT_EQUAL_128(0x000000ffff00ff00, 0x0000ff00000000ff, q17); 18168 ASSERT_EQUAL_128(0x0000000000000000, 0x0000ffff00000000, q18); 18169 ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000ffff00000000, q19); 18170 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20); 18171 ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000000000000000, q21); 18172 ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22); 18173 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23); 18174 ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24); 18175 TEARDOWN(); 18176 } 18177 18178 18179 TEST(neon_2regmisc_cmle) { 18180 SETUP(); 18181 18182 START(); 18183 18184 __ Movi(v0.V2D(), 0x0001000200030004, 0x0000000000000000); 18185 __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff); 18186 18187 __ Cmle(v16.V8B(), v1.V8B(), 0); 18188 __ Cmle(v17.V16B(), v1.V16B(), 0); 18189 __ Cmle(v18.V4H(), v1.V4H(), 0); 18190 __ Cmle(v19.V8H(), v1.V8H(), 0); 18191 __ Cmle(v20.V2S(), v1.V2S(), 0); 18192 __ Cmle(v21.V4S(), v1.V4S(), 0); 18193 __ Cmle(d22, d0, 0); 18194 __ Cmle(d23, d1, 0); 18195 __ Cmle(v24.V2D(), v0.V2D(), 0); 18196 18197 END(); 18198 18199 RUN(); 18200 ASSERT_EQUAL_128(0x0000000000000000, 0xffffff000000ffff, q16); 18201 ASSERT_EQUAL_128(0xffff00ffff00ffff, 0xffffff000000ffff, q17); 18202 ASSERT_EQUAL_128(0x0000000000000000, 0xffffffff00000000, q18); 18203 ASSERT_EQUAL_128(0xffff0000ffffffff, 0xffffffff00000000, q19); 18204 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20); 18205 ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000000000000000, q21); 18206 ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22); 18207 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23); 18208 ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24); 18209 TEARDOWN(); 18210 } 18211 18212 18213 TEST(neon_2regmisc_cmgt) { 18214 SETUP(); 18215 18216 START(); 18217 18218 __ Movi(v0.V2D(), 0x0001000200030004, 0x0000000000000000); 18219 __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff); 18220 18221 __ Cmgt(v16.V8B(), v1.V8B(), 0); 18222 __ Cmgt(v17.V16B(), v1.V16B(), 0); 18223 __ Cmgt(v18.V4H(), v1.V4H(), 0); 18224 __ Cmgt(v19.V8H(), v1.V8H(), 0); 18225 __ Cmgt(v20.V2S(), v0.V2S(), 0); 18226 __ Cmgt(v21.V4S(), v0.V4S(), 0); 18227 __ Cmgt(d22, d0, 0); 18228 __ Cmgt(d23, d1, 0); 18229 __ Cmgt(v24.V2D(), v0.V2D(), 0); 18230 18231 END(); 18232 18233 RUN(); 18234 ASSERT_EQUAL_128(0x0000000000000000, 0x000000ffffff0000, q16); 18235 ASSERT_EQUAL_128(0x0000ff0000ff0000, 0x000000ffffff0000, q17); 18236 ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q18); 18237 ASSERT_EQUAL_128(0x0000ffff00000000, 0x00000000ffffffff, q19); 18238 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20); 18239 ASSERT_EQUAL_128(0xffffffffffffffff, 0x0000000000000000, q21); 18240 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q22); 18241 ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q23); 18242 ASSERT_EQUAL_128(0xffffffffffffffff, 
0x0000000000000000, q24); 18243 TEARDOWN(); 18244 } 18245 18246 18247 TEST(neon_2regmisc_neg) { 18248 SETUP(); 18249 18250 START(); 18251 18252 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081); 18253 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff); 18254 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff); 18255 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff); 18256 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000); 18257 18258 __ Neg(v16.V8B(), v0.V8B()); 18259 __ Neg(v17.V16B(), v0.V16B()); 18260 __ Neg(v18.V4H(), v1.V4H()); 18261 __ Neg(v19.V8H(), v1.V8H()); 18262 __ Neg(v20.V2S(), v2.V2S()); 18263 __ Neg(v21.V4S(), v2.V4S()); 18264 __ Neg(d22, d3); 18265 __ Neg(v23.V2D(), v3.V2D()); 18266 __ Neg(v24.V2D(), v4.V2D()); 18267 18268 END(); 18269 18270 RUN(); 18271 ASSERT_EQUAL_128(0x0000000000000000, 0x807f0100ff81807f, q16); 18272 ASSERT_EQUAL_128(0x81ff00017f8081ff, 0x807f0100ff81807f, q17); 18273 ASSERT_EQUAL_128(0x0000000000000000, 0x00010000ffff8001, q18); 18274 ASSERT_EQUAL_128(0x80007fff00010000, 0x00010000ffff8001, q19); 18275 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000001, q20); 18276 ASSERT_EQUAL_128(0x8000000000000001, 0x0000000080000001, q21); 18277 ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000000001, q22); 18278 ASSERT_EQUAL_128(0x7fffffffffffffff, 0x8000000000000001, q23); 18279 ASSERT_EQUAL_128(0x8000000000000000, 0x0000000000000000, q24); 18280 18281 TEARDOWN(); 18282 } 18283 18284 18285 TEST(neon_2regmisc_sqneg) { 18286 SETUP(); 18287 18288 START(); 18289 18290 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081); 18291 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff); 18292 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff); 18293 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff); 18294 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000); 18295 18296 __ Sqneg(v16.V8B(), v0.V8B()); 18297 __ Sqneg(v17.V16B(), v0.V16B()); 18298 __ Sqneg(v18.V4H(), v1.V4H()); 18299 __ Sqneg(v19.V8H(), v1.V8H()); 18300 __ Sqneg(v20.V2S(), v2.V2S()); 18301 __ Sqneg(v21.V4S(), v2.V4S()); 18302 __ Sqneg(v22.V2D(), v3.V2D()); 18303 __ Sqneg(v23.V2D(), v4.V2D()); 18304 18305 __ Sqneg(b24, b0); 18306 __ Sqneg(h25, h1); 18307 __ Sqneg(s26, s2); 18308 __ Sqneg(d27, d3); 18309 18310 END(); 18311 18312 RUN(); 18313 ASSERT_EQUAL_128(0x0000000000000000, 0x7f7f0100ff817f7f, q16); 18314 ASSERT_EQUAL_128(0x81ff00017f7f81ff, 0x7f7f0100ff817f7f, q17); 18315 ASSERT_EQUAL_128(0x0000000000000000, 0x00010000ffff8001, q18); 18316 ASSERT_EQUAL_128(0x7fff7fff00010000, 0x00010000ffff8001, q19); 18317 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000001, q20); 18318 ASSERT_EQUAL_128(0x7fffffff00000001, 0x0000000080000001, q21); 18319 ASSERT_EQUAL_128(0x7fffffffffffffff, 0x8000000000000001, q22); 18320 ASSERT_EQUAL_128(0x7fffffffffffffff, 0x0000000000000000, q23); 18321 18322 ASSERT_EQUAL_128(0, 0x7f, q24); 18323 ASSERT_EQUAL_128(0, 0x8001, q25); 18324 ASSERT_EQUAL_128(0, 0x80000001, q26); 18325 ASSERT_EQUAL_128(0, 0x8000000000000001, q27); 18326 18327 TEARDOWN(); 18328 } 18329 18330 18331 TEST(neon_2regmisc_abs) { 18332 SETUP(); 18333 18334 START(); 18335 18336 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081); 18337 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff); 18338 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff); 18339 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff); 18340 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000); 18341 18342 __ Abs(v16.V8B(), v0.V8B()); 18343 __ 
Abs(v17.V16B(), v0.V16B()); 18344 __ Abs(v18.V4H(), v1.V4H()); 18345 __ Abs(v19.V8H(), v1.V8H()); 18346 __ Abs(v20.V2S(), v2.V2S()); 18347 __ Abs(v21.V4S(), v2.V4S()); 18348 __ Abs(d22, d3); 18349 __ Abs(v23.V2D(), v3.V2D()); 18350 __ Abs(v24.V2D(), v4.V2D()); 18351 18352 END(); 18353 18354 RUN(); 18355 ASSERT_EQUAL_128(0x0000000000000000, 0x807f0100017f807f, q16); 18356 ASSERT_EQUAL_128(0x7f0100017f807f01, 0x807f0100017f807f, q17); 18357 ASSERT_EQUAL_128(0x0000000000000000, 0x0001000000017fff, q18); 18358 ASSERT_EQUAL_128(0x80007fff00010000, 0x0001000000017fff, q19); 18359 ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20); 18360 ASSERT_EQUAL_128(0x8000000000000001, 0x000000007fffffff, q21); 18361 ASSERT_EQUAL_128(0x0000000000000000, 0x7fffffffffffffff, q22); 18362 ASSERT_EQUAL_128(0x7fffffffffffffff, 0x7fffffffffffffff, q23); 18363 ASSERT_EQUAL_128(0x8000000000000000, 0x0000000000000000, q24); 18364 18365 TEARDOWN(); 18366 } 18367 18368 18369 TEST(neon_2regmisc_sqabs) { 18370 SETUP(); 18371 18372 START(); 18373 18374 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081); 18375 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff); 18376 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff); 18377 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff); 18378 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000); 18379 18380 __ Sqabs(v16.V8B(), v0.V8B()); 18381 __ Sqabs(v17.V16B(), v0.V16B()); 18382 __ Sqabs(v18.V4H(), v1.V4H()); 18383 __ Sqabs(v19.V8H(), v1.V8H()); 18384 __ Sqabs(v20.V2S(), v2.V2S()); 18385 __ Sqabs(v21.V4S(), v2.V4S()); 18386 __ Sqabs(v22.V2D(), v3.V2D()); 18387 __ Sqabs(v23.V2D(), v4.V2D()); 18388 18389 __ Sqabs(b24, b0); 18390 __ Sqabs(h25, h1); 18391 __ Sqabs(s26, s2); 18392 __ Sqabs(d27, d3); 18393 18394 END(); 18395 18396 RUN(); 18397 ASSERT_EQUAL_128(0x0000000000000000, 0x7f7f0100017f7f7f, q16); 18398 ASSERT_EQUAL_128(0x7f0100017f7f7f01, 0x7f7f0100017f7f7f, q17); 18399 ASSERT_EQUAL_128(0x0000000000000000, 0x0001000000017fff, q18); 18400 ASSERT_EQUAL_128(0x7fff7fff00010000, 0x0001000000017fff, q19); 18401 ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20); 18402 ASSERT_EQUAL_128(0x7fffffff00000001, 0x000000007fffffff, q21); 18403 ASSERT_EQUAL_128(0x7fffffffffffffff, 0x7fffffffffffffff, q22); 18404 ASSERT_EQUAL_128(0x7fffffffffffffff, 0x0000000000000000, q23); 18405 18406 ASSERT_EQUAL_128(0, 0x7f, q24); 18407 ASSERT_EQUAL_128(0, 0x7fff, q25); 18408 ASSERT_EQUAL_128(0, 0x7fffffff, q26); 18409 ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q27); 18410 18411 TEARDOWN(); 18412 } 18413 18414 TEST(neon_2regmisc_suqadd) { 18415 SETUP(); 18416 18417 START(); 18418 18419 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081); 18420 __ Movi(v1.V2D(), 0x017f8081ff00017f, 0x010080ff7f0180ff); 18421 18422 __ Movi(v2.V2D(), 0x80008001ffff0000, 0xffff000000017ffd); 18423 __ Movi(v3.V2D(), 0xffff000080008001, 0x00017fffffff0001); 18424 18425 __ Movi(v4.V2D(), 0x80000000fffffffe, 0xfffffff17ffffffe); 18426 __ Movi(v5.V2D(), 0xffffffff80000000, 0x7fffffff00000002); 18427 18428 __ Movi(v6.V2D(), 0x8000000000000001, 0x7fffffffffffffff); 18429 __ Movi(v7.V2D(), 0x8000000000000000, 0x8000000000000002); 18430 18431 __ Mov(v16.V2D(), v0.V2D()); 18432 __ Mov(v17.V2D(), v0.V2D()); 18433 __ Mov(v18.V2D(), v2.V2D()); 18434 __ Mov(v19.V2D(), v2.V2D()); 18435 __ Mov(v20.V2D(), v4.V2D()); 18436 __ Mov(v21.V2D(), v4.V2D()); 18437 __ Mov(v22.V2D(), v6.V2D()); 18438 18439 __ Mov(v23.V2D(), v0.V2D()); 18440 __ Mov(v24.V2D(), v2.V2D()); 18441 __ Mov(v25.V2D(), 
v4.V2D()); 18442 __ Mov(v26.V2D(), v6.V2D()); 18443 18444 __ Suqadd(v16.V8B(), v1.V8B()); 18445 __ Suqadd(v17.V16B(), v1.V16B()); 18446 __ Suqadd(v18.V4H(), v3.V4H()); 18447 __ Suqadd(v19.V8H(), v3.V8H()); 18448 __ Suqadd(v20.V2S(), v5.V2S()); 18449 __ Suqadd(v21.V4S(), v5.V4S()); 18450 __ Suqadd(v22.V2D(), v7.V2D()); 18451 18452 __ Suqadd(b23, b1); 18453 __ Suqadd(h24, h3); 18454 __ Suqadd(s25, s5); 18455 __ Suqadd(d26, d7); 18456 18457 END(); 18458 18459 RUN(); 18460 ASSERT_EQUAL_128(0x0000000000000000, 0x81817f7f7f7f007f, q16); 18461 ASSERT_EQUAL_128(0x7f7f7f7f7f807f7f, 0x81817f7f7f7f007f, q17); 18462 ASSERT_EQUAL_128(0x0000000000000000, 0x00007fff7fff7ffe, q18); 18463 ASSERT_EQUAL_128(0x7fff80017fff7fff, 0x00007fff7fff7ffe, q19); 18464 ASSERT_EQUAL_128(0x0000000000000000, 0x7ffffff07fffffff, q20); 18465 ASSERT_EQUAL_128(0x7fffffff7ffffffe, 0x7ffffff07fffffff, q21); 18466 ASSERT_EQUAL_128(0x0000000000000001, 0x7fffffffffffffff, q22); 18467 18468 ASSERT_EQUAL_128(0, 0x7f, q23); 18469 ASSERT_EQUAL_128(0, 0x7ffe, q24); 18470 ASSERT_EQUAL_128(0, 0x7fffffff, q25); 18471 ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q26); 18472 TEARDOWN(); 18473 } 18474 18475 TEST(neon_2regmisc_usqadd) { 18476 SETUP(); 18477 18478 START(); 18479 18480 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f7ffe); 18481 __ Movi(v1.V2D(), 0x017f8081ff00017f, 0x010080ff7f018002); 18482 18483 __ Movi(v2.V2D(), 0x80008001fffe0000, 0xffff000000017ffd); 18484 __ Movi(v3.V2D(), 0xffff000000028001, 0x00017fffffff0001); 18485 18486 __ Movi(v4.V2D(), 0x80000000fffffffe, 0x00000001fffffffe); 18487 __ Movi(v5.V2D(), 0xffffffff80000000, 0xfffffffe00000002); 18488 18489 __ Movi(v6.V2D(), 0x8000000000000002, 0x7fffffffffffffff); 18490 __ Movi(v7.V2D(), 0x7fffffffffffffff, 0x8000000000000000); 18491 18492 __ Mov(v16.V2D(), v0.V2D()); 18493 __ Mov(v17.V2D(), v0.V2D()); 18494 __ Mov(v18.V2D(), v2.V2D()); 18495 __ Mov(v19.V2D(), v2.V2D()); 18496 __ Mov(v20.V2D(), v4.V2D()); 18497 __ Mov(v21.V2D(), v4.V2D()); 18498 __ Mov(v22.V2D(), v6.V2D()); 18499 18500 __ Mov(v23.V2D(), v0.V2D()); 18501 __ Mov(v24.V2D(), v2.V2D()); 18502 __ Mov(v25.V2D(), v4.V2D()); 18503 __ Mov(v26.V2D(), v6.V2D()); 18504 18505 __ Usqadd(v16.V8B(), v1.V8B()); 18506 __ Usqadd(v17.V16B(), v1.V16B()); 18507 __ Usqadd(v18.V4H(), v3.V4H()); 18508 __ Usqadd(v19.V8H(), v3.V8H()); 18509 __ Usqadd(v20.V2S(), v5.V2S()); 18510 __ Usqadd(v21.V4S(), v5.V4S()); 18511 __ Usqadd(v22.V2D(), v7.V2D()); 18512 18513 __ Usqadd(b23, b1); 18514 __ Usqadd(h24, h3); 18515 __ Usqadd(s25, s5); 18516 __ Usqadd(d26, d7); 18517 18518 END(); 18519 18520 RUN(); 18521 ASSERT_EQUAL_128(0x0000000000000000, 0x81817f00808000ff, q16); 18522 ASSERT_EQUAL_128(0x8080008080808080, 0x81817f00808000ff, q17); 18523 ASSERT_EQUAL_128(0x0000000000000000, 0xffff7fff00007ffe, q18); 18524 ASSERT_EQUAL_128(0x7fff8001ffff0000, 0xffff7fff00007ffe, q19); 18525 ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q20); 18526 ASSERT_EQUAL_128(0x7fffffff7ffffffe, 0x00000000ffffffff, q21); 18527 ASSERT_EQUAL_128(0xffffffffffffffff, 0x0000000000000000, q22); 18528 18529 ASSERT_EQUAL_128(0, 0xff, q23); 18530 ASSERT_EQUAL_128(0, 0x7ffe, q24); 18531 ASSERT_EQUAL_128(0, 0xffffffff, q25); 18532 ASSERT_EQUAL_128(0, 0x0000000000000000, q26); 18533 TEARDOWN(); 18534 } 18535 18536 18537 TEST(system_sys) { 18538 SETUP(); 18539 const char* msg = "SYS test!"; 18540 uintptr_t msg_addr = reinterpret_cast<uintptr_t>(msg); 18541 18542 START(); 18543 __ Mov(x4, msg_addr); 18544 __ Sys(3, 0x7, 0x5, 1, x4); 18545 __ Mov(x3, x4); 18546 __ 
Sys(3, 0x7, 0xa, 1, x3); 18547 __ Mov(x2, x3); 18548 __ Sys(3, 0x7, 0xb, 1, x2); 18549 __ Mov(x1, x2); 18550 __ Sys(3, 0x7, 0xe, 1, x1); 18551 // TODO: Add tests to check ZVA equivalent. 18552 END(); 18553 18554 RUN(); 18555 18556 TEARDOWN(); 18557 } 18558 18559 18560 TEST(system_ic) { 18561 SETUP(); 18562 const char* msg = "IC test!"; 18563 uintptr_t msg_addr = reinterpret_cast<uintptr_t>(msg); 18564 18565 START(); 18566 __ Mov(x11, msg_addr); 18567 __ Ic(IVAU, x11); 18568 END(); 18569 18570 RUN(); 18571 18572 TEARDOWN(); 18573 } 18574 18575 18576 TEST(system_dc) { 18577 SETUP(); 18578 const char* msg = "DC test!"; 18579 uintptr_t msg_addr = reinterpret_cast<uintptr_t>(msg); 18580 18581 START(); 18582 __ Mov(x20, msg_addr); 18583 __ Dc(CVAC, x20); 18584 __ Mov(x21, x20); 18585 __ Dc(CVAU, x21); 18586 __ Mov(x22, x21); 18587 __ Dc(CIVAC, x22); 18588 // TODO: Add tests to check ZVA. 18589 END(); 18590 18591 RUN(); 18592 18593 TEARDOWN(); 18594 } 18595 18596 18597 TEST(neon_2regmisc_xtn) { 18598 SETUP(); 18599 18600 START(); 18601 18602 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081); 18603 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff); 18604 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff); 18605 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff); 18606 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000); 18607 18608 __ Xtn(v16.V8B(), v0.V8H()); 18609 __ Xtn2(v16.V16B(), v1.V8H()); 18610 __ Xtn(v17.V4H(), v1.V4S()); 18611 __ Xtn2(v17.V8H(), v2.V4S()); 18612 __ Xtn(v18.V2S(), v3.V2D()); 18613 __ Xtn2(v18.V4S(), v4.V2D()); 18614 18615 END(); 18616 18617 RUN(); 18618 ASSERT_EQUAL_128(0x0001ff00ff0001ff, 0x01ff800181007f81, q16); 18619 ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x8001000000007fff, q17); 18620 ASSERT_EQUAL_128(0x0000000000000000, 0x00000001ffffffff, q18); 18621 TEARDOWN(); 18622 } 18623 18624 18625 TEST(neon_2regmisc_sqxtn) { 18626 SETUP(); 18627 18628 START(); 18629 18630 __ Movi(v0.V2D(), 0x7f01007a81807f01, 0x8081ff00017f8081); 18631 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff); 18632 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff); 18633 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff); 18634 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000); 18635 18636 __ Sqxtn(v16.V8B(), v0.V8H()); 18637 __ Sqxtn2(v16.V16B(), v1.V8H()); 18638 __ Sqxtn(v17.V4H(), v1.V4S()); 18639 __ Sqxtn2(v17.V8H(), v2.V4S()); 18640 __ Sqxtn(v18.V2S(), v3.V2D()); 18641 __ Sqxtn2(v18.V4S(), v4.V2D()); 18642 __ Sqxtn(b19, h0); 18643 __ Sqxtn(h20, s0); 18644 __ Sqxtn(s21, d0); 18645 18646 END(); 18647 18648 RUN(); 18649 ASSERT_EQUAL_128(0x8080ff00ff00017f, 0x7f7a807f80807f80, q16); 18650 ASSERT_EQUAL_128(0x8000ffff00007fff, 0x8000800080007fff, q17); 18651 ASSERT_EQUAL_128(0x8000000000000000, 0x800000007fffffff, q18); 18652 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000080, q19); 18653 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000007fff, q20); 18654 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000000, q21); 18655 TEARDOWN(); 18656 } 18657 18658 18659 TEST(neon_2regmisc_uqxtn) { 18660 SETUP(); 18661 18662 START(); 18663 18664 __ Movi(v0.V2D(), 0x7f01007a81807f01, 0x8081ff00017f8081); 18665 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff); 18666 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff); 18667 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff); 18668 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000); 18669 18670 __ Uqxtn(v16.V8B(), v0.V8H()); 18671 __ Uqxtn2(v16.V16B(), v1.V8H()); 
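  // Uqxtn narrows each source lane to half its width with unsigned
  // saturation and writes the low half of the destination; the Uqxtn2 form
  // fills the high half while leaving the low half intact.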
  __ Uqxtn(v17.V4H(), v1.V4S());
  __ Uqxtn2(v17.V8H(), v2.V4S());
  __ Uqxtn(v18.V2S(), v3.V2D());
  __ Uqxtn2(v18.V4S(), v4.V2D());
  __ Uqxtn(b19, h0);
  __ Uqxtn(h20, s0);
  __ Uqxtn(s21, d0);

  END();

  RUN();
  ASSERT_EQUAL_128(0xffffff00ff0001ff, 0xff7affffffffffff, q16);
  ASSERT_EQUAL_128(0xffffffff0000ffff, 0xffffffffffffffff, q17);
  ASSERT_EQUAL_128(0xffffffff00000000, 0xffffffffffffffff, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00000000000000ff, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000ffff, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q21);
  TEARDOWN();
}


TEST(neon_2regmisc_sqxtun) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x7f01007a81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Sqxtun(v16.V8B(), v0.V8H());
  __ Sqxtun2(v16.V16B(), v1.V8H());
  __ Sqxtun(v17.V4H(), v1.V4S());
  __ Sqxtun2(v17.V8H(), v2.V4S());
  __ Sqxtun(v18.V2S(), v3.V2D());
  __ Sqxtun2(v18.V4S(), v4.V2D());
  __ Sqxtun(b19, h0);
  __ Sqxtun(h20, s0);
  __ Sqxtun(s21, d0);

  END();

  RUN();
  ASSERT_EQUAL_128(0x00000000000001ff, 0xff7a00ff0000ff00, q16);
  ASSERT_EQUAL_128(0x000000000000ffff, 0x000000000000ffff, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000ffff, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q21);
  TEARDOWN();
}

TEST(neon_3same_and) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x00aa55aaff55ff00, 0xaa55ff00555500ff);

  __ And(v16.V16B(), v0.V16B(), v0.V16B());  // self test
  __ And(v17.V16B(), v0.V16B(), v1.V16B());  // all combinations
  __ And(v24.V8B(), v0.V8B(), v0.V8B());  // self test
  __ And(v25.V8B(), v0.V8B(), v1.V8B());  // all combinations
  END();

  RUN();
  ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q16);
  ASSERT_EQUAL_128(0x0000000000555500, 0xaa00aa00005500aa, q17);
  ASSERT_EQUAL_128(0, 0xff00aa5500ff55aa, q24);
  ASSERT_EQUAL_128(0, 0xaa00aa00005500aa, q25);
  TEARDOWN();
}

TEST(neon_3same_bic) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x00ffaa00aa55aaff, 0xffff005500ff00ff);

  __ Bic(v16.V16B(), v0.V16B(), v0.V16B());  // self test
  __ Bic(v17.V16B(), v0.V16B(), v1.V16B());  // all combinations
  __ Bic(v24.V8B(), v0.V8B(), v0.V8B());  // self test
  __ Bic(v25.V8B(), v0.V8B(), v1.V8B());  // all combinations
  END();

  RUN();
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q16);
  ASSERT_EQUAL_128(0xff00005500aa5500, 0x0000aa0000005500, q17);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q24);
  ASSERT_EQUAL_128(0, 0x0000aa0000005500, q25);
  TEARDOWN();
}

TEST(neon_3same_orr) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x00aa55aaff55ff00, 0xaa55ff00555500ff);

  __ Orr(v16.V16B(), v0.V16B(), v0.V16B());  // self test
  __ Orr(v17.V16B(), v0.V16B(), v1.V16B());  // all combinations
  __ Orr(v24.V8B(), v0.V8B(), v0.V8B());  // self test
  __ Orr(v25.V8B(), v0.V8B(), v1.V8B());  // all combinations
  END();

  RUN();
  ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q16);
  ASSERT_EQUAL_128(0xffaaffffffffffaa, 0xff55ff5555ff55ff, q17);
  ASSERT_EQUAL_128(0, 0xff00aa5500ff55aa, q24);
  ASSERT_EQUAL_128(0, 0xff55ff5555ff55ff, q25);
  TEARDOWN();
}

TEST(neon_3same_mov) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);

  __ Mov(v16.V16B(), v0.V16B());
  __ Mov(v17.V8H(), v0.V8H());
  __ Mov(v18.V4S(), v0.V4S());
  __ Mov(v19.V2D(), v0.V2D());

  __ Mov(v24.V8B(), v0.V8B());
  __ Mov(v25.V4H(), v0.V4H());
  __ Mov(v26.V2S(), v0.V2S());
  END();

  RUN();

  ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q16);
  ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q17);
  ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q18);
  ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q19);

  ASSERT_EQUAL_128(0x0, 0xff00aa5500ff55aa, q24);
  ASSERT_EQUAL_128(0x0, 0xff00aa5500ff55aa, q25);
  ASSERT_EQUAL_128(0x0, 0xff00aa5500ff55aa, q26);

  TEARDOWN();
}

TEST(neon_3same_orn) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x00aa55aaff55ff00, 0xaa55ff00555500ff);

  __ Orn(v16.V16B(), v0.V16B(), v0.V16B());  // self test
  __ Orn(v17.V16B(), v0.V16B(), v1.V16B());  // all combinations
  __ Orn(v24.V8B(), v0.V8B(), v0.V8B());  // self test
  __ Orn(v25.V8B(), v0.V8B(), v1.V8B());  // all combinations
  END();

  RUN();
  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q16);
  ASSERT_EQUAL_128(0xff55aa5500ff55ff, 0xffaaaaffaaffffaa, q17);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q24);
  ASSERT_EQUAL_128(0, 0xffaaaaffaaffffaa, q25);
  TEARDOWN();
}

TEST(neon_3same_eor) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x00ffaa00aa55aaff, 0xffff005500ff00ff);

  __ Eor(v16.V16B(), v0.V16B(), v0.V16B());  // self test
  __ Eor(v17.V16B(), v0.V16B(), v1.V16B());  // all combinations
  __ Eor(v24.V8B(), v0.V8B(), v0.V8B());  // self test
  __ Eor(v25.V8B(), v0.V8B(), v1.V8B());  // all combinations
  END();

  RUN();
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q16);
  ASSERT_EQUAL_128(0xffff0055aaaaff55, 0x00ffaa0000005555, q17);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q24);
  ASSERT_EQUAL_128(0, 0x00ffaa0000005555, q25);
  TEARDOWN();
}

TEST(neon_3same_bif) {
  SETUP();

  START();

  __ Movi(v16.V2D(), 0xffff0000ff00ffff, 0xffff00000000aaaa);
  __ Movi(v0.V2D(), 0xff00ff00ff005555, 0xaaaa5555aaaaaaaa);
  __ Movi(v1.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);

  __ Movi(v17.V2D(), 0x5555aa55cccccccc, 0x33333333f0f0f0f0);
  __ Movi(v2.V2D(), 0x555555aaff00ff00, 0xff00ff00ff00ff00);
  __ Movi(v3.V2D(), 0xaa55aa5500ffff00, 0x00ffff0000ffff00);

  __ Movi(v18.V2D(), 0, 0xf0f0f0f00f0f0f0f);
  __ Movi(v4.V2D(), 0, 0xf0f0f0f0f0f0f0f0);
  __ Movi(v5.V2D(), 0, 0x00ffff0000ffff00);

  __ Bif(v16.V16B(), v0.V16B(), v1.V16B());
  __ Bif(v17.V16B(), v2.V16B(), v3.V16B());
  __ Bif(v18.V8B(), v4.V8B(), v5.V8B());
  END();

  RUN();

  ASSERT_EQUAL_128(0xffffff00ff0055ff, 0xffaa0055aa00aaaa, q16);
  ASSERT_EQUAL_128(0x5555ffffffcccc00, 0xff333300fff0f000, q17);
  ASSERT_EQUAL_128(0, 0xf0f0f0f0f00f0ff0, q18);
  TEARDOWN();
}

TEST(neon_3same_bit) {
  SETUP();

  START();

  __ Movi(v16.V2D(), 0xffff0000ff00ffff, 0xffff00000000aaaa);
  __ Movi(v0.V2D(), 0xff00ff00ff005555, 0xaaaa5555aaaaaaaa);
  __ Movi(v1.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);

  __ Movi(v17.V2D(), 0x5555aa55cccccccc, 0x33333333f0f0f0f0);
  __ Movi(v2.V2D(), 0x555555aaff00ff00, 0xff00ff00ff00ff00);
  __ Movi(v3.V2D(), 0xaa55aa5500ffff00, 0x00ffff0000ffff00);

  __ Movi(v18.V2D(), 0, 0xf0f0f0f00f0f0f0f);
  __ Movi(v4.V2D(), 0, 0xf0f0f0f0f0f0f0f0);
  __ Movi(v5.V2D(), 0, 0x00ffff0000ffff00);

  __ Bit(v16.V16B(), v0.V16B(), v1.V16B());
  __ Bit(v17.V16B(), v2.V16B(), v3.V16B());
  __ Bit(v18.V8B(), v4.V8B(), v5.V8B());
  END();

  RUN();

  ASSERT_EQUAL_128(0xff000000ff00ff55, 0xaaff550000aaaaaa, q16);
  ASSERT_EQUAL_128(0x55550000cc00ffcc, 0x3300ff33f000fff0, q17);
  ASSERT_EQUAL_128(0, 0xf0f0f0f00ff0f00f, q18);
  TEARDOWN();
}

TEST(neon_3same_bsl) {
  SETUP();

  START();

  __ Movi(v16.V2D(), 0xffff0000ff00ffff, 0xffff00000000aaaa);
  __ Movi(v0.V2D(), 0xff00ff00ff005555, 0xaaaa5555aaaaaaaa);
  __ Movi(v1.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);

  __ Movi(v17.V2D(), 0x5555aa55cccccccc, 0x33333333f0f0f0f0);
  __ Movi(v2.V2D(), 0x555555aaff00ff00, 0xff00ff00ff00ff00);
  __ Movi(v3.V2D(), 0xaa55aa5500ffff00, 0x00ffff0000ffff00);

  __ Movi(v18.V2D(), 0, 0xf0f0f0f00f0f0f0f);
  __ Movi(v4.V2D(), 0, 0xf0f0f0f0f0f0f0f0);
  __ Movi(v5.V2D(), 0, 0x00ffff0000ffff00);

  __ Bsl(v16.V16B(), v0.V16B(), v1.V16B());
  __ Bsl(v17.V16B(), v2.V16B(), v3.V16B());
  __ Bsl(v18.V8B(), v4.V8B(), v5.V8B());
  END();

  RUN();

  ASSERT_EQUAL_128(0xff0000ffff005555, 0xaaaa55aa55aaffaa, q16);
  ASSERT_EQUAL_128(0xff550000cc33ff00, 0x33ccff00f00fff00, q17);
  ASSERT_EQUAL_128(0, 0xf0fffff000f0f000, q18);
  TEARDOWN();
}


TEST(neon_3same_smax) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);

  __ Smax(v16.V8B(), v0.V8B(), v1.V8B());
  __ Smax(v18.V4H(), v0.V4H(), v1.V4H());
  __ Smax(v20.V2S(), v0.V2S(), v1.V2S());

  __ Smax(v17.V16B(), v0.V16B(), v1.V16B());
  __ Smax(v19.V8H(), v0.V8H(), v1.V8H());
  __ Smax(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0x0000000000005555, q16);
  ASSERT_EQUAL_128(0x0, 0x00000000000055ff, q18);
  ASSERT_EQUAL_128(0x0, 0x000000000000aa55, q20);
  ASSERT_EQUAL_128(0x55aa555555555555, 0x0000000000005555, q17);
  ASSERT_EQUAL_128(0x55aa555555555555, 0x00000000000055ff, q19);
  ASSERT_EQUAL_128(0x55aa555555555555, 0x000000000000aa55, q21);
  TEARDOWN();
}


TEST(neon_3same_smaxp) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);

  __ Smaxp(v16.V8B(), v0.V8B(), v1.V8B());
  __ Smaxp(v18.V4H(), v0.V4H(), v1.V4H());
  __ Smaxp(v20.V2S(), v0.V2S(), v1.V2S());

  __ Smaxp(v17.V16B(), v0.V16B(), v1.V16B());
  __ Smaxp(v19.V8H(), v0.V8H(), v1.V8H());
  __ Smaxp(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0x0000ff55ffff0055, q16);
  ASSERT_EQUAL_128(0x0, 0x000055ffffff0000, q18);
  ASSERT_EQUAL_128(0x0, 0x000000000000aa55, q20);
  ASSERT_EQUAL_128(0x5555aaaa0000ff55, 0xaaaa5555ffff0055, q17);
  ASSERT_EQUAL_128(0x55aaaaaa000055ff, 0xaaaa5555ffff0000, q19);
  ASSERT_EQUAL_128(0x55aa555500000000, 0x555555550000aa55, q21);
  TEARDOWN();
}


TEST(neon_addp_scalar) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Addp(d16, v0.V2D());
  __ Addp(d17, v1.V2D());
  __ Addp(d18, v2.V2D());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0x00224466ef66fa80, q16);
  ASSERT_EQUAL_128(0x0, 0x55aa5556aa5500a9, q17);
  ASSERT_EQUAL_128(0x0, 0xaaaaaaa96655ff55, q18);
  TEARDOWN();
}

TEST(neon_acrosslanes_addv) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Addv(b16, v0.V8B());
  __ Addv(b17, v0.V16B());
  __ Addv(h18, v1.V4H());
  __ Addv(h19, v1.V8H());
  __ Addv(s20, v2.V4S());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0xc7, q16);
  ASSERT_EQUAL_128(0x0, 0x99, q17);
  ASSERT_EQUAL_128(0x0, 0x55a9, q18);
  ASSERT_EQUAL_128(0x0, 0x55fc, q19);
  ASSERT_EQUAL_128(0x0, 0x1100a9fe, q20);
  TEARDOWN();
}


TEST(neon_acrosslanes_saddlv) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Saddlv(h16, v0.V8B());
  __ Saddlv(h17, v0.V16B());
  __ Saddlv(s18, v1.V4H());
  __ Saddlv(s19, v1.V8H());
  __ Saddlv(d20, v2.V4S());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0xffc7, q16);
  ASSERT_EQUAL_128(0x0, 0xff99, q17);
  ASSERT_EQUAL_128(0x0, 0x000055a9, q18);
  ASSERT_EQUAL_128(0x0, 0x000055fc, q19);
  ASSERT_EQUAL_128(0x0, 0x0000001100a9fe, q20);
  TEARDOWN();
}


TEST(neon_acrosslanes_uaddlv) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Uaddlv(h16, v0.V8B());
  __ Uaddlv(h17, v0.V16B());
  __ Uaddlv(s18, v1.V4H());
  __ Uaddlv(s19, v1.V8H());
  __ Uaddlv(d20, v2.V4S());

  END();

  RUN();

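  // Uaddlv zero-extends every lane before summing, so no lane can overflow;
  // for example, the eight bytes of v0's low half sum to 0x2c7.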
  ASSERT_EQUAL_128(0x0, 0x02c7, q16);
  ASSERT_EQUAL_128(0x0, 0x0599, q17);
  ASSERT_EQUAL_128(0x0, 0x000155a9, q18);
  ASSERT_EQUAL_128(0x0, 0x000355fc, q19);
  ASSERT_EQUAL_128(0x0, 0x000000021100a9fe, q20);
  TEARDOWN();
}


TEST(neon_acrosslanes_smaxv) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Smaxv(b16, v0.V8B());
  __ Smaxv(b17, v0.V16B());
  __ Smaxv(h18, v1.V4H());
  __ Smaxv(h19, v1.V8H());
  __ Smaxv(s20, v2.V4S());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0x33, q16);
  ASSERT_EQUAL_128(0x0, 0x44, q17);
  ASSERT_EQUAL_128(0x0, 0x55ff, q18);
  ASSERT_EQUAL_128(0x0, 0x55ff, q19);
  ASSERT_EQUAL_128(0x0, 0x66555555, q20);
  TEARDOWN();
}


TEST(neon_acrosslanes_sminv) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0xfffa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Sminv(b16, v0.V8B());
  __ Sminv(b17, v0.V16B());
  __ Sminv(h18, v1.V4H());
  __ Sminv(h19, v1.V8H());
  __ Sminv(s20, v2.V4S());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0xaa, q16);
  ASSERT_EQUAL_128(0x0, 0x80, q17);
  ASSERT_EQUAL_128(0x0, 0xffaa, q18);
  ASSERT_EQUAL_128(0x0, 0xaaaa, q19);
  ASSERT_EQUAL_128(0x0, 0xaaaaaaaa, q20);
  TEARDOWN();
}

TEST(neon_acrosslanes_umaxv) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaffab, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Umaxv(b16, v0.V8B());
  __ Umaxv(b17, v0.V16B());
  __ Umaxv(h18, v1.V4H());
  __ Umaxv(h19, v1.V8H());
  __ Umaxv(s20, v2.V4S());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0xfc, q16);
  ASSERT_EQUAL_128(0x0, 0xfe, q17);
  ASSERT_EQUAL_128(0x0, 0xffaa, q18);
  ASSERT_EQUAL_128(0x0, 0xffab, q19);
  ASSERT_EQUAL_128(0x0, 0xffffffff, q20);
  TEARDOWN();
}


TEST(neon_acrosslanes_uminv) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x02112233aabbfc01);
  __ Movi(v1.V2D(), 0xfffa5555aaaa0000, 0x00010003ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Uminv(b16, v0.V8B());
  __ Uminv(b17, v0.V16B());
  __ Uminv(h18, v1.V4H());
  __ Uminv(h19, v1.V8H());
  __ Uminv(s20, v2.V4S());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0x01, q16);
  ASSERT_EQUAL_128(0x0, 0x00, q17);
  ASSERT_EQUAL_128(0x0, 0x0001, q18);
  ASSERT_EQUAL_128(0x0, 0x0000, q19);
  ASSERT_EQUAL_128(0x0, 0x0000aa00, q20);
  TEARDOWN();
}


TEST(neon_3same_smin) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);

  __ Smin(v16.V8B(), v0.V8B(), v1.V8B());
  __ Smin(v18.V4H(), v0.V4H(), v1.V4H());
  __ Smin(v20.V2S(), v0.V2S(), v1.V2S());

  __ Smin(v17.V16B(), v0.V16B(), v1.V16B());
  __ Smin(v19.V8H(), v0.V8H(), v1.V8H());
  __ Smin(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0xffffffffffaaaaff, q16);
  ASSERT_EQUAL_128(0x0, 0xffffffffffaaaa55, q18);
  ASSERT_EQUAL_128(0x0, 0xffffffffffaa55ff, q20);
  ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaaaaff, q17);
  ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaaaa55, q19);
  ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaa55ff, q21);
  TEARDOWN();
}


TEST(neon_3same_umax) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);

  __ Umax(v16.V8B(), v0.V8B(), v1.V8B());
  __ Umax(v18.V4H(), v0.V4H(), v1.V4H());
  __ Umax(v20.V2S(), v0.V2S(), v1.V2S());

  __ Umax(v17.V16B(), v0.V16B(), v1.V16B());
  __ Umax(v19.V8H(), v0.V8H(), v1.V8H());
  __ Umax(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0xffffffffffaaaaff, q16);
  ASSERT_EQUAL_128(0x0, 0xffffffffffaaaa55, q18);
  ASSERT_EQUAL_128(0x0, 0xffffffffffaa55ff, q20);
  ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaaaaff, q17);
  ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaaaa55, q19);
  ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaa55ff, q21);
  TEARDOWN();
}


TEST(neon_3same_umin) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);

  __ Umin(v16.V8B(), v0.V8B(), v1.V8B());
  __ Umin(v18.V4H(), v0.V4H(), v1.V4H());
  __ Umin(v20.V2S(), v0.V2S(), v1.V2S());

  __ Umin(v17.V16B(), v0.V16B(), v1.V16B());
  __ Umin(v19.V8H(), v0.V8H(), v1.V8H());
  __ Umin(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0x0000000000005555, q16);
  ASSERT_EQUAL_128(0x0, 0x00000000000055ff, q18);
  ASSERT_EQUAL_128(0x0, 0x000000000000aa55, q20);
  ASSERT_EQUAL_128(0x55aa555555555555, 0x0000000000005555, q17);
  ASSERT_EQUAL_128(0x55aa555555555555, 0x00000000000055ff, q19);
  ASSERT_EQUAL_128(0x55aa555555555555, 0x000000000000aa55, q21);
  TEARDOWN();
}


TEST(neon_2regmisc_mvn) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);

  __ Mvn(v16.V16B(), v0.V16B());
  __ Mvn(v17.V8H(), v0.V8H());
  __ Mvn(v18.V4S(), v0.V4S());
  __ Mvn(v19.V2D(), v0.V2D());

  __ Mvn(v24.V8B(), v0.V8B());
  __ Mvn(v25.V4H(), v0.V4H());
  __ Mvn(v26.V2S(), v0.V2S());

  END();

  RUN();

  ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q16);
  ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q17);
  ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q18);
  ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q19);

  ASSERT_EQUAL_128(0x0, 0xaa55aa55aa55aa55, q24);
  ASSERT_EQUAL_128(0x0, 0xaa55aa55aa55aa55, q25);
  ASSERT_EQUAL_128(0x0, 0xaa55aa55aa55aa55, q26);
  TEARDOWN();
}


TEST(neon_2regmisc_not) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);
  __ Movi(v1.V2D(), 0, 0x00ffff0000ffff00);

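  // Not inverts every bit of the source; the vector Mvn forms exercised in
  // the previous test are aliases of Not.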
  __ Not(v16.V16B(), v0.V16B());
  __ Not(v17.V8B(), v1.V8B());
  END();

  RUN();

  ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q16);
  ASSERT_EQUAL_128(0x0, 0xff0000ffff0000ff, q17);
  TEARDOWN();
}

TEST(neon_2regmisc_cls_clz_cnt) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);

  __ Cls(v16.V8B(), v1.V8B());
  __ Cls(v17.V16B(), v1.V16B());
  __ Cls(v18.V4H(), v1.V4H());
  __ Cls(v19.V8H(), v1.V8H());
  __ Cls(v20.V2S(), v1.V2S());
  __ Cls(v21.V4S(), v1.V4S());

  __ Clz(v22.V8B(), v0.V8B());
  __ Clz(v23.V16B(), v0.V16B());
  __ Clz(v24.V4H(), v0.V4H());
  __ Clz(v25.V8H(), v0.V8H());
  __ Clz(v26.V2S(), v0.V2S());
  __ Clz(v27.V4S(), v0.V4S());

  __ Cnt(v28.V8B(), v0.V8B());
  __ Cnt(v29.V16B(), v1.V16B());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x0601000000000102, q16);
  ASSERT_EQUAL_128(0x0601000000000102, 0x0601000000000102, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0006000000000001, q18);
  ASSERT_EQUAL_128(0x0006000000000001, 0x0006000000000001, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000600000000, q20);
  ASSERT_EQUAL_128(0x0000000600000000, 0x0000000600000000, q21);

  ASSERT_EQUAL_128(0x0000000000000000, 0x0404040404040404, q22);
  ASSERT_EQUAL_128(0x0807060605050505, 0x0404040404040404, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0004000400040004, q24);
  ASSERT_EQUAL_128(0x000f000600050005, 0x0004000400040004, q25);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000400000004, q26);
  ASSERT_EQUAL_128(0x0000000f00000005, 0x0000000400000004, q27);

  ASSERT_EQUAL_128(0x0000000000000000, 0x0102020302030304, q28);
  ASSERT_EQUAL_128(0x0705050305030301, 0x0103030503050507, q29);

  TEARDOWN();
}

TEST(neon_2regmisc_rev) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);

  __ Rev16(v16.V8B(), v0.V8B());
  __ Rev16(v17.V16B(), v0.V16B());

  __ Rev32(v18.V8B(), v0.V8B());
  __ Rev32(v19.V16B(), v0.V16B());
  __ Rev32(v20.V4H(), v0.V4H());
  __ Rev32(v21.V8H(), v0.V8H());

  __ Rev64(v22.V8B(), v0.V8B());
  __ Rev64(v23.V16B(), v0.V16B());
  __ Rev64(v24.V4H(), v0.V4H());
  __ Rev64(v25.V8H(), v0.V8H());
  __ Rev64(v26.V2S(), v0.V2S());
  __ Rev64(v27.V4S(), v0.V4S());

  __ Rbit(v28.V8B(), v1.V8B());
  __ Rbit(v29.V16B(), v1.V16B());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x09080b0a0d0c0f0e, q16);
  ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q17);

  ASSERT_EQUAL_128(0x0000000000000000, 0x0b0a09080f0e0d0c, q18);
  ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0a0b08090e0f0c0d, q20);
  ASSERT_EQUAL_128(0x0203000106070405, 0x0a0b08090e0f0c0d, q21);

  ASSERT_EQUAL_128(0x0000000000000000, 0x0f0e0d0c0b0a0908, q22);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0e0f0c0d0a0b0809, q24);
  ASSERT_EQUAL_128(0x0607040502030001, 0x0e0f0c0d0a0b0809, q25);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0c0d0e0f08090a0b, q26);
  ASSERT_EQUAL_128(0x0405060700010203, 0x0c0d0e0f08090a0b, q27);

  ASSERT_EQUAL_128(0x0000000000000000, 0x80c4a2e691d5b3f7, q28);
  ASSERT_EQUAL_128(0x7f3b5d196e2a4c08, 0x80c4a2e691d5b3f7, q29);

  TEARDOWN();
}


TEST(neon_sli) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);

  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v0.V2D());
  __ Mov(v19.V2D(), v0.V2D());
  __ Mov(v20.V2D(), v0.V2D());
  __ Mov(v21.V2D(), v0.V2D());
  __ Mov(v22.V2D(), v0.V2D());
  __ Mov(v23.V2D(), v0.V2D());

  __ Sli(v16.V8B(), v1.V8B(), 4);
  __ Sli(v17.V16B(), v1.V16B(), 7);
  __ Sli(v18.V4H(), v1.V4H(), 8);
  __ Sli(v19.V8H(), v1.V8H(), 15);
  __ Sli(v20.V2S(), v1.V2S(), 0);
  __ Sli(v21.V4S(), v1.V4S(), 31);
  __ Sli(v22.V2D(), v1.V2D(), 48);

  __ Sli(d23, d1, 48);

  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x18395a7b9cbddeff, q16);
  ASSERT_EQUAL_128(0x0001020304050607, 0x88898a8b8c8d8e8f, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x2309670bab0def0f, q18);
  ASSERT_EQUAL_128(0x0001020304050607, 0x88098a0b8c0d8e0f, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0123456789abcdef, q20);
  ASSERT_EQUAL_128(0x0001020304050607, 0x88090a0b8c0d0e0f, q21);
  ASSERT_EQUAL_128(0x3210020304050607, 0xcdef0a0b0c0d0e0f, q22);

  ASSERT_EQUAL_128(0x0000000000000000, 0xcdef0a0b0c0d0e0f, q23);


  TEARDOWN();
}


TEST(neon_sri) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);

  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v0.V2D());
  __ Mov(v19.V2D(), v0.V2D());
  __ Mov(v20.V2D(), v0.V2D());
  __ Mov(v21.V2D(), v0.V2D());
  __ Mov(v22.V2D(), v0.V2D());
  __ Mov(v23.V2D(), v0.V2D());

  __ Sri(v16.V8B(), v1.V8B(), 4);
  __ Sri(v17.V16B(), v1.V16B(), 7);
  __ Sri(v18.V4H(), v1.V4H(), 8);
  __ Sri(v19.V8H(), v1.V8H(), 15);
  __ Sri(v20.V2S(), v1.V2S(), 1);
  __ Sri(v21.V4S(), v1.V4S(), 31);
  __ Sri(v22.V2D(), v1.V2D(), 48);

  __ Sri(d23, d1, 48);

  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x00020406080a0c0e, q16);
  ASSERT_EQUAL_128(0x0101030304040606, 0x08080a0a0d0d0f0f, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x08010a450c890ecd, q18);
  ASSERT_EQUAL_128(0x0001020304040606, 0x08080a0a0c0d0e0f, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0091a2b344d5e6f7, q20);
  ASSERT_EQUAL_128(0x0001020304050606, 0x08090a0a0c0d0e0f, q21);
  ASSERT_EQUAL_128(0x000102030405fedc, 0x08090a0b0c0d0123, q22);

  ASSERT_EQUAL_128(0x0000000000000000, 0x08090a0b0c0d0123, q23);


  TEARDOWN();
}


TEST(neon_shrn) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Shrn(v16.V8B(), v0.V8H(), 8);
  __ Shrn2(v16.V16B(), v1.V8H(), 1);
  __ Shrn(v17.V4H(), v1.V4S(), 16);
  __ Shrn2(v17.V8H(), v2.V4S(), 1);
  __ Shrn(v18.V2S(), v3.V2D(), 32);
  __ Shrn2(v18.V4S(), v3.V2D(), 1);

  END();

  RUN();
  ASSERT_EQUAL_128(0x0000ff00ff0000ff, 0x7f00817f80ff0180, q16);
  ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x8000ffffffff0001, q17);
  ASSERT_EQUAL_128(0x00000000ffffffff, 0x800000007fffffff, q18);
  TEARDOWN();
}


TEST(neon_rshrn) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Rshrn(v16.V8B(), v0.V8H(), 8);
  __ Rshrn2(v16.V16B(), v1.V8H(), 1);
  __ Rshrn(v17.V4H(), v1.V4S(), 16);
  __ Rshrn2(v17.V8H(), v2.V4S(), 1);
  __ Rshrn(v18.V2S(), v3.V2D(), 32);
  __ Rshrn2(v18.V4S(), v3.V2D(), 1);

  END();

  RUN();
  ASSERT_EQUAL_128(0x0001000000000100, 0x7f01827f81ff0181, q16);
  ASSERT_EQUAL_128(0x0000000000000000, 0x8001ffffffff0001, q17);
  ASSERT_EQUAL_128(0x0000000100000000, 0x8000000080000000, q18);
  TEARDOWN();
}


TEST(neon_uqshrn) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Uqshrn(v16.V8B(), v0.V8H(), 8);
  __ Uqshrn2(v16.V16B(), v1.V8H(), 1);
  __ Uqshrn(v17.V4H(), v1.V4S(), 16);
  __ Uqshrn2(v17.V8H(), v2.V4S(), 1);
  __ Uqshrn(v18.V2S(), v3.V2D(), 32);
  __ Uqshrn2(v18.V4S(), v3.V2D(), 1);

  __ Uqshrn(b19, h0, 8);
  __ Uqshrn(h20, s1, 16);
  __ Uqshrn(s21, d3, 32);

  END();

  RUN();
  ASSERT_EQUAL_128(0xffffff00ff0000ff, 0x7f00817f80ff0180, q16);
  ASSERT_EQUAL_128(0xffffffff0000ffff, 0x8000ffffffff0001, q17);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0x800000007fffffff, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000080, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  TEARDOWN();
}


TEST(neon_uqrshrn) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Uqrshrn(v16.V8B(), v0.V8H(), 8);
  __ Uqrshrn2(v16.V16B(), v1.V8H(), 1);
  __ Uqrshrn(v17.V4H(), v1.V4S(), 16);
  __ Uqrshrn2(v17.V8H(), v2.V4S(), 1);
  __ Uqrshrn(v18.V2S(), v3.V2D(), 32);
  __ Uqrshrn2(v18.V4S(), v3.V2D(), 1);

  __ Uqrshrn(b19, h0, 8);
  __ Uqrshrn(h20, s1, 16);
  __ Uqrshrn(s21, d3, 32);

  END();

  RUN();
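  // The rounding forms add (1 << (shift - 1)) before narrowing; for example,
  // the low halfword of v0, 0x8081, rounds to 0x81 here rather than the 0x80
  // produced by the truncating Uqshrn above.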
  ASSERT_EQUAL_128(0xffffff00ff0001ff, 0x7f01827f81ff0181, q16);
  ASSERT_EQUAL_128(0xffffffff0000ffff, 0x8001ffffffff0001, q17);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0x8000000080000000, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000081, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000000, q21);
  TEARDOWN();
}


TEST(neon_sqshrn) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Sqshrn(v16.V8B(), v0.V8H(), 8);
  __ Sqshrn2(v16.V16B(), v1.V8H(), 1);
  __ Sqshrn(v17.V4H(), v1.V4S(), 16);
  __ Sqshrn2(v17.V8H(), v2.V4S(), 1);
  __ Sqshrn(v18.V2S(), v3.V2D(), 32);
  __ Sqshrn2(v18.V4S(), v3.V2D(), 1);

  __ Sqshrn(b19, h0, 8);
  __ Sqshrn(h20, s1, 16);
  __ Sqshrn(s21, d3, 32);

  END();

  RUN();
  ASSERT_EQUAL_128(0x8080ff00ff00007f, 0x7f00817f80ff0180, q16);
  ASSERT_EQUAL_128(0x8000ffff00007fff, 0x8000ffffffff0001, q17);
  ASSERT_EQUAL_128(0x800000007fffffff, 0x800000007fffffff, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000080, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  TEARDOWN();
}


TEST(neon_sqrshrn) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Sqrshrn(v16.V8B(), v0.V8H(), 8);
  __ Sqrshrn2(v16.V16B(), v1.V8H(), 1);
  __ Sqrshrn(v17.V4H(), v1.V4S(), 16);
  __ Sqrshrn2(v17.V8H(), v2.V4S(), 1);
  __ Sqrshrn(v18.V2S(), v3.V2D(), 32);
  __ Sqrshrn2(v18.V4S(), v3.V2D(), 1);

  __ Sqrshrn(b19, h0, 8);
  __ Sqrshrn(h20, s1, 16);
  __ Sqrshrn(s21, d3, 32);

  END();

  RUN();
  ASSERT_EQUAL_128(0x808000000000017f, 0x7f01827f81ff0181, q16);
  ASSERT_EQUAL_128(0x8000000000007fff, 0x8001ffffffff0001, q17);
  ASSERT_EQUAL_128(0x800000007fffffff, 0x800000007fffffff, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000081, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  TEARDOWN();
}


TEST(neon_sqshrun) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Sqshrun(v16.V8B(), v0.V8H(), 8);
  __ Sqshrun2(v16.V16B(), v1.V8H(), 1);
  __ Sqshrun(v17.V4H(), v1.V4S(), 16);
  __ Sqshrun2(v17.V8H(), v2.V4S(), 1);
  __ Sqshrun(v18.V2S(), v3.V2D(), 32);
  __ Sqshrun2(v18.V4S(), v3.V2D(), 1);

  __ Sqshrun(b19, h0, 8);
  __ Sqshrun(h20, s1, 16);
  __ Sqshrun(s21, d3, 32);

  END();

  RUN();
  ASSERT_EQUAL_128(0x00000000000000ff, 0x7f00007f00000100, q16);
  ASSERT_EQUAL_128(0x000000000000ffff, 0x0000000000000001, q17);
  ASSERT_EQUAL_128(0x00000000ffffffff, 0x000000007fffffff, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  TEARDOWN();
}


TEST(neon_sqrshrun) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Sqrshrun(v16.V8B(), v0.V8H(), 8);
  __ Sqrshrun2(v16.V16B(), v1.V8H(), 1);
  __ Sqrshrun(v17.V4H(), v1.V4S(), 16);
  __ Sqrshrun2(v17.V8H(), v2.V4S(), 1);
  __ Sqrshrun(v18.V2S(), v3.V2D(), 32);
  __ Sqrshrun2(v18.V4S(), v3.V2D(), 1);

  __ Sqrshrun(b19, h0, 8);
  __ Sqrshrun(h20, s1, 16);
  __ Sqrshrun(s21, d3, 32);

  END();

  RUN();
  ASSERT_EQUAL_128(0x00000000000001ff, 0x7f01007f00000100, q16);
  ASSERT_EQUAL_128(0x000000000000ffff, 0x0000000000000001, q17);
  ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000000080000000, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000000, q21);
  TEARDOWN();
}

TEST(neon_modimm_bic) {
  SETUP();

  START();

  __ Movi(v16.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v17.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v18.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v19.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v20.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v21.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v22.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v23.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v24.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v25.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v26.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v27.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);

  __ Bic(v16.V4H(), 0x00, 0);
  __ Bic(v17.V4H(), 0xff, 8);
  __ Bic(v18.V8H(), 0x00, 0);
  __ Bic(v19.V8H(), 0xff, 8);

  __ Bic(v20.V2S(), 0x00, 0);
  __ Bic(v21.V2S(), 0xff, 8);
  __ Bic(v22.V2S(), 0x00, 16);
  __ Bic(v23.V2S(), 0xff, 24);

  __ Bic(v24.V4S(), 0xff, 0);
  __ Bic(v25.V4S(), 0x00, 8);
  __ Bic(v26.V4S(), 0xff, 16);
  __ Bic(v27.V4S(), 0x00, 24);

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q16);
  ASSERT_EQUAL_128(0x0, 0x005500ff000000aa, q17);
  ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q18);
  ASSERT_EQUAL_128(0x00aa0055000000aa, 0x005500ff000000aa, q19);

  ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q20);
  ASSERT_EQUAL_128(0x0, 0x555500ff000000aa, q21);
  ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q22);
  ASSERT_EQUAL_128(0x0, 0x0055ffff0000aaaa, q23);

  ASSERT_EQUAL_128(0x00aaff00ff005500, 0x5555ff000000aa00, q24);
  ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q25);
  ASSERT_EQUAL_128(0x0000ff55ff0055aa, 0x5500ffff0000aaaa, q26);
  ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q27);

  TEARDOWN();
}


TEST(neon_modimm_movi_16bit_any) {
  SETUP();

  START();

  __ Movi(v0.V4H(), 0xabab);
  __ Movi(v1.V4H(), 0xab00);
  __ Movi(v2.V4H(), 0xabff);
  __ Movi(v3.V8H(), 0x00ab);
  __ Movi(v4.V8H(), 0xffab);
  __ Movi(v5.V8H(), 0xabcd);

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0xabababababababab, q0);
  ASSERT_EQUAL_128(0x0, 0xab00ab00ab00ab00, q1);
  ASSERT_EQUAL_128(0x0, 0xabffabffabffabff, q2);
  ASSERT_EQUAL_128(0x00ab00ab00ab00ab, 0x00ab00ab00ab00ab, q3);
  ASSERT_EQUAL_128(0xffabffabffabffab, 0xffabffabffabffab, q4);
  ASSERT_EQUAL_128(0xabcdabcdabcdabcd, 0xabcdabcdabcdabcd, q5);

  TEARDOWN();
}


TEST(neon_modimm_movi_32bit_any) {
  SETUP();

  START();

  __ Movi(v0.V2S(), 0x000000ab);
  __ Movi(v1.V2S(), 0x0000ab00);
  __ Movi(v2.V4S(), 0x00ab0000);
  __ Movi(v3.V4S(), 0xab000000);

  __ Movi(v4.V2S(), 0xffffffab);
  __ Movi(v5.V2S(), 0xffffabff);
  __ Movi(v6.V4S(), 0xffabffff);
  __ Movi(v7.V4S(), 0xabffffff);

  __ Movi(v16.V2S(), 0x0000abff);
  __ Movi(v17.V2S(), 0x00abffff);
  __ Movi(v18.V4S(), 0xffab0000);
  __ Movi(v19.V4S(), 0xffffab00);

  __ Movi(v20.V4S(), 0xabababab);
  __ Movi(v21.V4S(), 0xabcdabcd);
  __ Movi(v22.V4S(), 0xabcdef01);
  __ Movi(v23.V4S(), 0x00ffff00);

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0x000000ab000000ab, q0);
  ASSERT_EQUAL_128(0x0, 0x0000ab000000ab00, q1);
  ASSERT_EQUAL_128(0x00ab000000ab0000, 0x00ab000000ab0000, q2);
  ASSERT_EQUAL_128(0xab000000ab000000, 0xab000000ab000000, q3);

  ASSERT_EQUAL_128(0x0, 0xffffffabffffffab, q4);
  ASSERT_EQUAL_128(0x0, 0xffffabffffffabff, q5);
  ASSERT_EQUAL_128(0xffabffffffabffff, 0xffabffffffabffff, q6);
  ASSERT_EQUAL_128(0xabffffffabffffff, 0xabffffffabffffff, q7);

  ASSERT_EQUAL_128(0x0, 0x0000abff0000abff, q16);
  ASSERT_EQUAL_128(0x0, 0x00abffff00abffff, q17);
  ASSERT_EQUAL_128(0xffab0000ffab0000, 0xffab0000ffab0000, q18);
  ASSERT_EQUAL_128(0xffffab00ffffab00, 0xffffab00ffffab00, q19);

  ASSERT_EQUAL_128(0xabababababababab, 0xabababababababab, q20);
  ASSERT_EQUAL_128(0xabcdabcdabcdabcd, 0xabcdabcdabcdabcd, q21);
  ASSERT_EQUAL_128(0xabcdef01abcdef01, 0xabcdef01abcdef01, q22);
  ASSERT_EQUAL_128(0x00ffff0000ffff00, 0x00ffff0000ffff00, q23);
  TEARDOWN();
}


TEST(neon_modimm_movi_64bit_any) {
  SETUP();

  START();

  __ Movi(v0.V1D(), 0x00ffff0000ffffff);
  __ Movi(v1.V2D(), 0xabababababababab);
  __ Movi(v2.V2D(), 0xabcdabcdabcdabcd);
  __ Movi(v3.V2D(), 0xabcdef01abcdef01);
  __ Movi(v4.V1D(), 0xabcdef0123456789);
  __ Movi(v5.V2D(), 0xabcdef0123456789);

  END();

  RUN();

  ASSERT_EQUAL_64(0x00ffff0000ffffff, d0);
  ASSERT_EQUAL_128(0xabababababababab, 0xabababababababab, q1);
  ASSERT_EQUAL_128(0xabcdabcdabcdabcd, 0xabcdabcdabcdabcd, q2);
  ASSERT_EQUAL_128(0xabcdef01abcdef01, 0xabcdef01abcdef01, q3);
  ASSERT_EQUAL_64(0xabcdef0123456789, d4);
  ASSERT_EQUAL_128(0xabcdef0123456789, 0xabcdef0123456789, q5);

  TEARDOWN();
}


TEST(neon_modimm_movi) {
  SETUP();

  START();

  __ Movi(v0.V8B(), 0xaa);
  __ Movi(v1.V16B(), 0x55);

  __ Movi(d2, 0x00ffff0000ffffff);
  __ Movi(v3.V2D(), 0x00ffff0000ffffff);

  __ Movi(v16.V4H(), 0x00, LSL, 0);
  __ Movi(v17.V4H(), 0xff, LSL, 8);
  __ Movi(v18.V8H(), 0x00, LSL, 0);
  __ Movi(v19.V8H(), 0xff, LSL, 8);

  __ Movi(v20.V2S(), 0x00, LSL, 0);
  __ Movi(v21.V2S(), 0xff, LSL, 8);
  __ Movi(v22.V2S(), 0x00, LSL, 16);
  __ Movi(v23.V2S(), 0xff, LSL, 24);

  __ Movi(v24.V4S(), 0xff, LSL, 0);
  __ Movi(v25.V4S(), 0x00, LSL, 8);
  __ Movi(v26.V4S(), 0xff, LSL, 16);
  __ Movi(v27.V4S(), 0x00, LSL, 24);

  __ Movi(v28.V2S(), 0xaa, MSL, 8);
  __ Movi(v29.V2S(), 0x55, MSL, 16);
  __ Movi(v30.V4S(), 0xff, MSL, 8);
  __ Movi(v31.V4S(), 0x00, MSL, 16);

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0xaaaaaaaaaaaaaaaa, q0);
  ASSERT_EQUAL_128(0x5555555555555555, 0x5555555555555555, q1);

  ASSERT_EQUAL_128(0x0, 0x00ffff0000ffffff, q2);
  ASSERT_EQUAL_128(0x00ffff0000ffffff, 0x00ffff0000ffffff, q3);

  ASSERT_EQUAL_128(0x0, 0x0000000000000000, q16);
  ASSERT_EQUAL_128(0x0, 0xff00ff00ff00ff00, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q18);
  ASSERT_EQUAL_128(0xff00ff00ff00ff00, 0xff00ff00ff00ff00, q19);

  ASSERT_EQUAL_128(0x0, 0x0000000000000000, q20);
  ASSERT_EQUAL_128(0x0, 0x0000ff000000ff00, q21);
  ASSERT_EQUAL_128(0x0, 0x0000000000000000, q22);
  ASSERT_EQUAL_128(0x0, 0xff000000ff000000, q23);

  ASSERT_EQUAL_128(0x000000ff000000ff, 0x000000ff000000ff, q24);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
  ASSERT_EQUAL_128(0x00ff000000ff0000, 0x00ff000000ff0000, q26);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q27);

  ASSERT_EQUAL_128(0x0, 0x0000aaff0000aaff, q28);
  ASSERT_EQUAL_128(0x0, 0x0055ffff0055ffff, q29);
  ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x0000ffff0000ffff, q30);
  ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x0000ffff0000ffff, q31);

  TEARDOWN();
}


TEST(neon_modimm_mvni) {
  SETUP();

  START();

  __ Mvni(v16.V4H(), 0x00, LSL, 0);
  __ Mvni(v17.V4H(), 0xff, LSL, 8);
  __ Mvni(v18.V8H(), 0x00, LSL, 0);
  __ Mvni(v19.V8H(), 0xff, LSL, 8);

  __ Mvni(v20.V2S(), 0x00, LSL, 0);
  __ Mvni(v21.V2S(), 0xff, LSL, 8);
  __ Mvni(v22.V2S(), 0x00, LSL, 16);
  __ Mvni(v23.V2S(), 0xff, LSL, 24);

  __ Mvni(v24.V4S(), 0xff, LSL, 0);
  __ Mvni(v25.V4S(), 0x00, LSL, 8);
  __ Mvni(v26.V4S(), 0xff, LSL, 16);
  __ Mvni(v27.V4S(), 0x00, LSL, 24);

  __ Mvni(v28.V2S(), 0xaa, MSL, 8);
  __ Mvni(v29.V2S(), 0x55, MSL, 16);
  __ Mvni(v30.V4S(), 0xff, MSL, 8);
  __ Mvni(v31.V4S(), 0x00, MSL, 16);

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0xffffffffffffffff, q16);
  ASSERT_EQUAL_128(0x0, 0x00ff00ff00ff00ff, q17);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q18);
  ASSERT_EQUAL_128(0x00ff00ff00ff00ff, 0x00ff00ff00ff00ff, q19);

  ASSERT_EQUAL_128(0x0, 0xffffffffffffffff, q20);
  ASSERT_EQUAL_128(0x0, 0xffff00ffffff00ff, q21);
  ASSERT_EQUAL_128(0x0, 0xffffffffffffffff, q22);
  ASSERT_EQUAL_128(0x0, 0x00ffffff00ffffff, q23);

  ASSERT_EQUAL_128(0xffffff00ffffff00, 0xffffff00ffffff00, q24);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
  ASSERT_EQUAL_128(0xff00ffffff00ffff, 0xff00ffffff00ffff, q26);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q27);

  ASSERT_EQUAL_128(0x0, 0xffff5500ffff5500, q28);
  ASSERT_EQUAL_128(0x0, 0xffaa0000ffaa0000, q29);
  ASSERT_EQUAL_128(0xffff0000ffff0000, 0xffff0000ffff0000, q30);
  ASSERT_EQUAL_128(0xffff0000ffff0000, 0xffff0000ffff0000, q31);

  TEARDOWN();
}


TEST(neon_modimm_orr) {
  SETUP();

  START();

  __ Movi(v16.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v17.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v18.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v19.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v20.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v21.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v22.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v23.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v24.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v25.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v26.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v27.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);

  __ Orr(v16.V4H(), 0x00, 0);
  __ Orr(v17.V4H(), 0xff, 8);
  __ Orr(v18.V8H(), 0x00, 0);
  __ Orr(v19.V8H(), 0xff, 8);

  __ Orr(v20.V2S(), 0x00, 0);
  __ Orr(v21.V2S(), 0xff, 8);
  __ Orr(v22.V2S(), 0x00, 16);
  __ Orr(v23.V2S(), 0xff, 24);

  __ Orr(v24.V4S(), 0xff, 0);
  __ Orr(v25.V4S(), 0x00, 8);
  __ Orr(v26.V4S(), 0xff, 16);
  __ Orr(v27.V4S(), 0x00, 24);

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q16);
  ASSERT_EQUAL_128(0x0, 0xff55ffffff00ffaa, q17);
  ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q18);
  ASSERT_EQUAL_128(0xffaaff55ff00ffaa, 0xff55ffffff00ffaa, q19);

  ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q20);
  ASSERT_EQUAL_128(0x0, 0x5555ffff0000ffaa, q21);
  ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q22);
  ASSERT_EQUAL_128(0x0, 0xff55ffffff00aaaa, q23);

  ASSERT_EQUAL_128(0x00aaffffff0055ff, 0x5555ffff0000aaff, q24);
  ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q25);
  ASSERT_EQUAL_128(0x00ffff55ffff55aa, 0x55ffffff00ffaaaa, q26);
  ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q27);

  TEARDOWN();
}


// TODO: add arbitrary values once load literal to Q registers is supported.
TEST(neon_modimm_fmov) {
  SETUP();

  // Immediates which can be encoded in the instructions.
  const float kOne = 1.0f;
  const float kPointFive = 0.5f;
  const double kMinusThirteen = -13.0;
  // Immediates which cannot be encoded in the instructions.
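  // FMOV (vector, immediate) can only encode values of the form
  // (-1)^s * (16 + f) / 16 * 2^e, with 0 <= f <= 15 and -3 <= e <= 4, so the
  // macro assembler has to synthesise the values below by other means.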
  const float kNonImmFP32 = 255.0f;
  const double kNonImmFP64 = 12.3456;

  START();
  __ Fmov(v11.V2S(), kOne);
  __ Fmov(v12.V4S(), kPointFive);
  __ Fmov(v22.V2D(), kMinusThirteen);
  __ Fmov(v13.V2S(), kNonImmFP32);
  __ Fmov(v14.V4S(), kNonImmFP32);
  __ Fmov(v23.V2D(), kNonImmFP64);
  __ Fmov(v1.V2S(), 0.0);
  __ Fmov(v2.V4S(), 0.0);
  __ Fmov(v3.V2D(), 0.0);
  __ Fmov(v4.V2S(), kFP32PositiveInfinity);
  __ Fmov(v5.V4S(), kFP32PositiveInfinity);
  __ Fmov(v6.V2D(), kFP64PositiveInfinity);
  END();

  RUN();

  const uint64_t kOne1S = FloatToRawbits(1.0);
  const uint64_t kOne2S = (kOne1S << 32) | kOne1S;
  const uint64_t kPointFive1S = FloatToRawbits(0.5);
  const uint64_t kPointFive2S = (kPointFive1S << 32) | kPointFive1S;
  const uint64_t kMinusThirteen1D = DoubleToRawbits(-13.0);
  const uint64_t kNonImmFP321S = FloatToRawbits(kNonImmFP32);
  const uint64_t kNonImmFP322S = (kNonImmFP321S << 32) | kNonImmFP321S;
  const uint64_t kNonImmFP641D = DoubleToRawbits(kNonImmFP64);
  const uint64_t kFP32Inf1S = FloatToRawbits(kFP32PositiveInfinity);
  const uint64_t kFP32Inf2S = (kFP32Inf1S << 32) | kFP32Inf1S;
  const uint64_t kFP64Inf1D = DoubleToRawbits(kFP64PositiveInfinity);

  ASSERT_EQUAL_128(0x0, kOne2S, q11);
  ASSERT_EQUAL_128(kPointFive2S, kPointFive2S, q12);
  ASSERT_EQUAL_128(kMinusThirteen1D, kMinusThirteen1D, q22);
  ASSERT_EQUAL_128(0x0, kNonImmFP322S, q13);
  ASSERT_EQUAL_128(kNonImmFP322S, kNonImmFP322S, q14);
  ASSERT_EQUAL_128(kNonImmFP641D, kNonImmFP641D, q23);
  ASSERT_EQUAL_128(0x0, 0x0, q1);
  ASSERT_EQUAL_128(0x0, 0x0, q2);
  ASSERT_EQUAL_128(0x0, 0x0, q3);
  ASSERT_EQUAL_128(0x0, kFP32Inf2S, q4);
  ASSERT_EQUAL_128(kFP32Inf2S, kFP32Inf2S, q5);
  ASSERT_EQUAL_128(kFP64Inf1D, kFP64Inf1D, q6);

  TEARDOWN();
}


TEST(neon_perm) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);

  __ Trn1(v16.V16B(), v0.V16B(), v1.V16B());
  __ Trn2(v17.V16B(), v0.V16B(), v1.V16B());
  __ Zip1(v18.V16B(), v0.V16B(), v1.V16B());
  __ Zip2(v19.V16B(), v0.V16B(), v1.V16B());
  __ Uzp1(v20.V16B(), v0.V16B(), v1.V16B());
  __ Uzp2(v21.V16B(), v0.V16B(), v1.V16B());

  END();

  RUN();

  ASSERT_EQUAL_128(0x1101130315051707, 0x19091b0b1d0d1f0f, q16);
  ASSERT_EQUAL_128(0x1000120214041606, 0x18081a0a1c0c1e0e, q17);
  ASSERT_EQUAL_128(0x180819091a0a1b0b, 0x1c0c1d0d1e0e1f0f, q18);
  ASSERT_EQUAL_128(0x1000110112021303, 0x1404150516061707, q19);
  ASSERT_EQUAL_128(0x11131517191b1d1f, 0x01030507090b0d0f, q20);
  ASSERT_EQUAL_128(0x10121416181a1c1e, 0x00020406080a0c0e, q21);

  TEARDOWN();
}


TEST(neon_copy_dup_element) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100);
  __ Movi(v2.V2D(), 0xffeddccbbaae9988, 0x0011223344556677);
  __ Movi(v3.V2D(), 0x7766554433221100, 0x8899aabbccddeeff);
  __ Movi(v4.V2D(), 0x7766554433221100, 0x0123456789abcdef);
  __ Movi(v5.V2D(), 0x0011223344556677, 0x0123456789abcdef);

  __ Dup(v16.V16B(), v0.B(), 0);
  __ Dup(v17.V8H(), v1.H(), 7);
  __ Dup(v18.V4S(), v1.S(), 3);
  __ Dup(v19.V2D(), v0.D(), 0);

  __ Dup(v20.V8B(), v0.B(), 0);
  __ Dup(v21.V4H(), v1.H(), 7);
  __ Dup(v22.V2S(), v1.S(), 3);

  __ Dup(v23.B(), v0.B(), 0);
  __ Dup(v24.H(), v1.H(), 7);
  __ Dup(v25.S(), v1.S(), 3);
  __ Dup(v26.D(), v0.D(), 0);

  __ Dup(v2.V16B(), v2.B(), 0);
  __ Dup(v3.V8H(), v3.H(), 7);
  __ Dup(v4.V4S(), v4.S(), 0);
  __ Dup(v5.V2D(), v5.D(), 1);

  END();

  RUN();

  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q16);
  ASSERT_EQUAL_128(0xffedffedffedffed, 0xffedffedffedffed, q17);
  ASSERT_EQUAL_128(0xffeddccbffeddccb, 0xffeddccbffeddccb, q18);
  ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x8899aabbccddeeff, q19);

  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q20);
  ASSERT_EQUAL_128(0, 0xffedffedffedffed, q21);
  ASSERT_EQUAL_128(0, 0xffeddccbffeddccb, q22);

  ASSERT_EQUAL_128(0, 0x00000000000000ff, q23);
  ASSERT_EQUAL_128(0, 0x000000000000ffed, q24);
  ASSERT_EQUAL_128(0, 0x00000000ffeddccb, q25);
  ASSERT_EQUAL_128(0, 0x8899aabbccddeeff, q26);

  ASSERT_EQUAL_128(0x7777777777777777, 0x7777777777777777, q2);
  ASSERT_EQUAL_128(0x7766776677667766, 0x7766776677667766, q3);
  ASSERT_EQUAL_128(0x89abcdef89abcdef, 0x89abcdef89abcdef, q4);
  ASSERT_EQUAL_128(0x0011223344556677, 0x0011223344556677, q5);
  TEARDOWN();
}


TEST(neon_copy_dup_general) {
  SETUP();

  START();

  __ Mov(x0, 0x0011223344556677);

  __ Dup(v16.V16B(), w0);
  __ Dup(v17.V8H(), w0);
  __ Dup(v18.V4S(), w0);
  __ Dup(v19.V2D(), x0);

  __ Dup(v20.V8B(), w0);
  __ Dup(v21.V4H(), w0);
  __ Dup(v22.V2S(), w0);

  __ Dup(v2.V16B(), wzr);
  __ Dup(v3.V8H(), wzr);
  __ Dup(v4.V4S(), wzr);
  __ Dup(v5.V2D(), xzr);

  END();

  RUN();

  ASSERT_EQUAL_128(0x7777777777777777, 0x7777777777777777, q16);
  ASSERT_EQUAL_128(0x6677667766776677, 0x6677667766776677, q17);
  ASSERT_EQUAL_128(0x4455667744556677, 0x4455667744556677, q18);
  ASSERT_EQUAL_128(0x0011223344556677, 0x0011223344556677, q19);

  ASSERT_EQUAL_128(0, 0x7777777777777777, q20);
  ASSERT_EQUAL_128(0, 0x6677667766776677, q21);
  ASSERT_EQUAL_128(0, 0x4455667744556677, q22);

  ASSERT_EQUAL_128(0, 0, q2);
  ASSERT_EQUAL_128(0, 0, q3);
  ASSERT_EQUAL_128(0, 0, q4);
  ASSERT_EQUAL_128(0, 0, q5);
  TEARDOWN();
}


TEST(neon_copy_ins_element) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100);
  __ Movi(v16.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);
  __ Movi(v17.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
  __ Movi(v18.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v19.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);

  __ Movi(v2.V2D(), 0, 0x0011223344556677);
  __ Movi(v3.V2D(), 0, 0x8899aabbccddeeff);
  __ Movi(v4.V2D(), 0, 0x0123456789abcdef);
  __ Movi(v5.V2D(), 0, 0x0123456789abcdef);

  __ Ins(v16.V16B(), 15, v0.V16B(), 0);
  __ Ins(v17.V8H(), 0, v1.V8H(), 7);
  __ Ins(v18.V4S(), 3, v1.V4S(), 0);
  __ Ins(v19.V2D(), 1, v0.V2D(), 0);

  __ Ins(v2.V16B(), 2, v2.V16B(), 0);
  __ Ins(v3.V8H(), 0, v3.V8H(), 7);
  __ Ins(v4.V4S(), 3, v4.V4S(), 0);
  __ Ins(v5.V2D(), 0, v5.V2D(), 1);

  END();

  RUN();

  ASSERT_EQUAL_128(0xff23456789abcdef, 0xfedcba9876543210, q16);
  ASSERT_EQUAL_128(0xfedcba9876543210, 0x0123456789abffed, q17);
  ASSERT_EQUAL_128(0x3322110044556677, 0x8899aabbccddeeff, q18);
  ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x8899aabbccddeeff, q19);

  ASSERT_EQUAL_128(0, 0x0011223344776677, q2);
  ASSERT_EQUAL_128(0, 0x8899aabbccdd0000, q3);
  ASSERT_EQUAL_128(0x89abcdef00000000, 0x0123456789abcdef, q4);
  ASSERT_EQUAL_128(0, 0, q5);
  TEARDOWN();
}


TEST(neon_copy_mov_element) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100);
  __ Movi(v16.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);
  __ Movi(v17.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
  __ Movi(v18.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v19.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);

  __ Movi(v2.V2D(), 0, 0x0011223344556677);
  __ Movi(v3.V2D(), 0, 0x8899aabbccddeeff);
  __ Movi(v4.V2D(), 0, 0x0123456789abcdef);
  __ Movi(v5.V2D(), 0, 0x0123456789abcdef);

  __ Mov(v16.V16B(), 15, v0.V16B(), 0);
  __ Mov(v17.V8H(), 0, v1.V8H(), 7);
  __ Mov(v18.V4S(), 3, v1.V4S(), 0);
  __ Mov(v19.V2D(), 1, v0.V2D(), 0);

  __ Mov(v2.V16B(), 2, v2.V16B(), 0);
  __ Mov(v3.V8H(), 0, v3.V8H(), 7);
  __ Mov(v4.V4S(), 3, v4.V4S(), 0);
  __ Mov(v5.V2D(), 0, v5.V2D(), 1);

  END();

  RUN();

  ASSERT_EQUAL_128(0xff23456789abcdef, 0xfedcba9876543210, q16);
  ASSERT_EQUAL_128(0xfedcba9876543210, 0x0123456789abffed, q17);
  ASSERT_EQUAL_128(0x3322110044556677, 0x8899aabbccddeeff, q18);
  ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x8899aabbccddeeff, q19);

  ASSERT_EQUAL_128(0, 0x0011223344776677, q2);
  ASSERT_EQUAL_128(0, 0x8899aabbccdd0000, q3);
  ASSERT_EQUAL_128(0x89abcdef00000000, 0x0123456789abcdef, q4);
  ASSERT_EQUAL_128(0, 0, q5);
  TEARDOWN();
}


TEST(neon_copy_smov) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);

  __ Smov(w0, v0.B(), 7);
  __ Smov(w1, v0.B(), 15);

  __ Smov(w2, v0.H(), 0);
  __ Smov(w3, v0.H(), 3);

  __ Smov(x4, v0.B(), 7);
  __ Smov(x5, v0.B(), 15);

  __ Smov(x6, v0.H(), 0);
  __ Smov(x7, v0.H(), 3);

  __ Smov(x16, v0.S(), 0);
  __ Smov(x17, v0.S(), 1);

  END();

  RUN();

  ASSERT_EQUAL_32(0xfffffffe, w0);
  ASSERT_EQUAL_32(0x00000001, w1);
  ASSERT_EQUAL_32(0x00003210, w2);
  ASSERT_EQUAL_32(0xfffffedc, w3);
  ASSERT_EQUAL_64(0xfffffffffffffffe, x4);
  ASSERT_EQUAL_64(0x0000000000000001, x5);
  ASSERT_EQUAL_64(0x0000000000003210, x6);
  ASSERT_EQUAL_64(0xfffffffffffffedc, x7);
  ASSERT_EQUAL_64(0x0000000076543210, x16);
  ASSERT_EQUAL_64(0xfffffffffedcba98, x17);

  TEARDOWN();
}


TEST(neon_copy_umov_mov) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);

  __ Umov(w0, v0.B(), 15);
  __ Umov(w1, v0.H(), 0);
  __ Umov(w2, v0.S(), 3);
  __ Umov(x3, v0.D(), 1);

  __ Mov(w4, v0.S(), 3);
  __ Mov(x5, v0.D(), 1);

  END();

  RUN();

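  // Umov zero-extends the selected lane into the general-purpose destination;
  // for S- and D-sized lanes, Mov is an alias of Umov.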


TEST(neon_copy_ins_general) {
  SETUP();

  START();

  __ Mov(x0, 0x0011223344556677);
  __ Movi(v16.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);
  __ Movi(v17.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
  __ Movi(v18.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v19.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);

  __ Movi(v2.V2D(), 0, 0x0011223344556677);
  __ Movi(v3.V2D(), 0, 0x8899aabbccddeeff);
  __ Movi(v4.V2D(), 0, 0x0123456789abcdef);
  __ Movi(v5.V2D(), 0, 0x0123456789abcdef);

  __ Ins(v16.V16B(), 15, w0);
  __ Ins(v17.V8H(), 0, w0);
  __ Ins(v18.V4S(), 3, w0);
  __ Ins(v19.V2D(), 0, x0);

  __ Ins(v2.V16B(), 2, w0);
  __ Ins(v3.V8H(), 0, w0);
  __ Ins(v4.V4S(), 3, w0);
  __ Ins(v5.V2D(), 1, x0);

  END();

  RUN();

  ASSERT_EQUAL_128(0x7723456789abcdef, 0xfedcba9876543210, q16);
  ASSERT_EQUAL_128(0xfedcba9876543210, 0x0123456789ab6677, q17);
  ASSERT_EQUAL_128(0x4455667744556677, 0x8899aabbccddeeff, q18);
  ASSERT_EQUAL_128(0x0011223344556677, 0x0011223344556677, q19);

  ASSERT_EQUAL_128(0, 0x0011223344776677, q2);
  ASSERT_EQUAL_128(0, 0x8899aabbccdd6677, q3);
  ASSERT_EQUAL_128(0x4455667700000000, 0x0123456789abcdef, q4);
  ASSERT_EQUAL_128(0x0011223344556677, 0x0123456789abcdef, q5);
  TEARDOWN();
}


TEST(neon_extract_ext) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100);

  __ Movi(v2.V2D(), 0, 0x0011223344556677);
  __ Movi(v3.V2D(), 0, 0x8899aabbccddeeff);

  __ Ext(v16.V16B(), v0.V16B(), v1.V16B(), 0);
  __ Ext(v17.V16B(), v0.V16B(), v1.V16B(), 15);
  __ Ext(v1.V16B(), v0.V16B(), v1.V16B(), 8);  // Dest is same as one Src.
  __ Ext(v0.V16B(), v0.V16B(), v0.V16B(), 8);  // All registers are the same.

  __ Ext(v18.V8B(), v2.V8B(), v3.V8B(), 0);
  __ Ext(v19.V8B(), v2.V8B(), v3.V8B(), 7);
  __ Ext(v2.V8B(), v2.V8B(), v3.V8B(), 4);  // Dest is same as one Src.
  __ Ext(v3.V8B(), v3.V8B(), v3.V8B(), 4);  // All registers are the same.

  END();

  RUN();

  ASSERT_EQUAL_128(0x0011223344556677, 0x8899aabbccddeeff, q16);
  ASSERT_EQUAL_128(0xeddccbbaae998877, 0x6655443322110000, q17);
  ASSERT_EQUAL_128(0x7766554433221100, 0x0011223344556677, q1);
  ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x0011223344556677, q0);

  ASSERT_EQUAL_128(0, 0x0011223344556677, q18);
  ASSERT_EQUAL_128(0, 0x99aabbccddeeff00, q19);
  ASSERT_EQUAL_128(0, 0xccddeeff00112233, q2);
  ASSERT_EQUAL_128(0, 0xccddeeff8899aabb, q3);
  TEARDOWN();
}


TEST(neon_3different_uaddl) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v1.V2D(), 0, 0x00010280810e0fff);
  __ Movi(v2.V2D(), 0, 0x0101010101010101);

  __ Movi(v3.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v4.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v5.V2D(), 0, 0x0000000180008001);
  __ Movi(v6.V2D(), 0, 0x000e000ff000ffff);
  __ Movi(v7.V2D(), 0, 0x0001000100010001);

  __ Movi(v16.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v17.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v18.V2D(), 0, 0x0000000000000001);
  __ Movi(v19.V2D(), 0, 0x80000001ffffffff);
  __ Movi(v20.V2D(), 0, 0x0000000100000001);

  __ Uaddl(v0.V8H(), v1.V8B(), v2.V8B());

  __ Uaddl(v3.V4S(), v5.V4H(), v7.V4H());
  __ Uaddl(v4.V4S(), v6.V4H(), v7.V4H());

  __ Uaddl(v16.V2D(), v18.V2S(), v20.V2S());
  __ Uaddl(v17.V2D(), v19.V2S(), v20.V2S());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0001000200030081, 0x0082000f00100100, q0);
  ASSERT_EQUAL_128(0x0000000100000002, 0x0000800100008002, q3);
  ASSERT_EQUAL_128(0x0000000f00000010, 0x0000f00100010000, q4);
  ASSERT_EQUAL_128(0x0000000000000001, 0x0000000000000002, q16);
  ASSERT_EQUAL_128(0x0000000080000002, 0x0000000100000000, q17);
  TEARDOWN();
}
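

// Uaddl, like the other "three different" forms, widens each source element
// before adding, so the sums cannot wrap: in the byte case above, lane 0
// computes 0xff + 0x01 = 0x0100 in a halfword lane of the result. The "2"
// variants (Uaddl2, and so on) would take their inputs from the top halves
// of the source registers instead.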


TEST(neon_3different_addhn_subhn) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Addhn(v16.V8B(), v0.V8H(), v1.V8H());
  __ Addhn2(v16.V16B(), v2.V8H(), v3.V8H());
  __ Raddhn(v17.V8B(), v0.V8H(), v1.V8H());
  __ Raddhn2(v17.V16B(), v2.V8H(), v3.V8H());
  __ Subhn(v18.V8B(), v0.V8H(), v1.V8H());
  __ Subhn2(v18.V16B(), v2.V8H(), v3.V8H());
  __ Rsubhn(v19.V8B(), v0.V8H(), v1.V8H());
  __ Rsubhn2(v19.V16B(), v2.V8H(), v3.V8H());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0000ff007fff7fff, 0xff81817f80ff0100, q16);
  ASSERT_EQUAL_128(0x0000000080008000, 0xff81817f81ff0201, q17);
  ASSERT_EQUAL_128(0x0000ffff80008000, 0xff80817f80ff0100, q18);
  ASSERT_EQUAL_128(0x0000000080008000, 0xff81827f81ff0101, q19);
  TEARDOWN();
}


TEST(neon_d_only_scalar) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x1000000010001010);
  __ Movi(v3.V2D(), 0xffffffffffffffff, 2);
  __ Movi(v4.V2D(), 0xffffffffffffffff, -2);

  __ Add(d16, d0, d0);
  __ Add(d17, d1, d1);
  __ Add(d18, d2, d2);
  __ Sub(d19, d0, d0);
  __ Sub(d20, d0, d1);
  __ Sub(d21, d1, d0);
  __ Ushl(d22, d0, d3);
  __ Ushl(d23, d0, d4);
  __ Sshl(d24, d0, d3);
  __ Sshl(d25, d0, d4);
  __ Ushr(d26, d0, 1);
  __ Sshr(d27, d0, 3);
  __ Shl(d28, d0, 0);
  __ Shl(d29, d0, 16);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0xe0000001e001e1e0, q16);
  ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q17);
  ASSERT_EQUAL_128(0, 0x2000000020002020, q18);
  ASSERT_EQUAL_128(0, 0, q19);
  ASSERT_EQUAL_128(0, 0x7000000170017171, q20);
  ASSERT_EQUAL_128(0, 0x8ffffffe8ffe8e8f, q21);
  ASSERT_EQUAL_128(0, 0xc0000003c003c3c0, q22);
  ASSERT_EQUAL_128(0, 0x3c0000003c003c3c, q23);
  ASSERT_EQUAL_128(0, 0xc0000003c003c3c0, q24);
  ASSERT_EQUAL_128(0, 0xfc0000003c003c3c, q25);
  ASSERT_EQUAL_128(0, 0x7800000078007878, q26);
  ASSERT_EQUAL_128(0, 0xfe0000001e001e1e, q27);
  ASSERT_EQUAL_128(0, 0xf0000000f000f0f0, q28);
  ASSERT_EQUAL_128(0, 0x0000f000f0f00000, q29);

  TEARDOWN();
}
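

// For the register forms of Ushl and Sshl used above, the shift count is the
// signed value in the least significant byte of the second operand, so a
// negative count shifts right (logically for Ushl, arithmetically for Sshl).
// With d4 holding -2:
//
//   Ushl(d23, d0, d4);  // 0xf0000000f000f0f0 >> 2 = 0x3c0000003c003c3c
//   Sshl(d25, d0, d4);  // 0xf0000000f000f0f0 >> 2 = 0xfc0000003c003c3c
//                       // (sign bits shifted in)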


TEST(neon_sqshl_imm_scalar) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0, 0x7f);
  __ Movi(v1.V2D(), 0x0, 0x80);
  __ Movi(v2.V2D(), 0x0, 0x01);
  __ Sqshl(b16, b0, 1);
  __ Sqshl(b17, b1, 1);
  __ Sqshl(b18, b2, 1);

  __ Movi(v0.V2D(), 0x0, 0x7fff);
  __ Movi(v1.V2D(), 0x0, 0x8000);
  __ Movi(v2.V2D(), 0x0, 0x0001);
  __ Sqshl(h19, h0, 1);
  __ Sqshl(h20, h1, 1);
  __ Sqshl(h21, h2, 1);

  __ Movi(v0.V2D(), 0x0, 0x7fffffff);
  __ Movi(v1.V2D(), 0x0, 0x80000000);
  __ Movi(v2.V2D(), 0x0, 0x00000001);
  __ Sqshl(s22, s0, 1);
  __ Sqshl(s23, s1, 1);
  __ Sqshl(s24, s2, 1);

  __ Movi(v0.V2D(), 0x0, 0x7fffffffffffffff);
  __ Movi(v1.V2D(), 0x0, 0x8000000000000000);
  __ Movi(v2.V2D(), 0x0, 0x0000000000000001);
  __ Sqshl(d25, d0, 1);
  __ Sqshl(d26, d1, 1);
  __ Sqshl(d27, d2, 1);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x7f, q16);
  ASSERT_EQUAL_128(0, 0x80, q17);
  ASSERT_EQUAL_128(0, 0x02, q18);

  ASSERT_EQUAL_128(0, 0x7fff, q19);
  ASSERT_EQUAL_128(0, 0x8000, q20);
  ASSERT_EQUAL_128(0, 0x0002, q21);

  ASSERT_EQUAL_128(0, 0x7fffffff, q22);
  ASSERT_EQUAL_128(0, 0x80000000, q23);
  ASSERT_EQUAL_128(0, 0x00000002, q24);

  ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q25);
  ASSERT_EQUAL_128(0, 0x8000000000000000, q26);
  ASSERT_EQUAL_128(0, 0x0000000000000002, q27);

  TEARDOWN();
}


TEST(neon_uqshl_imm_scalar) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0, 0x7f);
  __ Movi(v1.V2D(), 0x0, 0x80);
  __ Movi(v2.V2D(), 0x0, 0x01);
  __ Uqshl(b16, b0, 1);
  __ Uqshl(b17, b1, 1);
  __ Uqshl(b18, b2, 1);

  __ Movi(v0.V2D(), 0x0, 0x7fff);
  __ Movi(v1.V2D(), 0x0, 0x8000);
  __ Movi(v2.V2D(), 0x0, 0x0001);
  __ Uqshl(h19, h0, 1);
  __ Uqshl(h20, h1, 1);
  __ Uqshl(h21, h2, 1);

  __ Movi(v0.V2D(), 0x0, 0x7fffffff);
  __ Movi(v1.V2D(), 0x0, 0x80000000);
  __ Movi(v2.V2D(), 0x0, 0x00000001);
  __ Uqshl(s22, s0, 1);
  __ Uqshl(s23, s1, 1);
  __ Uqshl(s24, s2, 1);

  __ Movi(v0.V2D(), 0x0, 0x7fffffffffffffff);
  __ Movi(v1.V2D(), 0x0, 0x8000000000000000);
  __ Movi(v2.V2D(), 0x0, 0x0000000000000001);
  __ Uqshl(d25, d0, 1);
  __ Uqshl(d26, d1, 1);
  __ Uqshl(d27, d2, 1);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0xfe, q16);
  ASSERT_EQUAL_128(0, 0xff, q17);
  ASSERT_EQUAL_128(0, 0x02, q18);

  ASSERT_EQUAL_128(0, 0xfffe, q19);
  ASSERT_EQUAL_128(0, 0xffff, q20);
  ASSERT_EQUAL_128(0, 0x0002, q21);

  ASSERT_EQUAL_128(0, 0xfffffffe, q22);
  ASSERT_EQUAL_128(0, 0xffffffff, q23);
  ASSERT_EQUAL_128(0, 0x00000002, q24);

  ASSERT_EQUAL_128(0, 0xfffffffffffffffe, q25);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q26);
  ASSERT_EQUAL_128(0, 0x0000000000000002, q27);

  TEARDOWN();
}


TEST(neon_sqshlu_scalar) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x0, 0x7f);
  __ Movi(v1.V2D(), 0x0, 0x80);
  __ Movi(v2.V2D(), 0x0, 0x01);
  __ Sqshlu(b16, b0, 2);
  __ Sqshlu(b17, b1, 2);
  __ Sqshlu(b18, b2, 2);

  __ Movi(v0.V2D(), 0x0, 0x7fff);
  __ Movi(v1.V2D(), 0x0, 0x8000);
  __ Movi(v2.V2D(), 0x0, 0x0001);
  __ Sqshlu(h19, h0, 2);
  __ Sqshlu(h20, h1, 2);
  __ Sqshlu(h21, h2, 2);

  __ Movi(v0.V2D(), 0x0, 0x7fffffff);
  __ Movi(v1.V2D(), 0x0, 0x80000000);
  __ Movi(v2.V2D(), 0x0, 0x00000001);
  __ Sqshlu(s22, s0, 2);
  __ Sqshlu(s23, s1, 2);
  __ Sqshlu(s24, s2, 2);

  __ Movi(v0.V2D(), 0x0, 0x7fffffffffffffff);
  __ Movi(v1.V2D(), 0x0, 0x8000000000000000);
  __ Movi(v2.V2D(), 0x0, 0x0000000000000001);
  __ Sqshlu(d25, d0, 2);
  __ Sqshlu(d26, d1, 2);
  __ Sqshlu(d27, d2, 2);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0xff, q16);
  ASSERT_EQUAL_128(0, 0x00, q17);
  ASSERT_EQUAL_128(0, 0x04, q18);

  ASSERT_EQUAL_128(0, 0xffff, q19);
  ASSERT_EQUAL_128(0, 0x0000, q20);
  ASSERT_EQUAL_128(0, 0x0004, q21);

  ASSERT_EQUAL_128(0, 0xffffffff, q22);
  ASSERT_EQUAL_128(0, 0x00000000, q23);
  ASSERT_EQUAL_128(0, 0x00000004, q24);

  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q25);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q26);
  ASSERT_EQUAL_128(0, 0x0000000000000004, q27);

  TEARDOWN();
}
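

// Sqshlu is a signed saturating shift left with an unsigned result range:
// negative inputs saturate to zero and overflowing positive inputs saturate
// to the unsigned maximum. A sketch of the byte case checked above (a
// hypothetical helper, not test infrastructure):
//
//   uint8_t SqshluByte(int8_t x, int shift) {
//     int32_t wide = static_cast<int32_t>(x) << shift;
//     return (wide < 0) ? 0 : ((wide > 0xff) ? 0xff : wide);
//   }
//
// For example, 0x7f << 2 saturates to 0xff, while 0x80 (-128) saturates
// to 0x00.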


TEST(neon_sshll) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);

  __ Sshll(v16.V8H(), v0.V8B(), 4);
  __ Sshll2(v17.V8H(), v0.V16B(), 4);

  __ Sshll(v18.V4S(), v1.V4H(), 8);
  __ Sshll2(v19.V4S(), v1.V8H(), 8);

  __ Sshll(v20.V2D(), v2.V2S(), 16);
  __ Sshll2(v21.V2D(), v2.V4S(), 16);

  END();

  RUN();

  ASSERT_EQUAL_128(0xf800f810fff00000, 0x001007f0f800f810, q16);
  ASSERT_EQUAL_128(0x07f000100000fff0, 0xf810f80007f00010, q17);
  ASSERT_EQUAL_128(0xffffff0000000000, 0x00000100007fff00, q18);
  ASSERT_EQUAL_128(0xff800000ff800100, 0xffffff0000000000, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00007fffffff0000, q20);
  ASSERT_EQUAL_128(0xffff800000000000, 0xffffffffffff0000, q21);
  TEARDOWN();
}


TEST(neon_shll) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);

  __ Shll(v16.V8H(), v0.V8B(), 8);
  __ Shll2(v17.V8H(), v0.V16B(), 8);

  __ Shll(v18.V4S(), v1.V4H(), 16);
  __ Shll2(v19.V4S(), v1.V8H(), 16);

  __ Shll(v20.V2D(), v2.V2S(), 32);
  __ Shll2(v21.V2D(), v2.V4S(), 32);

  END();

  RUN();

  ASSERT_EQUAL_128(0x80008100ff000000, 0x01007f0080008100, q16);
  ASSERT_EQUAL_128(0x7f0001000000ff00, 0x810080007f000100, q17);
  ASSERT_EQUAL_128(0xffff000000000000, 0x000100007fff0000, q18);
  ASSERT_EQUAL_128(0x8000000080010000, 0xffff000000000000, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x7fffffff00000000, q20);
  ASSERT_EQUAL_128(0x8000000000000000, 0xffffffff00000000, q21);
  TEARDOWN();
}


TEST(neon_ushll) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);

  __ Ushll(v16.V8H(), v0.V8B(), 4);
  __ Ushll2(v17.V8H(), v0.V16B(), 4);

  __ Ushll(v18.V4S(), v1.V4H(), 8);
  __ Ushll2(v19.V4S(), v1.V8H(), 8);

  __ Ushll(v20.V2D(), v2.V2S(), 16);
  __ Ushll2(v21.V2D(), v2.V4S(), 16);

  END();

  RUN();

  ASSERT_EQUAL_128(0x080008100ff00000, 0x001007f008000810, q16);
  ASSERT_EQUAL_128(0x07f0001000000ff0, 0x0810080007f00010, q17);
  ASSERT_EQUAL_128(0x00ffff0000000000, 0x00000100007fff00, q18);
  ASSERT_EQUAL_128(0x0080000000800100, 0x00ffff0000000000, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00007fffffff0000, q20);
  ASSERT_EQUAL_128(0x0000800000000000, 0x0000ffffffff0000, q21);
  TEARDOWN();
}


TEST(neon_sxtl) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);

  __ Sxtl(v16.V8H(), v0.V8B());
  __ Sxtl2(v17.V8H(), v0.V16B());

  __ Sxtl(v18.V4S(), v1.V4H());
  __ Sxtl2(v19.V4S(), v1.V8H());

  __ Sxtl(v20.V2D(), v2.V2S());
  __ Sxtl2(v21.V2D(), v2.V4S());

  END();

  RUN();

  ASSERT_EQUAL_128(0xff80ff81ffff0000, 0x0001007fff80ff81, q16);
  ASSERT_EQUAL_128(0x007f00010000ffff, 0xff81ff80007f0001, q17);
  ASSERT_EQUAL_128(0xffffffff00000000, 0x0000000100007fff, q18);
  ASSERT_EQUAL_128(0xffff8000ffff8001, 0xffffffff00000000, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20);
  ASSERT_EQUAL_128(0xffffffff80000000, 0xffffffffffffffff, q21);
  TEARDOWN();
}


TEST(neon_uxtl) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);

  __ Uxtl(v16.V8H(), v0.V8B());
  __ Uxtl2(v17.V8H(), v0.V16B());

  __ Uxtl(v18.V4S(), v1.V4H());
  __ Uxtl2(v19.V4S(), v1.V8H());

  __ Uxtl(v20.V2D(), v2.V2S());
  __ Uxtl2(v21.V2D(), v2.V4S());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0080008100ff0000, 0x0001007f00800081, q16);
  ASSERT_EQUAL_128(0x007f0001000000ff, 0x00810080007f0001, q17);
  ASSERT_EQUAL_128(0x0000ffff00000000, 0x0000000100007fff, q18);
  ASSERT_EQUAL_128(0x0000800000008001, 0x0000ffff00000000, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20);
  ASSERT_EQUAL_128(0x0000000080000000, 0x00000000ffffffff, q21);
  TEARDOWN();
}


TEST(neon_ssra) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v1.V2D());
  __ Mov(v19.V2D(), v1.V2D());
  __ Mov(v20.V2D(), v2.V2D());
  __ Mov(v21.V2D(), v2.V2D());
  __ Mov(v22.V2D(), v3.V2D());
  __ Mov(v23.V2D(), v4.V2D());
  __ Mov(v24.V2D(), v3.V2D());
  __ Mov(v25.V2D(), v4.V2D());

  __ Ssra(v16.V8B(), v0.V8B(), 4);
  __ Ssra(v17.V16B(), v0.V16B(), 4);

  __ Ssra(v18.V4H(), v1.V4H(), 8);
  __ Ssra(v19.V8H(), v1.V8H(), 8);

  __ Ssra(v20.V2S(), v2.V2S(), 16);
  __ Ssra(v21.V4S(), v2.V4S(), 16);

  __ Ssra(v22.V2D(), v3.V2D(), 32);
  __ Ssra(v23.V2D(), v4.V2D(), 32);

  __ Ssra(d24, d3, 48);

  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x7879fe0001867879, q16);
  ASSERT_EQUAL_128(0x860100fe79788601, 0x7879fe0001867879, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0xfffe00000001807e, q18);
  ASSERT_EQUAL_128(0x7f807f81fffe0000, 0xfffe00000001807e, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007ffe, q20);
  ASSERT_EQUAL_128(0x7fff8000fffffffe, 0x0000000080007ffe, q21);
  ASSERT_EQUAL_128(0x7fffffff80000001, 0x800000007ffffffe, q22);
  ASSERT_EQUAL_128(0x7fffffff80000000, 0x0000000000000000, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007ffe, q24);
  TEARDOWN();
}
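

// Ssra (and Usra below) shifts each source element right -- arithmetically
// here, logically for the U form -- and accumulates the result into the
// destination. For the scalar case above, for instance:
//
//   d24 = d24 + (int64_t(d3) >> 48)
//       = 0x7fffffffffffffff + 0x0000000000007fff = 0x8000000000007ffe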


TEST(neon_srsra) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v1.V2D());
  __ Mov(v19.V2D(), v1.V2D());
  __ Mov(v20.V2D(), v2.V2D());
  __ Mov(v21.V2D(), v2.V2D());
  __ Mov(v22.V2D(), v3.V2D());
  __ Mov(v23.V2D(), v4.V2D());
  __ Mov(v24.V2D(), v3.V2D());
  __ Mov(v25.V2D(), v4.V2D());

  __ Srsra(v16.V8B(), v0.V8B(), 4);
  __ Srsra(v17.V16B(), v0.V16B(), 4);

  __ Srsra(v18.V4H(), v1.V4H(), 8);
  __ Srsra(v19.V8H(), v1.V8H(), 8);

  __ Srsra(v20.V2S(), v2.V2S(), 16);
  __ Srsra(v21.V4S(), v2.V4S(), 16);

  __ Srsra(v22.V2D(), v3.V2D(), 32);
  __ Srsra(v23.V2D(), v4.V2D(), 32);

  __ Srsra(d24, d3, 48);

  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x7879ff0001877879, q16);
  ASSERT_EQUAL_128(0x870100ff79788701, 0x7879ff0001877879, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffff00000001807f, q18);
  ASSERT_EQUAL_128(0x7f807f81ffff0000, 0xffff00000001807f, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007fff, q20);
  ASSERT_EQUAL_128(0x7fff8000ffffffff, 0x0000000080007fff, q21);
  ASSERT_EQUAL_128(0x7fffffff80000001, 0x800000007fffffff, q22);
  ASSERT_EQUAL_128(0x7fffffff80000000, 0x0000000000000000, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007fff, q24);

  TEARDOWN();
}


TEST(neon_usra) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v1.V2D());
  __ Mov(v19.V2D(), v1.V2D());
  __ Mov(v20.V2D(), v2.V2D());
  __ Mov(v21.V2D(), v2.V2D());
  __ Mov(v22.V2D(), v3.V2D());
  __ Mov(v23.V2D(), v4.V2D());
  __ Mov(v24.V2D(), v3.V2D());
  __ Mov(v25.V2D(), v4.V2D());

  __ Usra(v16.V8B(), v0.V8B(), 4);
  __ Usra(v17.V16B(), v0.V16B(), 4);

  __ Usra(v18.V4H(), v1.V4H(), 8);
  __ Usra(v19.V8H(), v1.V8H(), 8);

  __ Usra(v20.V2S(), v2.V2S(), 16);
  __ Usra(v21.V4S(), v2.V4S(), 16);

  __ Usra(v22.V2D(), v3.V2D(), 32);
  __ Usra(v23.V2D(), v4.V2D(), 32);

  __ Usra(d24, d3, 48);

  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x88890e0001868889, q16);
  ASSERT_EQUAL_128(0x8601000e89888601, 0x88890e0001868889, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00fe00000001807e, q18);
  ASSERT_EQUAL_128(0x8080808100fe0000, 0x00fe00000001807e, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007ffe, q20);
  ASSERT_EQUAL_128(0x800080000000fffe, 0x0000000080007ffe, q21);
  ASSERT_EQUAL_128(0x8000000080000001, 0x800000007ffffffe, q22);
  ASSERT_EQUAL_128(0x8000000080000000, 0x0000000000000000, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007ffe, q24);

  TEARDOWN();
}
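

// The rounding shift-and-accumulate forms (Srsra and Ursra) add
// 1 << (shift - 1) to the element before shifting, with the intermediate
// value computed in wider precision so it cannot overflow. Compare the
// Srsra d24 result above, 0x8000000000007fff, with the truncating Ssra
// result 0x8000000000007ffe: the rounded right shift of 0x7fffffffffffffff
// by 48 yields 0x8000 instead of 0x7fff.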


TEST(neon_ursra) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v1.V2D());
  __ Mov(v19.V2D(), v1.V2D());
  __ Mov(v20.V2D(), v2.V2D());
  __ Mov(v21.V2D(), v2.V2D());
  __ Mov(v22.V2D(), v3.V2D());
  __ Mov(v23.V2D(), v4.V2D());
  __ Mov(v24.V2D(), v3.V2D());
  __ Mov(v25.V2D(), v4.V2D());

  __ Ursra(v16.V8B(), v0.V8B(), 4);
  __ Ursra(v17.V16B(), v0.V16B(), 4);

  __ Ursra(v18.V4H(), v1.V4H(), 8);
  __ Ursra(v19.V8H(), v1.V8H(), 8);

  __ Ursra(v20.V2S(), v2.V2S(), 16);
  __ Ursra(v21.V4S(), v2.V4S(), 16);

  __ Ursra(v22.V2D(), v3.V2D(), 32);
  __ Ursra(v23.V2D(), v4.V2D(), 32);

  __ Ursra(d24, d3, 48);

  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x88890f0001878889, q16);
  ASSERT_EQUAL_128(0x8701000f89888701, 0x88890f0001878889, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00ff00000001807f, q18);
  ASSERT_EQUAL_128(0x8080808100ff0000, 0x00ff00000001807f, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007fff, q20);
  ASSERT_EQUAL_128(0x800080000000ffff, 0x0000000080007fff, q21);
  ASSERT_EQUAL_128(0x8000000080000001, 0x800000007fffffff, q22);
  ASSERT_EQUAL_128(0x8000000080000000, 0x0000000000000000, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007fff, q24);
  TEARDOWN();
}


TEST(neon_uqshl_scalar) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);

  __ Uqshl(b16, b0, b2);
  __ Uqshl(b17, b0, b3);
  __ Uqshl(b18, b1, b2);
  __ Uqshl(b19, b1, b3);
  __ Uqshl(h20, h0, h2);
  __ Uqshl(h21, h0, h3);
  __ Uqshl(h22, h1, h2);
  __ Uqshl(h23, h1, h3);
  __ Uqshl(s24, s0, s2);
  __ Uqshl(s25, s0, s3);
  __ Uqshl(s26, s1, s2);
  __ Uqshl(s27, s1, s3);
  __ Uqshl(d28, d0, d2);
  __ Uqshl(d29, d0, d3);
  __ Uqshl(d30, d1, d2);
  __ Uqshl(d31, d1, d3);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0xff, q16);
  ASSERT_EQUAL_128(0, 0x78, q17);
  ASSERT_EQUAL_128(0, 0xfe, q18);
  ASSERT_EQUAL_128(0, 0x3f, q19);
  ASSERT_EQUAL_128(0, 0xffff, q20);
  ASSERT_EQUAL_128(0, 0x7878, q21);
  ASSERT_EQUAL_128(0, 0xfefe, q22);
  ASSERT_EQUAL_128(0, 0x3fbf, q23);
  ASSERT_EQUAL_128(0, 0xffffffff, q24);
  ASSERT_EQUAL_128(0, 0x78007878, q25);
  ASSERT_EQUAL_128(0, 0xfffefefe, q26);
  ASSERT_EQUAL_128(0, 0x3fffbfbf, q27);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q28);
  ASSERT_EQUAL_128(0, 0x7800000078007878, q29);
  ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q30);
  ASSERT_EQUAL_128(0, 0x3fffffffbfffbfbf, q31);

  TEARDOWN();
}


TEST(neon_sqshl_scalar) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xbfffffffbfffbfbf);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x4000000040004040);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);

  __ Sqshl(b16, b0, b2);
  __ Sqshl(b17, b0, b3);
  __ Sqshl(b18, b1, b2);
  __ Sqshl(b19, b1, b3);
  __ Sqshl(h20, h0, h2);
  __ Sqshl(h21, h0, h3);
  __ Sqshl(h22, h1, h2);
  __ Sqshl(h23, h1, h3);
  __ Sqshl(s24, s0, s2);
  __ Sqshl(s25, s0, s3);
  __ Sqshl(s26, s1, s2);
  __ Sqshl(s27, s1, s3);
  __ Sqshl(d28, d0, d2);
  __ Sqshl(d29, d0, d3);
  __ Sqshl(d30, d1, d2);
  __ Sqshl(d31, d1, d3);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x80, q16);
  ASSERT_EQUAL_128(0, 0xdf, q17);
  ASSERT_EQUAL_128(0, 0x7f, q18);
  ASSERT_EQUAL_128(0, 0x20, q19);
  ASSERT_EQUAL_128(0, 0x8000, q20);
  ASSERT_EQUAL_128(0, 0xdfdf, q21);
  ASSERT_EQUAL_128(0, 0x7fff, q22);
  ASSERT_EQUAL_128(0, 0x2020, q23);
  ASSERT_EQUAL_128(0, 0x80000000, q24);
  ASSERT_EQUAL_128(0, 0xdfffdfdf, q25);
  ASSERT_EQUAL_128(0, 0x7fffffff, q26);
  ASSERT_EQUAL_128(0, 0x20002020, q27);
  ASSERT_EQUAL_128(0, 0x8000000000000000, q28);
  ASSERT_EQUAL_128(0, 0xdfffffffdfffdfdf, q29);
  ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q30);
  ASSERT_EQUAL_128(0, 0x2000000020002020, q31);

  TEARDOWN();
}


TEST(neon_urshl_scalar) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);

  __ Urshl(d28, d0, d2);
  __ Urshl(d29, d0, d3);
  __ Urshl(d30, d1, d2);
  __ Urshl(d31, d1, d3);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0xe0000001e001e1e0, q28);
  ASSERT_EQUAL_128(0, 0x7800000078007878, q29);
  ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q30);
  ASSERT_EQUAL_128(0, 0x3fffffffbfffbfc0, q31);

  TEARDOWN();
}


TEST(neon_srshl_scalar) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xbfffffffbfffbfbf);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x4000000040004040);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);

  __ Srshl(d28, d0, d2);
  __ Srshl(d29, d0, d3);
  __ Srshl(d30, d1, d2);
  __ Srshl(d31, d1, d3);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x7fffffff7fff7f7e, q28);
  ASSERT_EQUAL_128(0, 0xdfffffffdfffdfe0, q29);
  ASSERT_EQUAL_128(0, 0x8000000080008080, q30);
  ASSERT_EQUAL_128(0, 0x2000000020002020, q31);

  TEARDOWN();
}
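

// Urshl and Srshl are the rounding variants of Ushl and Sshl: for a negative
// (rightward) count, 1 << (count - 1) is added before the shift. For
// example, with d3 holding -1 in the test above:
//
//   Urshl(d31, d1, d3);  // (0x7fffffff7fff7f7f + 1) >> 1
//                        //   = 0x3fffffffbfffbfc0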


TEST(neon_uqrshl_scalar) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);

  __ Uqrshl(b16, b0, b2);
  __ Uqrshl(b17, b0, b3);
  __ Uqrshl(b18, b1, b2);
  __ Uqrshl(b19, b1, b3);
  __ Uqrshl(h20, h0, h2);
  __ Uqrshl(h21, h0, h3);
  __ Uqrshl(h22, h1, h2);
  __ Uqrshl(h23, h1, h3);
  __ Uqrshl(s24, s0, s2);
  __ Uqrshl(s25, s0, s3);
  __ Uqrshl(s26, s1, s2);
  __ Uqrshl(s27, s1, s3);
  __ Uqrshl(d28, d0, d2);
  __ Uqrshl(d29, d0, d3);
  __ Uqrshl(d30, d1, d2);
  __ Uqrshl(d31, d1, d3);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0xff, q16);
  ASSERT_EQUAL_128(0, 0x78, q17);
  ASSERT_EQUAL_128(0, 0xfe, q18);
  ASSERT_EQUAL_128(0, 0x40, q19);
  ASSERT_EQUAL_128(0, 0xffff, q20);
  ASSERT_EQUAL_128(0, 0x7878, q21);
  ASSERT_EQUAL_128(0, 0xfefe, q22);
  ASSERT_EQUAL_128(0, 0x3fc0, q23);
  ASSERT_EQUAL_128(0, 0xffffffff, q24);
  ASSERT_EQUAL_128(0, 0x78007878, q25);
  ASSERT_EQUAL_128(0, 0xfffefefe, q26);
  ASSERT_EQUAL_128(0, 0x3fffbfc0, q27);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q28);
  ASSERT_EQUAL_128(0, 0x7800000078007878, q29);
  ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q30);
  ASSERT_EQUAL_128(0, 0x3fffffffbfffbfc0, q31);

  TEARDOWN();
}


TEST(neon_sqrshl_scalar) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xbfffffffbfffbfbf);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x4000000040004040);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);

  __ Sqrshl(b16, b0, b2);
  __ Sqrshl(b17, b0, b3);
  __ Sqrshl(b18, b1, b2);
  __ Sqrshl(b19, b1, b3);
  __ Sqrshl(h20, h0, h2);
  __ Sqrshl(h21, h0, h3);
  __ Sqrshl(h22, h1, h2);
  __ Sqrshl(h23, h1, h3);
  __ Sqrshl(s24, s0, s2);
  __ Sqrshl(s25, s0, s3);
  __ Sqrshl(s26, s1, s2);
  __ Sqrshl(s27, s1, s3);
  __ Sqrshl(d28, d0, d2);
  __ Sqrshl(d29, d0, d3);
  __ Sqrshl(d30, d1, d2);
  __ Sqrshl(d31, d1, d3);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x80, q16);
  ASSERT_EQUAL_128(0, 0xe0, q17);
  ASSERT_EQUAL_128(0, 0x7f, q18);
  ASSERT_EQUAL_128(0, 0x20, q19);
  ASSERT_EQUAL_128(0, 0x8000, q20);
  ASSERT_EQUAL_128(0, 0xdfe0, q21);
  ASSERT_EQUAL_128(0, 0x7fff, q22);
  ASSERT_EQUAL_128(0, 0x2020, q23);
  ASSERT_EQUAL_128(0, 0x80000000, q24);
  ASSERT_EQUAL_128(0, 0xdfffdfe0, q25);
  ASSERT_EQUAL_128(0, 0x7fffffff, q26);
  ASSERT_EQUAL_128(0, 0x20002020, q27);
  ASSERT_EQUAL_128(0, 0x8000000000000000, q28);
  ASSERT_EQUAL_128(0, 0xdfffffffdfffdfe0, q29);
  ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q30);
  ASSERT_EQUAL_128(0, 0x2000000020002020, q31);

  TEARDOWN();
}


TEST(neon_uqadd_scalar) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x1000000010001010);

  __ Uqadd(b16, b0, b0);
  __ Uqadd(b17, b1, b1);
  __ Uqadd(b18, b2, b2);
  __ Uqadd(h19, h0, h0);
  __ Uqadd(h20, h1, h1);
  __ Uqadd(h21, h2, h2);
  __ Uqadd(s22, s0, s0);
  __ Uqadd(s23, s1, s1);
  __ Uqadd(s24, s2, s2);
  __ Uqadd(d25, d0, d0);
  __ Uqadd(d26, d1, d1);
  __ Uqadd(d27, d2, d2);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0xff, q16);
  ASSERT_EQUAL_128(0, 0xfe, q17);
  ASSERT_EQUAL_128(0, 0x20, q18);
  ASSERT_EQUAL_128(0, 0xffff, q19);
  ASSERT_EQUAL_128(0, 0xfefe, q20);
  ASSERT_EQUAL_128(0, 0x2020, q21);
  ASSERT_EQUAL_128(0, 0xffffffff, q22);
  ASSERT_EQUAL_128(0, 0xfffefefe, q23);
  ASSERT_EQUAL_128(0, 0x20002020, q24);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q25);
  ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q26);
  ASSERT_EQUAL_128(0, 0x2000000020002020, q27);

  TEARDOWN();
}


TEST(neon_sqadd_scalar) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0x8000000180018181);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x1000000010001010);

  __ Sqadd(b16, b0, b0);
  __ Sqadd(b17, b1, b1);
  __ Sqadd(b18, b2, b2);
  __ Sqadd(h19, h0, h0);
  __ Sqadd(h20, h1, h1);
  __ Sqadd(h21, h2, h2);
  __ Sqadd(s22, s0, s0);
  __ Sqadd(s23, s1, s1);
  __ Sqadd(s24, s2, s2);
  __ Sqadd(d25, d0, d0);
  __ Sqadd(d26, d1, d1);
  __ Sqadd(d27, d2, d2);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x80, q16);
  ASSERT_EQUAL_128(0, 0x7f, q17);
  ASSERT_EQUAL_128(0, 0x20, q18);
  ASSERT_EQUAL_128(0, 0x8000, q19);
  ASSERT_EQUAL_128(0, 0x7fff, q20);
  ASSERT_EQUAL_128(0, 0x2020, q21);
  ASSERT_EQUAL_128(0, 0x80000000, q22);
  ASSERT_EQUAL_128(0, 0x7fffffff, q23);
  ASSERT_EQUAL_128(0, 0x20002020, q24);
  ASSERT_EQUAL_128(0, 0x8000000000000000, q25);
  ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q26);
  ASSERT_EQUAL_128(0, 0x2000000020002020, q27);

  TEARDOWN();
}


TEST(neon_uqsub_scalar) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);

  __ Uqsub(b16, b0, b0);
  __ Uqsub(b17, b0, b1);
  __ Uqsub(b18, b1, b0);
  __ Uqsub(h19, h0, h0);
  __ Uqsub(h20, h0, h1);
  __ Uqsub(h21, h1, h0);
  __ Uqsub(s22, s0, s0);
  __ Uqsub(s23, s0, s1);
  __ Uqsub(s24, s1, s0);
  __ Uqsub(d25, d0, d0);
  __ Uqsub(d26, d0, d1);
  __ Uqsub(d27, d1, d0);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0, q16);
  ASSERT_EQUAL_128(0, 0x71, q17);
  ASSERT_EQUAL_128(0, 0, q18);

  ASSERT_EQUAL_128(0, 0, q19);
  ASSERT_EQUAL_128(0, 0x7171, q20);
  ASSERT_EQUAL_128(0, 0, q21);

  ASSERT_EQUAL_128(0, 0, q22);
  ASSERT_EQUAL_128(0, 0x70017171, q23);
  ASSERT_EQUAL_128(0, 0, q24);

  ASSERT_EQUAL_128(0, 0, q25);
  ASSERT_EQUAL_128(0, 0x7000000170017171, q26);
  ASSERT_EQUAL_128(0, 0, q27);

  TEARDOWN();
}
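

// These saturating arithmetic forms clamp results that do not fit the
// element: unsigned results saturate to [0, 2^n - 1] and signed results to
// [-2^(n-1), 2^(n-1) - 1]. In the byte cases above, 0xf0 + 0xf0 = 0x1e0
// saturates to 0xff for Uqadd, 0x81 + 0x81 (that is, -127 + -127) saturates
// to 0x80 (-128) for Sqadd, and Uqsub clamps negative differences to zero.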


TEST(neon_sqsub_scalar) {
  SETUP();

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7eeeeeee7eee7e7e);

  __ Sqsub(b16, b0, b0);
  __ Sqsub(b17, b0, b1);
  __ Sqsub(b18, b1, b0);
  __ Sqsub(h19, h0, h0);
  __ Sqsub(h20, h0, h1);
  __ Sqsub(h21, h1, h0);
  __ Sqsub(s22, s0, s0);
  __ Sqsub(s23, s0, s1);
  __ Sqsub(s24, s1, s0);
  __ Sqsub(d25, d0, d0);
  __ Sqsub(d26, d0, d1);
  __ Sqsub(d27, d1, d0);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0, q16);
  ASSERT_EQUAL_128(0, 0x80, q17);
  ASSERT_EQUAL_128(0, 0x7f, q18);

  ASSERT_EQUAL_128(0, 0, q19);
  ASSERT_EQUAL_128(0, 0x8000, q20);
  ASSERT_EQUAL_128(0, 0x7fff, q21);

  ASSERT_EQUAL_128(0, 0, q22);
  ASSERT_EQUAL_128(0, 0x80000000, q23);
  ASSERT_EQUAL_128(0, 0x7fffffff, q24);

  ASSERT_EQUAL_128(0, 0, q25);
  ASSERT_EQUAL_128(0, 0x8000000000000000, q26);
  ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q27);

  TEARDOWN();
}


TEST(neon_fmla_fmls) {
  SETUP();

  START();
  __ Movi(v0.V2D(), 0x3f80000040000000, 0x4100000000000000);
  __ Movi(v1.V2D(), 0x400000003f800000, 0x000000003f800000);
  __ Movi(v2.V2D(), 0x3f800000ffffffff, 0x7f800000ff800000);
  __ Mov(v16.V16B(), v0.V16B());
  __ Mov(v17.V16B(), v0.V16B());
  __ Mov(v18.V16B(), v0.V16B());
  __ Mov(v19.V16B(), v0.V16B());
  __ Mov(v20.V16B(), v0.V16B());
  __ Mov(v21.V16B(), v0.V16B());

  __ Fmla(v16.V2S(), v1.V2S(), v2.V2S());
  __ Fmla(v17.V4S(), v1.V4S(), v2.V4S());
  __ Fmla(v18.V2D(), v1.V2D(), v2.V2D());
  __ Fmls(v19.V2S(), v1.V2S(), v2.V2S());
  __ Fmls(v20.V4S(), v1.V4S(), v2.V4S());
  __ Fmls(v21.V2D(), v1.V2D(), v2.V2D());
  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x7fc00000ff800000, q16);
  ASSERT_EQUAL_128(0x40400000ffffffff, 0x7fc00000ff800000, q17);
  ASSERT_EQUAL_128(0x3f9800015f8003f7, 0x41000000000000fe, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x7fc000007f800000, q19);
  ASSERT_EQUAL_128(0xbf800000ffffffff, 0x7fc000007f800000, q20);
  ASSERT_EQUAL_128(0xbf8000023f0007ee, 0x40fffffffffffe04, q21);

  TEARDOWN();
}


TEST(neon_fmulx_scalar) {
  SETUP();

  START();
  __ Fmov(s0, 2.0);
  __ Fmov(s1, 0.5);
  __ Fmov(s2, 0.0);
  __ Fmov(s3, -0.0);
  __ Fmov(s4, kFP32PositiveInfinity);
  __ Fmov(s5, kFP32NegativeInfinity);
  __ Fmulx(s16, s0, s1);
  __ Fmulx(s17, s2, s4);
  __ Fmulx(s18, s2, s5);
  __ Fmulx(s19, s3, s4);
  __ Fmulx(s20, s3, s5);

  __ Fmov(d21, 2.0);
  __ Fmov(d22, 0.5);
  __ Fmov(d23, 0.0);
  __ Fmov(d24, -0.0);
  __ Fmov(d25, kFP64PositiveInfinity);
  __ Fmov(d26, kFP64NegativeInfinity);
  __ Fmulx(d27, d21, d22);
  __ Fmulx(d28, d23, d25);
  __ Fmulx(d29, d23, d26);
  __ Fmulx(d30, d24, d25);
  __ Fmulx(d31, d24, d26);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s16);
  ASSERT_EQUAL_FP32(2.0, s17);
  ASSERT_EQUAL_FP32(-2.0, s18);
  ASSERT_EQUAL_FP32(-2.0, s19);
  ASSERT_EQUAL_FP32(2.0, s20);
  ASSERT_EQUAL_FP64(1.0, d27);
  ASSERT_EQUAL_FP64(2.0, d28);
  ASSERT_EQUAL_FP64(-2.0, d29);
  ASSERT_EQUAL_FP64(-2.0, d30);
  ASSERT_EQUAL_FP64(2.0, d31);

  TEARDOWN();
}
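

// Fmulx behaves like Fmul except that (+/-0.0 x +/-infinity), which would
// otherwise produce the default NaN, returns +/-2.0 with the usual XOR of
// the operand signs. This is what the infinity cases above check; for
// example, Fmulx(s18, s2, s5) computes +0.0 x -infinity = -2.0.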


// We currently disable tests for CRC32 instructions when running natively.
// Support for this family of instructions is optional, and so native
// platforms may simply fail to execute the test.
// TODO: Run the test on native platforms where the CRC32 instructions are
// available.
#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
TEST(crc32b) {
  SETUP();
  START();

  __ Mov(w0, 0);
  __ Mov(w1, 0);
  __ Crc32b(w10, w0, w1);

  __ Mov(w0, 0x1);
  __ Mov(w1, 0x138);
  __ Crc32b(w11, w0, w1);

  __ Mov(w0, 0x1);
  __ Mov(w1, 0x38);
  __ Crc32b(w12, w0, w1);

  __ Mov(w0, 0);
  __ Mov(w1, 128);
  __ Crc32b(w13, w0, w1);

  __ Mov(w0, UINT32_MAX);
  __ Mov(w1, 255);
  __ Crc32b(w14, w0, w1);

  __ Mov(w0, 0x00010001);
  __ Mov(w1, 0x10001000);
  __ Crc32b(w15, w0, w1);

  END();
  RUN();

  ASSERT_EQUAL_64(0x0, x10);
  ASSERT_EQUAL_64(0x5f058808, x11);
  ASSERT_EQUAL_64(0x5f058808, x12);
  ASSERT_EQUAL_64(0xedb88320, x13);
  ASSERT_EQUAL_64(0x00ffffff, x14);
  ASSERT_EQUAL_64(0x77073196, x15);

  TEARDOWN();
}


TEST(crc32h) {
  SETUP();
  START();

  __ Mov(w0, 0);
  __ Mov(w1, 0);
  __ Crc32h(w10, w0, w1);

  __ Mov(w0, 0x1);
  __ Mov(w1, 0x10038);
  __ Crc32h(w11, w0, w1);

  __ Mov(w0, 0x1);
  __ Mov(w1, 0x38);
  __ Crc32h(w12, w0, w1);

  __ Mov(w0, 0);
  __ Mov(w1, 128);
  __ Crc32h(w13, w0, w1);

  __ Mov(w0, UINT32_MAX);
  __ Mov(w1, 255);
  __ Crc32h(w14, w0, w1);

  __ Mov(w0, 0x00010001);
  __ Mov(w1, 0x10001000);
  __ Crc32h(w15, w0, w1);

  END();
  RUN();

  ASSERT_EQUAL_64(0x0, x10);
  ASSERT_EQUAL_64(0x0e848dba, x11);
  ASSERT_EQUAL_64(0x0e848dba, x12);
  ASSERT_EQUAL_64(0x3b83984b, x13);
  ASSERT_EQUAL_64(0x2d021072, x14);
  ASSERT_EQUAL_64(0x04ac2124, x15);

  TEARDOWN();
}


TEST(crc32w) {
  SETUP();
  START();

  __ Mov(w0, 0);
  __ Mov(w1, 0);
  __ Crc32w(w10, w0, w1);

  __ Mov(w0, 0x1);
  __ Mov(w1, 0x80000031);
  __ Crc32w(w11, w0, w1);

  __ Mov(w0, 0);
  __ Mov(w1, 128);
  __ Crc32w(w13, w0, w1);

  __ Mov(w0, UINT32_MAX);
  __ Mov(w1, 255);
  __ Crc32w(w14, w0, w1);

  __ Mov(w0, 0x00010001);
  __ Mov(w1, 0x10001000);
  __ Crc32w(w15, w0, w1);

  END();
  RUN();

  ASSERT_EQUAL_64(0x0, x10);
  ASSERT_EQUAL_64(0x1d937b81, x11);
  ASSERT_EQUAL_64(0xed59b63b, x13);
  ASSERT_EQUAL_64(0x00be2612, x14);
  ASSERT_EQUAL_64(0xa036e530, x15);

  TEARDOWN();
}


TEST(crc32x) {
  SETUP();
  START();

  __ Mov(w0, 0);
  __ Mov(x1, 0);
  __ Crc32x(w10, w0, x1);

  __ Mov(w0, 0x1);
  __ Mov(x1, UINT64_C(0x0000000800000031));
  __ Crc32x(w11, w0, x1);

  __ Mov(w0, 0);
  __ Mov(x1, 128);
  __ Crc32x(w13, w0, x1);

  __ Mov(w0, UINT32_MAX);
  __ Mov(x1, 255);
  __ Crc32x(w14, w0, x1);

  __ Mov(w0, 0x00010001);
  __ Mov(x1, UINT64_C(0x1000100000000000));
  __ Crc32x(w15, w0, x1);

  END();
  RUN();

  ASSERT_EQUAL_64(0x0, x10);
  ASSERT_EQUAL_64(0x40797b92, x11);
  ASSERT_EQUAL_64(0x533b85da, x13);
  ASSERT_EQUAL_64(0xbc962670, x14);
  ASSERT_EQUAL_64(0x0667602f, x15);

  TEARDOWN();
}
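

// Both families of checks use the standard bit-reflected polynomials:
// 0xedb88320 for the CRC32* tests above and 0x82f63b78 for the CRC32C*
// tests below. As a rough cross-check of the expected values, the per-byte
// accumulator update can be sketched as (a hypothetical helper, not used by
// the tests):
//
//   uint32_t Crc32Byte(uint32_t acc, uint8_t data, uint32_t poly) {
//     acc ^= data;
//     for (int i = 0; i < 8; i++) {
//       acc = (acc >> 1) ^ ((acc & 1) ? poly : 0);
//     }
//     return acc;
//   }
//
// For example, Crc32Byte(0, 128, 0xedb88320) = 0xedb88320, matching the
// crc32b result for w0 = 0, w1 = 128 checked above.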


TEST(crc32cb) {
  SETUP();
  START();

  __ Mov(w0, 0);
  __ Mov(w1, 0);
  __ Crc32cb(w10, w0, w1);

  __ Mov(w0, 0x1);
  __ Mov(w1, 0x138);
  __ Crc32cb(w11, w0, w1);

  __ Mov(w0, 0x1);
  __ Mov(w1, 0x38);
  __ Crc32cb(w12, w0, w1);

  __ Mov(w0, 0);
  __ Mov(w1, 128);
  __ Crc32cb(w13, w0, w1);

  __ Mov(w0, UINT32_MAX);
  __ Mov(w1, 255);
  __ Crc32cb(w14, w0, w1);

  __ Mov(w0, 0x00010001);
  __ Mov(w1, 0x10001000);
  __ Crc32cb(w15, w0, w1);

  END();
  RUN();

  ASSERT_EQUAL_64(0x0, x10);
  ASSERT_EQUAL_64(0x4851927d, x11);
  ASSERT_EQUAL_64(0x4851927d, x12);
  ASSERT_EQUAL_64(0x82f63b78, x13);
  ASSERT_EQUAL_64(0x00ffffff, x14);
  ASSERT_EQUAL_64(0xf26b8203, x15);

  TEARDOWN();
}


TEST(crc32ch) {
  SETUP();
  START();

  __ Mov(w0, 0);
  __ Mov(w1, 0);
  __ Crc32ch(w10, w0, w1);

  __ Mov(w0, 0x1);
  __ Mov(w1, 0x10038);
  __ Crc32ch(w11, w0, w1);

  __ Mov(w0, 0x1);
  __ Mov(w1, 0x38);
  __ Crc32ch(w12, w0, w1);

  __ Mov(w0, 0);
  __ Mov(w1, 128);
  __ Crc32ch(w13, w0, w1);

  __ Mov(w0, UINT32_MAX);
  __ Mov(w1, 255);
  __ Crc32ch(w14, w0, w1);

  __ Mov(w0, 0x00010001);
  __ Mov(w1, 0x10001000);
  __ Crc32ch(w15, w0, w1);

  END();
  RUN();

  ASSERT_EQUAL_64(0x0, x10);
  ASSERT_EQUAL_64(0xcef8494c, x11);
  ASSERT_EQUAL_64(0xcef8494c, x12);
  ASSERT_EQUAL_64(0xfbc3faf9, x13);
  ASSERT_EQUAL_64(0xad7dacae, x14);
  ASSERT_EQUAL_64(0x03fc5f19, x15);

  TEARDOWN();
}


TEST(crc32cw) {
  SETUP();
  START();

  __ Mov(w0, 0);
  __ Mov(w1, 0);
  __ Crc32cw(w10, w0, w1);

  __ Mov(w0, 0x1);
  __ Mov(w1, 0x80000031);
  __ Crc32cw(w11, w0, w1);

  __ Mov(w0, 0);
  __ Mov(w1, 128);
  __ Crc32cw(w13, w0, w1);

  __ Mov(w0, UINT32_MAX);
  __ Mov(w1, 255);
  __ Crc32cw(w14, w0, w1);

  __ Mov(w0, 0x00010001);
  __ Mov(w1, 0x10001000);
  __ Crc32cw(w15, w0, w1);

  END();
  RUN();

  ASSERT_EQUAL_64(0x0, x10);
  ASSERT_EQUAL_64(0xbcb79ece, x11);
  ASSERT_EQUAL_64(0x52a0c93f, x13);
  ASSERT_EQUAL_64(0x9f9b5c7a, x14);
  ASSERT_EQUAL_64(0xae1b882a, x15);

  TEARDOWN();
}


TEST(crc32cx) {
  SETUP();
  START();

  __ Mov(w0, 0);
  __ Mov(x1, 0);
  __ Crc32cx(w10, w0, x1);

  __ Mov(w0, 0x1);
  __ Mov(x1, UINT64_C(0x0000000800000031));
  __ Crc32cx(w11, w0, x1);

  __ Mov(w0, 0);
  __ Mov(x1, 128);
  __ Crc32cx(w13, w0, x1);

  __ Mov(w0, UINT32_MAX);
  __ Mov(x1, 255);
  __ Crc32cx(w14, w0, x1);

  __ Mov(w0, 0x00010001);
  __ Mov(x1, UINT64_C(0x1000100000000000));
  __ Crc32cx(w15, w0, x1);

  END();
  RUN();

  ASSERT_EQUAL_64(0x0, x10);
  ASSERT_EQUAL_64(0x7f320fcb, x11);
  ASSERT_EQUAL_64(0x34019664, x13);
  ASSERT_EQUAL_64(0x6cc27dd0, x14);
  ASSERT_EQUAL_64(0xc6f0acdb, x15);

  TEARDOWN();
}
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64


TEST(neon_fabd_scalar) {
  SETUP();

  START();
  __ Fmov(s0, 2.0);
  __ Fmov(s1, 0.5);
  __ Fmov(s2, 0.0);
  __ Fmov(s3, -0.0);
  __ Fmov(s4, kFP32PositiveInfinity);
  __ Fmov(s5, kFP32NegativeInfinity);
  __ Fabd(s16, s1, s0);
  __ Fabd(s17, s2, s3);
  __ Fabd(s18, s2, s5);
  __ Fabd(s19, s3, s4);
  __ Fabd(s20, s3, s5);

  __ Fmov(d21, 2.0);
  __ Fmov(d22, 0.5);
  __ Fmov(d23, 0.0);
  __ Fmov(d24, -0.0);
  __ Fmov(d25, kFP64PositiveInfinity);
  __ Fmov(d26, kFP64NegativeInfinity);
  __ Fabd(d27, d21, d22);
  __ Fabd(d28, d23, d24);
  __ Fabd(d29, d23, d26);
  __ Fabd(d30, d24, d25);
  __ Fabd(d31, d24, d26);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.5, s16);
  ASSERT_EQUAL_FP32(0.0, s17);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s18);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s19);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s20);
  ASSERT_EQUAL_FP64(1.5, d27);
  ASSERT_EQUAL_FP64(0.0, d28);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d29);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d30);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d31);

  TEARDOWN();
}


TEST(neon_faddp_scalar) {
  SETUP();

  START();
  __ Movi(d0, 0x3f80000040000000);
  __ Movi(d1, 0xff8000007f800000);
  __ Movi(d2, 0x0000000080000000);
  __ Faddp(s0, v0.V2S());
  __ Faddp(s1, v1.V2S());
  __ Faddp(s2, v2.V2S());

  __ Movi(v3.V2D(), 0xc000000000000000, 0x4000000000000000);
  __ Movi(v4.V2D(), 0xfff8000000000000, 0x7ff8000000000000);
  __ Movi(v5.V2D(), 0x0000000000000000, 0x8000000000000000);
  __ Faddp(d3, v3.V2D());
  __ Faddp(d4, v4.V2D());
  __ Faddp(d5, v5.V2D());
  END();

  RUN();

  ASSERT_EQUAL_FP32(3.0, s0);
  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s1);
  ASSERT_EQUAL_FP32(0.0, s2);
  ASSERT_EQUAL_FP64(0.0, d3);
  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d4);
  ASSERT_EQUAL_FP64(0.0, d5);

  TEARDOWN();
}


TEST(neon_fmaxp_scalar) {
  SETUP();

  START();
  __ Movi(d0, 0x3f80000040000000);
  __ Movi(d1, 0xff8000007f800000);
  __ Movi(d2, 0x7fc00000ff800000);
  __ Fmaxp(s0, v0.V2S());
  __ Fmaxp(s1, v1.V2S());
  __ Fmaxp(s2, v2.V2S());

  __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
  __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
  __ Movi(v5.V2D(), 0x7ff0000000000000, 0x7ff8000000000000);
  __ Fmaxp(d3, v3.V2D());
  __ Fmaxp(d4, v4.V2D());
  __ Fmaxp(d5, v5.V2D());
  END();

  RUN();

  ASSERT_EQUAL_FP32(2.0, s0);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s1);
  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s2);
  ASSERT_EQUAL_FP64(2.0, d3);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d4);
  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d5);

  TEARDOWN();
}


TEST(neon_fmaxnmp_scalar) {
  SETUP();

  START();
  __ Movi(d0, 0x3f80000040000000);
  __ Movi(d1, 0xff8000007f800000);
  __ Movi(d2, 0x7fc00000ff800000);
  __ Fmaxnmp(s0, v0.V2S());
  __ Fmaxnmp(s1, v1.V2S());
  __ Fmaxnmp(s2, v2.V2S());

  __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
  __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
  __ Movi(v5.V2D(), 0x7ff8000000000000, 0xfff0000000000000);
  __ Fmaxnmp(d3, v3.V2D());
  __ Fmaxnmp(d4, v4.V2D());
  __ Fmaxnmp(d5, v5.V2D());
  END();

  RUN();

  ASSERT_EQUAL_FP32(2.0, s0);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s1);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s2);
  ASSERT_EQUAL_FP64(2.0, d3);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d4);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d5);

  TEARDOWN();
}
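

// The "NM" pairwise forms implement the IEEE maxNum/minNum semantics: when
// exactly one element of a pair is a quiet NaN, the other element is
// returned, so Fmaxnmp(d5, {qNaN, -infinity}) above yields -infinity. The
// plain Fmaxp form above (and Fminp below) instead produces the default NaN
// in that case.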


TEST(neon_fminp_scalar) {
  SETUP();

  START();
  __ Movi(d0, 0x3f80000040000000);
  __ Movi(d1, 0xff8000007f800000);
  __ Movi(d2, 0x7fc00000ff800000);
  __ Fminp(s0, v0.V2S());
  __ Fminp(s1, v1.V2S());
  __ Fminp(s2, v2.V2S());

  __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
  __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
  __ Movi(v5.V2D(), 0x7ff0000000000000, 0x7ff8000000000000);
  __ Fminp(d3, v3.V2D());
  __ Fminp(d4, v4.V2D());
  __ Fminp(d5, v5.V2D());
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s0);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s1);
  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s2);
  ASSERT_EQUAL_FP64(1.0, d3);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d4);
  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d5);

  TEARDOWN();
}


TEST(neon_fminnmp_scalar) {
  SETUP();

  START();
  __ Movi(d0, 0x3f80000040000000);
  __ Movi(d1, 0xff8000007f800000);
  __ Movi(d2, 0x7fc00000ff800000);
  __ Fminnmp(s0, v0.V2S());
  __ Fminnmp(s1, v1.V2S());
  __ Fminnmp(s2, v2.V2S());

  __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
  __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
  __ Movi(v5.V2D(), 0x7ff8000000000000, 0xfff0000000000000);
  __ Fminnmp(d3, v3.V2D());
  __ Fminnmp(d4, v4.V2D());
  __ Fminnmp(d5, v5.V2D());
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s0);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s1);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s2);
  ASSERT_EQUAL_FP64(1.0, d3);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d4);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d5);

  TEARDOWN();
}


TEST(neon_tbl) {
  SETUP();

  START();
  __ Movi(v30.V2D(), 0xbf561e188b1280e9, 0xbd542b8cbd24e8e8);
  __ Movi(v31.V2D(), 0xb5e9883d2c88a46d, 0x12276d5b614c915e);
  __ Movi(v0.V2D(), 0xc45b7782bc5ecd72, 0x5dd4fe5a4bc6bf5e);
  __ Movi(v1.V2D(), 0x1e3254094bd1746a, 0xf099ecf50e861c80);

  __ Movi(v4.V2D(), 0xf80c030100031f16, 0x00070504031201ff);
  __ Movi(v5.V2D(), 0x1f01001afc14202a, 0x2a081e1b0c02020c);
  __ Movi(v6.V2D(), 0x353f1a13022a2360, 0x2c464a00203a0a33);
  __ Movi(v7.V2D(), 0x64801a1c054cf30d, 0x793a2c052e213739);

  __ Movi(v8.V2D(), 0xb7f60ad7d7d88f13, 0x13eefc240496e842);
  __ Movi(v9.V2D(), 0x1be199c7c69b47ec, 0x8e4b9919f6eed443);
  __ Movi(v10.V2D(), 0x9bd2e1654c69e48f, 0x2143d089e426c6d2);
  __ Movi(v11.V2D(), 0xc31dbdc4a0393065, 0x1ecc2077caaf64d8);
  __ Movi(v12.V2D(), 0x29b24463967bc6eb, 0xdaf59970df01c93b);
  __ Movi(v13.V2D(), 0x3e20a4a4cb6813f4, 0x20a5832713dae669);
  __ Movi(v14.V2D(), 0xc5ff9a94041b1fdf, 0x2f46cde38cba2682);
  __ Movi(v15.V2D(), 0xd8cc5b0e61f387e6, 0xe69d6d314971e8fd);

  __ Tbl(v8.V16B(), v1.V16B(), v4.V16B());
  __ Tbl(v9.V16B(), v0.V16B(), v1.V16B(), v5.V16B());
  __ Tbl(v10.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V16B());
  __ Tbl(v11.V16B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V16B());
  __ Tbl(v12.V8B(), v1.V16B(), v4.V8B());
  __ Tbl(v13.V8B(), v0.V16B(), v1.V16B(), v5.V8B());
  __ Tbl(v14.V8B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V8B());
  __ Tbl(v15.V8B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V8B());

  __ Movi(v16.V2D(), 0xb7f60ad7d7d88f13, 0x13eefc240496e842);
  __ Movi(v17.V2D(), 0x1be199c7c69b47ec, 0x8e4b9919f6eed443);
  __ Movi(v18.V2D(), 0x9bd2e1654c69e48f, 0x2143d089e426c6d2);
  __ Movi(v19.V2D(), 0xc31dbdc4a0393065, 0x1ecc2077caaf64d8);
  __ Movi(v20.V2D(), 0x29b24463967bc6eb, 0xdaf59970df01c93b);
  __ Movi(v21.V2D(), 0x3e20a4a4cb6813f4, 0x20a5832713dae669);
  __ Movi(v22.V2D(), 0xc5ff9a94041b1fdf, 0x2f46cde38cba2682);
  __ Movi(v23.V2D(), 0xd8cc5b0e61f387e6, 0xe69d6d314971e8fd);

  __ Tbx(v16.V16B(), v1.V16B(), v4.V16B());
  __ Tbx(v17.V16B(), v0.V16B(), v1.V16B(), v5.V16B());
  __ Tbx(v18.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V16B());
  __ Tbx(v19.V16B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V16B());
  __ Tbx(v20.V8B(), v1.V16B(), v4.V8B());
  __ Tbx(v21.V8B(), v0.V16B(), v1.V16B(), v5.V8B());
  __ Tbx(v22.V8B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V8B());
  __ Tbx(v23.V8B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V8B());
  END();

  RUN();

  ASSERT_EQUAL_128(0x00090e1c800e0000, 0x80f0ecf50e001c00, v8);
  ASSERT_EQUAL_128(0x1ebf5ed100f50000, 0x0072324b82c6c682, v9);
  ASSERT_EQUAL_128(0x00005e4b4cd10e00, 0x0900005e80008800, v10);
  ASSERT_EQUAL_128(0x0000883d2b00001e, 0x00d1822b5bbff074, v11);
  ASSERT_EQUAL_128(0x0000000000000000, 0x80f0ecf50e001c00, v12);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0072324b82c6c682, v13);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0900005e80008800, v14);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00d1822b5bbff074, v15);

  ASSERT_EQUAL_128(0xb7090e1c800e8f13, 0x80f0ecf50e961c42, v16);
  ASSERT_EQUAL_128(0x1ebf5ed1c6f547ec, 0x8e72324b82c6c682, v17);
  ASSERT_EQUAL_128(0x9bd25e4b4cd10e8f, 0x0943d05e802688d2, v18);
  ASSERT_EQUAL_128(0xc31d883d2b39301e, 0x1ed1822b5bbff074, v19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x80f0ecf50e011c3b, v20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x2072324b82c6c682, v21);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0946cd5e80ba8882, v22);
  ASSERT_EQUAL_128(0x0000000000000000, 0xe6d1822b5bbff074, v23);

  TEARDOWN();
}
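

// Tbl and Tbx differ only in how they treat out-of-range indices: Tbl writes
// zero to the corresponding result byte, while Tbx leaves the destination
// byte unchanged. That is why v16-v23 above are preloaded with the same
// arbitrary values as v8-v15: result bytes untouched by Tbx must still match
// the original fill.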


TEST(regress_cmp_shift_imm) {
  SETUP();

  START();

  __ Mov(x0, 0x3d720c8d);
  __ Cmp(x0, Operand(0x3d720c8d));

  END();
  RUN();

  ASSERT_EQUAL_NZCV(ZCFlag);

  TEARDOWN();
}


TEST(compute_address) {
  SETUP();

  START();
  int64_t base_address = INT64_C(0x123000000abc);
  int64_t reg_offset = INT64_C(0x1087654321);
  Register base = x0;
  Register offset = x1;

  __ Mov(base, base_address);
  __ Mov(offset, reg_offset);

  __ ComputeAddress(x2, MemOperand(base, 0));
  __ ComputeAddress(x3, MemOperand(base, 8));
  __ ComputeAddress(x4, MemOperand(base, -100));

  __ ComputeAddress(x5, MemOperand(base, offset));
  __ ComputeAddress(x6, MemOperand(base, offset, LSL, 2));
  __ ComputeAddress(x7, MemOperand(base, offset, LSL, 4));
  __ ComputeAddress(x8, MemOperand(base, offset, LSL, 8));

  __ ComputeAddress(x9, MemOperand(base, offset, SXTW));
  __ ComputeAddress(x10, MemOperand(base, offset, UXTW, 1));
  __ ComputeAddress(x11, MemOperand(base, offset, SXTW, 2));
  __ ComputeAddress(x12, MemOperand(base, offset, UXTW, 3));

  END();

  RUN();

  ASSERT_EQUAL_64(base_address, base);

  ASSERT_EQUAL_64(INT64_C(0x123000000abc), x2);
  ASSERT_EQUAL_64(INT64_C(0x123000000ac4), x3);
  ASSERT_EQUAL_64(INT64_C(0x123000000a58), x4);

  ASSERT_EQUAL_64(INT64_C(0x124087654ddd), x5);
  ASSERT_EQUAL_64(INT64_C(0x12721d951740), x6);
  ASSERT_EQUAL_64(INT64_C(0x133876543ccc), x7);
  ASSERT_EQUAL_64(INT64_C(0x22b765432bbc), x8);

  ASSERT_EQUAL_64(INT64_C(0x122f87654ddd), x9);
  ASSERT_EQUAL_64(INT64_C(0x12310eca90fe), x10);
  ASSERT_EQUAL_64(INT64_C(0x122e1d951740), x11);
  ASSERT_EQUAL_64(INT64_C(0x12343b2a23c4), x12);

  TEARDOWN();
}
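

// ComputeAddress materialises the effective address of a MemOperand into a
// register instead of accessing memory, so each form above is equivalent to
// an Add with the corresponding shifted or extended operand, e.g.:
//
//   __ ComputeAddress(x6, MemOperand(base, offset, LSL, 2));
//   // is equivalent to:
//   __ Add(x6, base, Operand(offset, LSL, 2));
//
// With the values above: 0x123000000abc + (0x1087654321 << 2)
//                          = 0x12721d951740.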


TEST(far_branch_backward) {
  // Test that the MacroAssembler correctly resolves backward branches to
  // labels that are outside the immediate range of branch instructions. Take
  // into account that backward branches can reach one instruction further
  // than forward branches.
  const int overflow_size =
      kInstructionSize +
      std::max(Instruction::GetImmBranchForwardRange(TestBranchType),
               std::max(Instruction::GetImmBranchForwardRange(
                            CompareBranchType),
                        Instruction::GetImmBranchForwardRange(CondBranchType)));

  SETUP();
  START();

  Label done, fail;
  Label test_tbz, test_cbz, test_bcond;
  Label success_tbz, success_cbz, success_bcond;

  __ Mov(x0, 0);
  __ Mov(x1, 1);
  __ Mov(x10, 0);

  __ B(&test_tbz);
  __ Bind(&success_tbz);
  __ Orr(x0, x0, 1 << 0);
  __ B(&test_cbz);
  __ Bind(&success_cbz);
  __ Orr(x0, x0, 1 << 1);
  __ B(&test_bcond);
  __ Bind(&success_bcond);
  __ Orr(x0, x0, 1 << 2);

  __ B(&done);

  // Generate enough code to overflow the immediate range of the three types
  // of branches below.
  for (unsigned i = 0; i < overflow_size / kInstructionSize; ++i) {
    if (i % 100 == 0) {
      // If we do land in this code, we do not want to execute so many nops
      // before reaching the end of the test (especially if tracing is
      // activated).
      __ B(&fail);
    } else {
      __ Nop();
    }
  }
  __ B(&fail);

  __ Bind(&test_tbz);
  __ Tbz(x10, 7, &success_tbz);
  __ Bind(&test_cbz);
  __ Cbz(x10, &success_cbz);
  __ Bind(&test_bcond);
  __ Cmp(x10, 0);
  __ B(eq, &success_bcond);

  // For each out-of-range branch instruction, at least two instructions
  // should have been generated.
  VIXL_CHECK(masm.GetSizeOfCodeGeneratedSince(&test_tbz) >=
             7 * kInstructionSize);

  __ Bind(&fail);
  __ Mov(x1, 0);
  __ Bind(&done);

  END();
  RUN();

  ASSERT_EQUAL_64(0x7, x0);
  ASSERT_EQUAL_64(0x1, x1);

  TEARDOWN();
}
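

// When a forward branch would go out of range, the MacroAssembler emits a
// veneer pool: the short-range branch is bound to a nearby veneer that
// continues to the real target with an unconditional B, whose range
// (+/-128 MB) far exceeds that of tbz (+/-32 KB) or of cbz and b.cond
// (+/-1 MB). Schematically (a sketch, not the exact generated code):
//
//   tbz x10, 7, veneer    // Short range.
//   ...
//   veneer:
//     b success_tbz       // Long range.
//
// The following tests exercise this mechanism directly.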
  VIXL_CHECK(masm.GetSizeOfCodeGeneratedSince(&test_tbz) >=
             7 * kInstructionSize);

  __ Bind(&fail);
  __ Mov(x1, 0);
  __ Bind(&done);

  END();
  RUN();

  ASSERT_EQUAL_64(0x7, x0);
  ASSERT_EQUAL_64(0x1, x1);

  TEARDOWN();
}


TEST(single_veneer) {
  SETUP();
  START();

  const int max_range = Instruction::GetImmBranchForwardRange(TestBranchType);

  Label success, fail, done;

  __ Mov(x0, 0);
  __ Mov(x1, 1);
  __ Mov(x10, 0);

  __ Tbz(x10, 7, &success);

  // Generate enough code to overflow the immediate range of the `tbz`.
  for (unsigned i = 0; i < max_range / kInstructionSize + 1; ++i) {
    if (i % 100 == 0) {
      // If we do land in this code, we do not want to execute so many nops
      // before reaching the end of the test (especially if tracing is
      // activated).
      __ B(&fail);
    } else {
      __ Nop();
    }
  }
  __ B(&fail);

  __ Bind(&success);
  __ Mov(x0, 1);

  __ B(&done);
  __ Bind(&fail);
  __ Mov(x1, 0);
  __ Bind(&done);

  END();
  RUN();

  ASSERT_EQUAL_64(1, x0);
  ASSERT_EQUAL_64(1, x1);

  TEARDOWN();
}


TEST(simple_veneers) {
  // Test that the MacroAssembler correctly emits veneers for forward branches
  // to labels that are outside the immediate range of branch instructions.
  const int max_range =
      std::max(Instruction::GetImmBranchForwardRange(TestBranchType),
               std::max(Instruction::GetImmBranchForwardRange(
                            CompareBranchType),
                        Instruction::GetImmBranchForwardRange(CondBranchType)));

  SETUP();
  START();

  Label done, fail;
  Label test_tbz, test_cbz, test_bcond;
  Label success_tbz, success_cbz, success_bcond;

  __ Mov(x0, 0);
  __ Mov(x1, 1);
  __ Mov(x10, 0);

  __ Bind(&test_tbz);
  __ Tbz(x10, 7, &success_tbz);
  __ Bind(&test_cbz);
  __ Cbz(x10, &success_cbz);
  __ Bind(&test_bcond);
  __ Cmp(x10, 0);
  __ B(eq, &success_bcond);

  // Generate enough code to overflow the immediate range of the three types
  // of branches above.
  for (unsigned i = 0; i < max_range / kInstructionSize + 1; ++i) {
    if (i % 100 == 0) {
      // If we do land in this code, we do not want to execute so many nops
      // before reaching the end of the test (especially if tracing is
      // activated).
      __ B(&fail);
    } else {
      __ Nop();
    }
  }
  __ B(&fail);

  __ Bind(&success_tbz);
  __ Orr(x0, x0, 1 << 0);
  __ B(&test_cbz);
  __ Bind(&success_cbz);
  __ Orr(x0, x0, 1 << 1);
  __ B(&test_bcond);
  __ Bind(&success_bcond);
  __ Orr(x0, x0, 1 << 2);

  __ B(&done);
  __ Bind(&fail);
  __ Mov(x1, 0);
  __ Bind(&done);

  END();
  RUN();

  ASSERT_EQUAL_64(0x7, x0);
  ASSERT_EQUAL_64(0x1, x1);

  TEARDOWN();
}


TEST(veneers_stress) {
  SETUP();
  START();

  // This is a code generation test stressing the emission of veneers. The
  // code generated is not executed.
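  // Each loop iteration below emits four branches to the same distant target,
  // so `iterations` is sized to generate roughly 1.25 times the conditional
  // branch range in code, forcing veneers to be emitted along the way.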

  Label target;
  const unsigned max_range =
      Instruction::GetImmBranchForwardRange(CondBranchType);
  const unsigned iterations =
      (max_range + max_range / 4) / (4 * kInstructionSize);
  for (unsigned i = 0; i < iterations; i++) {
    __ B(&target);
    __ B(eq, &target);
    __ Cbz(x0, &target);
    __ Tbz(x0, 0, &target);
  }
  __ Bind(&target);

  END();
  TEARDOWN();
}


TEST(veneers_two_out_of_range) {
  SETUP();
  START();

  // This is a code generation test. The code generated is not executed.
  // Ensure that the MacroAssembler considers unresolved branches to choose
  // when a veneer pool should be emitted. We generate two branches that go
  // out of range at the same offset. When the MacroAssembler decides to emit
  // the veneer pool, the emission of a first veneer should not cause the
  // other branch to go out of range.

  int range_cbz = Instruction::GetImmBranchForwardRange(CompareBranchType);
  int range_tbz = Instruction::GetImmBranchForwardRange(TestBranchType);
  int max_target = static_cast<int>(masm.GetCursorOffset()) + range_cbz;

  Label done;

  // We use different labels to prevent the MacroAssembler from sharing
  // veneers.
  Label target_cbz, target_tbz;

  __ Cbz(x0, &target_cbz);
  while (masm.GetCursorOffset() < max_target - range_tbz) {
    __ Nop();
  }
  __ Tbz(x0, 0, &target_tbz);
  while (masm.GetCursorOffset() < max_target) {
    __ Nop();
  }

  // This additional nop makes the branches go out of range.
  __ Nop();

  __ Bind(&target_cbz);
  __ Bind(&target_tbz);

  END();
  TEARDOWN();
}


TEST(veneers_hanging) {
  SETUP();
  START();

  // This is a code generation test. The code generated is not executed.
  // Ensure that the MacroAssembler considers unresolved branches to choose
  // when a veneer pool should be emitted. This is similar to the
  // 'veneers_two_out_of_range' test. We try to trigger the following
  // situation:
  //   b.eq label
  //   b.eq label
  //   ...
  //   nop
  //   ...
  //   cbz x0, label
  //   cbz x0, label
  //   ...
  //   tbz x0, 0, label
  //   nop
  //   ...
  //   nop    <- From here the `b.eq` and `cbz` instructions run out of
  //             range, so a veneer pool is required.
  //   veneer
  //   veneer
  //   veneer <- The `tbz` runs out of range somewhere in the middle of the
  //   veneer    veneer pool.
  //   veneer

  const int range_bcond =
      Instruction::GetImmBranchForwardRange(CondBranchType);
  const int range_cbz =
      Instruction::GetImmBranchForwardRange(CompareBranchType);
  const int range_tbz = Instruction::GetImmBranchForwardRange(TestBranchType);
  const int max_target =
      static_cast<int>(masm.GetCursorOffset()) + range_bcond;

  Label done;
  const int n_bcond = 100;
  const int n_cbz = 100;
  const int n_tbz = 1;
  const int kNTotalBranches = n_bcond + n_cbz + n_tbz;

  // We use different labels to prevent the MacroAssembler from sharing
  // veneers.
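  // Note that `b.eq` and `cbz` use 19-bit immediate offsets (+/-1MB range)
  // while `tbz` uses a 14-bit immediate offset (+/-32KB range); the nop
  // padding below is sized so that the `b.eq` and `cbz` branches start going
  // out of range at the same offset, with the single `tbz` following about
  // `margin` bytes later.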
  Label labels[kNTotalBranches];
  for (int i = 0; i < kNTotalBranches; i++) {
    new (&labels[i]) Label();
  }

  for (int i = 0; i < n_bcond; i++) {
    __ B(eq, &labels[i]);
  }

  while (masm.GetCursorOffset() < max_target - range_cbz) {
    __ Nop();
  }

  for (int i = 0; i < n_cbz; i++) {
    __ Cbz(x0, &labels[n_bcond + i]);
  }

  // Ensure the 'tbz' will go out of range after some of the previously
  // generated branches.
  int margin = (n_bcond / 2) * kInstructionSize;
  while (masm.GetCursorOffset() < max_target - range_tbz + margin) {
    __ Nop();
  }

  __ Tbz(x0, 0, &labels[n_bcond + n_cbz]);

  while (masm.GetCursorOffset() < max_target) {
    __ Nop();
  }

  // This additional nop makes the 'b.eq' and 'cbz' instructions go out of
  // range and forces the emission of a veneer pool. The 'tbz' is not yet out
  // of range, but will go out of range while veneers are emitted for the
  // other branches.
  // The MacroAssembler should ensure that veneers are correctly emitted for
  // all the branches, including the 'tbz'. Checks will fail if the target of
  // a branch is out of range.
  __ Nop();

  for (int i = 0; i < kNTotalBranches; i++) {
    __ Bind(&labels[i]);
  }

  END();
  TEARDOWN();
}


TEST(collision_literal_veneer_pools) {
  SETUP();
  START();

  // This is a code generation test. The code generated is not executed.

  // Make sure the literal pool is empty.
  masm.EmitLiteralPool(LiteralPool::kBranchRequired);
  ASSERT_LITERAL_POOL_SIZE(0);

  // We chose the offsets below to (try to) trigger the following situation:
  // buffer offset
  //           0:   tbz x0, 0, target_tbz ----------------------------.
  //           4:   nop                                               |
  //                ...                                               |
  //                nop                                               |
  // literal gen:   ldr s0, [pc + ...]  ; load from `pool start + 0`  |
  //                ldr s0, [pc + ...]  ; load from `pool start + 4`  |
  //                ...                                               |
  //                ldr s0, [pc + ...]                                |
  //  pool start:   floating-point literal (0.1)                      |
  //                floating-point literal (1.1)                      |
  //                ...                                               |
  //                floating-point literal (<n>.1) <--tbz-max-range--'
  //                floating-point literal (<n+1>.1)
  //                ...

  const int range_tbz = Instruction::GetImmBranchForwardRange(TestBranchType);
  const int max_target = static_cast<int>(masm.GetCursorOffset()) + range_tbz;

  const size_t target_literal_pool_size = 100 * kInstructionSize;
  const int offset_start_literal_gen =
      target_literal_pool_size + target_literal_pool_size / 2;


  Label target_tbz;

  __ Tbz(x0, 0, &target_tbz);
  VIXL_CHECK(masm.GetNumberOfPotentialVeneers() == 1);
  while (masm.GetCursorOffset() < max_target - offset_start_literal_gen) {
    __ Nop();
  }
  VIXL_CHECK(masm.GetNumberOfPotentialVeneers() == 1);

  for (int i = 0; i < 100; i++) {
    // Use a different value to force one literal pool entry per iteration.
    __ Ldr(s0, i + 0.1);
  }
  VIXL_CHECK(masm.GetLiteralPoolSize() >= target_literal_pool_size);

  // Force emission of a literal pool.
  masm.EmitLiteralPool(LiteralPool::kBranchRequired);
  ASSERT_LITERAL_POOL_SIZE(0);

  // The branch should not have gone out of range during the emission of the
  // literal pool.
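  // Binding the target while the branch is still in range resolves it
  // directly, so no veneer remains pending, as the check below verifies.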
  __ Bind(&target_tbz);

  VIXL_CHECK(masm.GetNumberOfPotentialVeneers() == 0);

  END();
  TEARDOWN();
}


TEST(ldr_literal_explicit) {
  SETUP();

  START();
  Literal<int64_t> automatically_placed_literal(1, masm.GetLiteralPool());
  Literal<int64_t> manually_placed_literal(2);
  {
    ExactAssemblyScope scope(&masm, kInstructionSize + sizeof(int64_t));
    Label over_literal;
    __ b(&over_literal);
    __ place(&manually_placed_literal);
    __ bind(&over_literal);
  }
  __ Ldr(x1, &manually_placed_literal);
  __ Ldr(x2, &automatically_placed_literal);
  __ Add(x0, x1, x2);
  END();

  RUN();

  ASSERT_EQUAL_64(3, x0);

  TEARDOWN();
}


TEST(ldr_literal_automatically_placed) {
  SETUP();

  START();

  // We start with an empty literal pool.
  ASSERT_LITERAL_POOL_SIZE(0);

  // Create a literal that should be placed by the literal pool.
  Literal<int64_t> explicit_literal(2, masm.GetLiteralPool());
  // It should not appear in the literal pool until its first use.
  ASSERT_LITERAL_POOL_SIZE(0);

  // Check that using standard literals does not break the use of explicitly
  // created literals.
  __ Ldr(d1, 1.1);
  ASSERT_LITERAL_POOL_SIZE(8);
  masm.EmitLiteralPool(LiteralPool::kBranchRequired);
  ASSERT_LITERAL_POOL_SIZE(0);

  __ Ldr(x2, &explicit_literal);
  ASSERT_LITERAL_POOL_SIZE(8);
  masm.EmitLiteralPool(LiteralPool::kBranchRequired);
  ASSERT_LITERAL_POOL_SIZE(0);

  __ Ldr(d3, 3.3);
  ASSERT_LITERAL_POOL_SIZE(8);
  masm.EmitLiteralPool(LiteralPool::kBranchRequired);
  ASSERT_LITERAL_POOL_SIZE(0);

  // Re-use our explicitly created literal. It has already been placed, so it
  // should not impact the literal pool.
  __ Ldr(x4, &explicit_literal);
  ASSERT_LITERAL_POOL_SIZE(0);

  END();

  RUN();

  ASSERT_EQUAL_FP64(1.1, d1);
  ASSERT_EQUAL_64(2, x2);
  ASSERT_EQUAL_FP64(3.3, d3);
  ASSERT_EQUAL_64(2, x4);

  TEARDOWN();
}


TEST(literal_update_overwrite) {
  SETUP();

  START();

  ASSERT_LITERAL_POOL_SIZE(0);
  LiteralPool* literal_pool = masm.GetLiteralPool();

  Literal<int32_t> lit_32_update_before_pool(0xbad, literal_pool);
  Literal<int32_t> lit_32_update_after_pool(0xbad, literal_pool);
  Literal<int64_t> lit_64_update_before_pool(0xbad, literal_pool);
  Literal<int64_t> lit_64_update_after_pool(0xbad, literal_pool);

  ASSERT_LITERAL_POOL_SIZE(0);

  lit_32_update_before_pool.UpdateValue(32);
  lit_64_update_before_pool.UpdateValue(64);

  __ Ldr(w1, &lit_32_update_before_pool);
  __ Ldr(x2, &lit_64_update_before_pool);
  __ Ldr(w3, &lit_32_update_after_pool);
  __ Ldr(x4, &lit_64_update_after_pool);

  masm.EmitLiteralPool(LiteralPool::kBranchRequired);

  VIXL_ASSERT(lit_32_update_after_pool.IsPlaced());
  VIXL_ASSERT(lit_64_update_after_pool.IsPlaced());
  lit_32_update_after_pool.UpdateValue(128, &masm);
  lit_64_update_after_pool.UpdateValue(256, &masm);

  END();

  RUN();

  ASSERT_EQUAL_64(32, x1);
  ASSERT_EQUAL_64(64, x2);
  ASSERT_EQUAL_64(128, x3);
  ASSERT_EQUAL_64(256, x4);

  TEARDOWN();
}


TEST(literal_deletion_policies) {
  SETUP();

  START();

  // We cannot check exactly when the deletion of the literals occurs, but we
  // check that usage of the deletion policies is not broken.

  ASSERT_LITERAL_POOL_SIZE(0);
  LiteralPool* literal_pool = masm.GetLiteralPool();

  Literal<int32_t> lit_manual(0xbad, literal_pool);
  Literal<int32_t>* lit_deleted_on_placement =
      new Literal<int32_t>(0xbad,
                           literal_pool,
                           RawLiteral::kDeletedOnPlacementByPool);
  Literal<int32_t>* lit_deleted_on_pool_destruction =
      new Literal<int32_t>(0xbad,
                           literal_pool,
                           RawLiteral::kDeletedOnPoolDestruction);

  ASSERT_LITERAL_POOL_SIZE(0);

  lit_manual.UpdateValue(32);
  lit_deleted_on_placement->UpdateValue(64);

  __ Ldr(w1, &lit_manual);
  __ Ldr(w2, lit_deleted_on_placement);
  __ Ldr(w3, lit_deleted_on_pool_destruction);

  masm.EmitLiteralPool(LiteralPool::kBranchRequired);

  VIXL_ASSERT(lit_manual.IsPlaced());
  VIXL_ASSERT(lit_deleted_on_pool_destruction->IsPlaced());
  lit_deleted_on_pool_destruction->UpdateValue(128, &masm);

  END();

  RUN();

  ASSERT_EQUAL_64(32, x1);
  ASSERT_EQUAL_64(64, x2);
  ASSERT_EQUAL_64(128, x3);

  TEARDOWN();
}


TEST(move_immediate_helpers) {
  // Using these helpers to query information (without generating code) should
  // not crash.
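  // Passing NULL instead of a MacroAssembler makes the helpers evaluate what
  // they would emit without actually generating any code.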
  MacroAssembler::MoveImmediateHelper(NULL, x0, 0x12345678);
  MacroAssembler::OneInstrMoveImmediateHelper(NULL, x1, 0xabcdef);
}


TEST(generic_operand_helpers) {
  GenericOperand invalid_1;
  GenericOperand invalid_2;
  GenericOperand reg(x3);
  GenericOperand mem(MemOperand(sp, 8), kXRegSizeInBytes);

  VIXL_CHECK(!invalid_1.IsValid());
  VIXL_CHECK(!invalid_2.IsValid());

  VIXL_CHECK(invalid_1.Equals(invalid_1));
  VIXL_CHECK(invalid_2.Equals(invalid_2));
  VIXL_CHECK(reg.Equals(reg));
  VIXL_CHECK(mem.Equals(mem));

  VIXL_CHECK(invalid_1.Equals(invalid_2));
  VIXL_CHECK(invalid_2.Equals(invalid_1));

  VIXL_CHECK(!invalid_1.Equals(reg));
  VIXL_CHECK(!invalid_1.Equals(mem));
  VIXL_CHECK(!reg.Equals(invalid_1));
  VIXL_CHECK(!reg.Equals(invalid_2));
  VIXL_CHECK(!reg.Equals(mem));
  VIXL_CHECK(!mem.Equals(invalid_1));
  VIXL_CHECK(!mem.Equals(reg));
}


TEST(generic_operand) {
  SETUP();

  int32_t data_32_array[5] = {0xbadbeef,
                              0x11111111,
                              0xbadbeef,
                              0x33333333,
                              0xbadbeef};
  int64_t data_64_array[5] = {INT64_C(0xbadbadbadbeef),
                              INT64_C(0x1111111111111111),
                              INT64_C(0xbadbadbadbeef),
                              INT64_C(0x3333333333333333),
                              INT64_C(0xbadbadbadbeef)};
  size_t size_32 = sizeof(data_32_array[0]);
  size_t size_64 = sizeof(data_64_array[0]);

  START();

  intptr_t data_32_address = reinterpret_cast<intptr_t>(&data_32_array[0]);
  intptr_t data_64_address = reinterpret_cast<intptr_t>(&data_64_array[0]);
  Register data_32 = x27;
  Register data_64 = x28;
  __ Mov(data_32, data_32_address);
  __ Mov(data_64, data_64_address);

  __ Move(GenericOperand(w0),
          GenericOperand(MemOperand(data_32, 1 * size_32), size_32));
  __ Move(GenericOperand(s0),
          GenericOperand(MemOperand(data_32, 3 * size_32), size_32));
  __ Move(GenericOperand(x10),
          GenericOperand(MemOperand(data_64, 1 * size_64), size_64));
  __ Move(GenericOperand(d10),
          GenericOperand(MemOperand(data_64, 3 * size_64), size_64));

  __ Move(GenericOperand(w1), GenericOperand(w0));
  __ Move(GenericOperand(s1), GenericOperand(s0));
  __ Move(GenericOperand(x11), GenericOperand(x10));
  __ Move(GenericOperand(d11), GenericOperand(d10));

  __ Move(GenericOperand(MemOperand(data_32, 0 * size_32), size_32),
          GenericOperand(w1));
  __ Move(GenericOperand(MemOperand(data_32, 2 * size_32), size_32),
          GenericOperand(s1));
  __ Move(GenericOperand(MemOperand(data_64, 0 * size_64), size_64),
          GenericOperand(x11));
  __ Move(GenericOperand(MemOperand(data_64, 2 * size_64), size_64),
          GenericOperand(d11));

  __ Move(GenericOperand(MemOperand(data_32, 4 * size_32), size_32),
          GenericOperand(MemOperand(data_32, 0 * size_32), size_32));
  __ Move(GenericOperand(MemOperand(data_64, 4 * size_64), size_64),
          GenericOperand(MemOperand(data_64, 0 * size_64), size_64));
  END();

  RUN();

  ASSERT_EQUAL_64(data_32_address, data_32);
  ASSERT_EQUAL_64(data_64_address, data_64);

  ASSERT_EQUAL_32(0x11111111, w0);
  ASSERT_EQUAL_32(0x33333333, core.sreg_bits(0));
  ASSERT_EQUAL_64(INT64_C(0x1111111111111111), x10);
  ASSERT_EQUAL_64(INT64_C(0x3333333333333333), core.dreg_bits(10));

  ASSERT_EQUAL_32(0x11111111, w1);
  ASSERT_EQUAL_32(0x33333333, core.sreg_bits(1));
  ASSERT_EQUAL_64(INT64_C(0x1111111111111111), x11);
  ASSERT_EQUAL_64(INT64_C(0x3333333333333333), core.dreg_bits(11));

  VIXL_CHECK(data_32_array[0] == 0x11111111);
  VIXL_CHECK(data_32_array[1] == 0x11111111);
  VIXL_CHECK(data_32_array[2] == 0x33333333);
  VIXL_CHECK(data_32_array[3] == 0x33333333);
  VIXL_CHECK(data_32_array[4] == 0x11111111);

  VIXL_CHECK(data_64_array[0] == INT64_C(0x1111111111111111));
  VIXL_CHECK(data_64_array[1] == INT64_C(0x1111111111111111));
  VIXL_CHECK(data_64_array[2] == INT64_C(0x3333333333333333));
  VIXL_CHECK(data_64_array[3] == INT64_C(0x3333333333333333));
  VIXL_CHECK(data_64_array[4] == INT64_C(0x1111111111111111));

  TEARDOWN();
}


int32_t runtime_call_add_one(int32_t a) { return a + 1; }

double runtime_call_add_doubles(double a, double b, double c) {
  return a + b + c;
}

int64_t runtime_call_one_argument_on_stack(int64_t arg1 __attribute__((unused)),
                                           int64_t arg2 __attribute__((unused)),
                                           int64_t arg3 __attribute__((unused)),
                                           int64_t arg4 __attribute__((unused)),
                                           int64_t arg5 __attribute__((unused)),
                                           int64_t arg6 __attribute__((unused)),
                                           int64_t arg7 __attribute__((unused)),
                                           int64_t arg8 __attribute__((unused)),
                                           int64_t arg9) {
  return arg9;
}

double runtime_call_two_arguments_on_stack(int64_t arg1 __attribute__((unused)),
                                           int64_t arg2 __attribute__((unused)),
                                           int64_t arg3 __attribute__((unused)),
                                           int64_t arg4 __attribute__((unused)),
                                           int64_t arg5 __attribute__((unused)),
                                           int64_t arg6 __attribute__((unused)),
                                           int64_t arg7 __attribute__((unused)),
                                           int64_t arg8 __attribute__((unused)),
                                           double arg9,
                                           double arg10) {
  return arg9 - arg10;
}

void runtime_call_store_at_address(int64_t* address) { *address = 0xf00d; }

// Test feature detection of calls to runtime functions.

// C++11 should be sufficient to provide simulated runtime calls, except for a
// GCC bug before 4.9.1.
#if defined(VIXL_INCLUDE_SIMULATOR_AARCH64) && (__cplusplus >= 201103L) && \
    (defined(__clang__) || GCC_VERSION_OR_NEWER(4, 9, 1)) &&               \
    !defined(VIXL_HAS_SIMULATED_RUNTIME_CALL_SUPPORT)
#error \
    "C++11 should be sufficient to provide support for simulated runtime calls."
#endif  // #if defined(VIXL_INCLUDE_SIMULATOR_AARCH64) && ...

#if (__cplusplus >= 201103L) && \
    !defined(VIXL_HAS_MACROASSEMBLER_RUNTIME_CALL_SUPPORT)
#error \
    "C++11 should be sufficient to provide support for `MacroAssembler::CallRuntime()`."
#endif  // #if (__cplusplus >= 201103L) && ...

#ifdef VIXL_HAS_MACROASSEMBLER_RUNTIME_CALL_SUPPORT
TEST(runtime_calls) {
  SETUP();

#ifndef VIXL_HAS_SIMULATED_RUNTIME_CALL_SUPPORT
  if (masm.GenerateSimulatorCode()) {
    // This configuration is unsupported and a `VIXL_UNREACHABLE()` would fire
    // while trying to generate `CallRuntime`. This configuration should only
    // be reachable with C++11 and a (buggy) version of GCC pre-4.9.1.
    TEARDOWN();
    return;
  }
#endif

  START();
  __ Mov(w0, 0);
  __ CallRuntime(runtime_call_add_one);
  __ Mov(w20, w0);

  __ Fmov(d0, 0.0);
  __ Fmov(d1, 1.5);
  __ Fmov(d2, 2.5);
  __ CallRuntime(runtime_call_add_doubles);
  __ Fmov(d20, d0);

  __ Mov(x0, 0x123);
  __ Push(x0, x0);
  __ CallRuntime(runtime_call_one_argument_on_stack);
  __ Mov(x21, x0);
  __ Pop(x0, x1);

  __ Fmov(d0, 314.0);
  __ Fmov(d1, 4.0);
  __ Push(d1, d0);
  __ CallRuntime(runtime_call_two_arguments_on_stack);
  __ Fmov(d21, d0);
  __ Pop(d1, d0);

  int64_t value = 0xbadbeef;
  __ Mov(x0, reinterpret_cast<uint64_t>(&value));
  __ CallRuntime(runtime_call_store_at_address);

  END();

#if defined(VIXL_HAS_SIMULATED_RUNTIME_CALL_SUPPORT) || \
    !defined(VIXL_INCLUDE_SIMULATOR_AARCH64)
  RUN();

  ASSERT_EQUAL_32(1, w20);
  ASSERT_EQUAL_FP64(4.0, d20);
  ASSERT_EQUAL_64(0x123, x21);
  ASSERT_EQUAL_FP64(310.0, d21);
  VIXL_CHECK(value == 0xf00d);
#endif  // #if defined(VIXL_HAS_SIMULATED_RUNTIME_CALL_SUPPORT) || ...

  TEARDOWN();
}
#endif  // #ifdef VIXL_HAS_MACROASSEMBLER_RUNTIME_CALL_SUPPORT


TEST(optimised_mov_register) {
  SETUP();

  START();
  Label start;
  __ Bind(&start);
  __ Mov(x0, x0);
  VIXL_CHECK(masm.GetSizeOfCodeGeneratedSince(&start) == 0);
  __ Mov(w0, w0, kDiscardForSameWReg);
  VIXL_CHECK(masm.GetSizeOfCodeGeneratedSince(&start) == 0);
  __ Mov(w0, w0);
  VIXL_CHECK(masm.GetSizeOfCodeGeneratedSince(&start) == kInstructionSize);

  END();

  RUN();

  TEARDOWN();
}


TEST(nop) {
  MacroAssembler masm;

  Label start;
  __ Bind(&start);
  __ Nop();
  // `MacroAssembler::Nop` must generate at least one nop.
  VIXL_CHECK(masm.GetSizeOfCodeGeneratedSince(&start) >= kInstructionSize);

  masm.FinalizeCode();
}

TEST(scratch_scope_basic_v) {
  MacroAssembler masm;

  {
    UseScratchRegisterScope temps(&masm);
    VRegister temp = temps.AcquireVRegisterOfSize(kQRegSize);
    VIXL_CHECK(temp.Aliases(v31));
  }
  {
    UseScratchRegisterScope temps(&masm);
    VRegister temp = temps.AcquireVRegisterOfSize(kDRegSize);
    VIXL_CHECK(temp.Aliases(v31));
  }
  {
    UseScratchRegisterScope temps(&masm);
    VRegister temp = temps.AcquireVRegisterOfSize(kSRegSize);
    VIXL_CHECK(temp.Aliases(v31));
  }
}

TEST(static_register_types) {
  SETUP();
  START();

  // [WX]Register implicitly casts to Register.
  XRegister x_x0(0);
  WRegister w_w0(0);
  Register r_x0 = x_x0;
  Register r_w0 = w_w0;
  VIXL_CHECK(r_x0.Is(x_x0));
  VIXL_CHECK(x_x0.Is(r_x0));
  VIXL_CHECK(r_w0.Is(w_w0));
  VIXL_CHECK(w_w0.Is(r_w0));

  // Register explicitly casts to [WX]Register.
  Register r_x1(1, kXRegSize);
  Register r_w1(1, kWRegSize);
  XRegister x_x1(r_x1);
  WRegister w_w1(r_w1);
  VIXL_CHECK(r_x1.Is(x_x1));
  VIXL_CHECK(x_x1.Is(r_x1));
  VIXL_CHECK(r_w1.Is(w_w1));
  VIXL_CHECK(w_w1.Is(r_w1));

  // [WX]Register implicitly casts to CPURegister.
  XRegister x_x2(2);
  WRegister w_w2(2);
  CPURegister cpu_x2 = x_x2;
  CPURegister cpu_w2 = w_w2;
  VIXL_CHECK(cpu_x2.Is(x_x2));
  VIXL_CHECK(x_x2.Is(cpu_x2));
  VIXL_CHECK(cpu_w2.Is(w_w2));
  VIXL_CHECK(w_w2.Is(cpu_w2));

  END();
  TEARDOWN();
}


}  // namespace aarch64
}  // namespace vixl