1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "code_generator_arm64.h" 18 19 #include "arch/arm64/instruction_set_features_arm64.h" 20 #include "mirror/array-inl.h" 21 #include "mirror/string.h" 22 23 using namespace vixl::aarch64; // NOLINT(build/namespaces) 24 25 namespace art { 26 namespace arm64 { 27 28 using helpers::ARM64EncodableConstantOrRegister; 29 using helpers::Arm64CanEncodeConstantAsImmediate; 30 using helpers::DRegisterFrom; 31 using helpers::HeapOperand; 32 using helpers::InputRegisterAt; 33 using helpers::Int64FromLocation; 34 using helpers::OutputRegister; 35 using helpers::VRegisterFrom; 36 using helpers::WRegisterFrom; 37 using helpers::XRegisterFrom; 38 39 #define __ GetVIXLAssembler()-> 40 41 // Build-time switch for Armv8.4-a dot product instructions. 42 // TODO: Enable dot product when there is a device to test it on. 43 static constexpr bool kArm64EmitDotProdInstructions = false; 44 45 // Returns whether dot product instructions should be emitted. 
static bool ShouldEmitDotProductInstructions(const CodeGeneratorARM64* codegen_) {
  // Both the build-time switch and the target CPU feature must agree.
  return kArm64EmitDotProdInstructions && codegen_->GetInstructionSetFeatures().HasDotProd();
}

// Sets up locations for broadcasting a scalar into all lanes of a SIMD
// register. Integral inputs may be instruction-encodable constants or core
// registers; FP inputs stay in FP registers unless the constant can be
// encoded directly as an immediate.
void LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  HInstruction* input = instruction->InputAt(0);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, ARM64EncodableConstantOrRegister(input, instruction));
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      if (input->IsConstant() &&
          Arm64CanEncodeConstantAsImmediate(input->AsConstant(), instruction)) {
        locations->SetInAt(0, Location::ConstantLocation(input->AsConstant()));
        locations->SetOut(Location::RequiresFpuRegister());
      } else {
        locations->SetInAt(0, Location::RequiresFpuRegister());
        locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Emits the broadcast: encodable constants use MOVI/FMOV with an immediate,
// register sources use DUP (from a core register for integral types, from
// lane 0 of an FP register for float/double).
void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  Location src_loc = locations->InAt(0);
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Movi(dst.V16B(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.V16B(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Movi(dst.V8H(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.V8H(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Movi(dst.V4S(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.V4S(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Movi(dst.V2D(), Int64FromLocation(src_loc));
      } else {
        // 64-bit lanes must come from an X (64-bit core) register.
        __ Dup(dst.V2D(), XRegisterFrom(src_loc));
      }
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Fmov(dst.V4S(), src_loc.GetConstant()->AsFloatConstant()->GetValue());
      } else {
        __ Dup(dst.V4S(), VRegisterFrom(src_loc).V4S(), 0);
      }
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Fmov(dst.V2D(), src_loc.GetConstant()->AsDoubleConstant()->GetValue());
      } else {
        __ Dup(dst.V2D(), VRegisterFrom(src_loc).V2D(), 0);
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Sets up locations for extracting lane 0 of a vector. Integral results go
// to a core register; FP results reuse the input FP register
// (SameAsFirstInput), so no code needs to be emitted for them.
void LocationsBuilderARM64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Emits the lane-0 extraction: UMOV for 32/64-bit integral lanes; nothing
// for FP, where the scalar already lives in lane 0 of the shared register.
void InstructionCodeGeneratorARM64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Umov(OutputRegister(instruction), src.V4S(), 0);
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Umov(OutputRegister(instruction), src.V2D(), 0);
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 4u);
      DCHECK(locations->InAt(0).Equals(locations->Out()));  // no code required
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector unary operations.
static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      // Boolean-not writes the destination (MOVI) before reading the source,
      // so the output must not share a register with the input.
      locations->SetOut(Location::RequiresFpuRegister(),
                        instruction->IsVecNot() ? Location::kOutputOverlap
                                                : Location::kNoOutputOverlap);
      break;
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecReduce(HVecReduce* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

// Emits a horizontal reduction across all lanes into lane 0 of the
// destination. 32-bit lanes support sum/min/max (ADDV/SMINV/SMAXV); 64-bit
// lanes support only sum (ADDP).
void InstructionCodeGeneratorARM64::VisitVecReduce(HVecReduce* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = DRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      switch (instruction->GetReductionKind()) {
        case HVecReduce::kSum:
          __ Addv(dst.S(), src.V4S());
          break;
        case HVecReduce::kMin:
          __ Sminv(dst.S(), src.V4S());
          break;
        case HVecReduce::kMax:
          __ Smaxv(dst.S(), src.V4S());
          break;
      }
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      switch (instruction->GetReductionKind()) {
        case HVecReduce::kSum:
          __ Addp(dst.D(), src.V2D());
          break;
        default:
          LOG(FATAL) << "Unsupported SIMD min/max";
          UNREACHABLE();
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecCnv(HVecCnv* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

// Emits the vector conversion. Only packed Int32 -> Float32 (SCVTF) is
// supported here.
void InstructionCodeGeneratorARM64::VisitVecCnv(HVecCnv* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  DataType::Type from = instruction->GetInputType();
  DataType::Type to = instruction->GetResultType();
  if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) {
    DCHECK_EQ(4u, instruction->GetVectorLength());
    __ Scvtf(dst.V4S(), src.V4S());
  } else {
    LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
  }
}

void LocationsBuilderARM64::VisitVecNeg(HVecNeg* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

// Lane-wise negation: NEG for integral lanes, FNEG for FP lanes.
void InstructionCodeGeneratorARM64::VisitVecNeg(HVecNeg* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Neg(dst.V16B(), src.V16B());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Neg(dst.V8H(), src.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Neg(dst.V4S(), src.V4S());
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Neg(dst.V2D(), src.V2D());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fneg(dst.V4S(), src.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fneg(dst.V2D(), src.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecAbs(HVecAbs* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

// Lane-wise absolute value: ABS for signed integral lanes, FABS for FP
// lanes. Note the unsigned lane types are intentionally absent.
void InstructionCodeGeneratorARM64::VisitVecAbs(HVecAbs* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Abs(dst.V16B(), src.V16B());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Abs(dst.V8H(), src.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Abs(dst.V4S(), src.V4S());
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Abs(dst.V2D(), src.V2D());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fabs(dst.V4S(), src.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fabs(dst.V2D(), src.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecNot(HVecNot* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

// Lane-wise bitwise-not. Booleans are stored as 0/1, so boolean-not is
// computed as (1 EOR src); other integral types use NOT over all 128 bits.
void InstructionCodeGeneratorARM64::VisitVecNot(HVecNot* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:  // special case boolean-not
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Movi(dst.V16B(), 1);
      __ Eor(dst.V16B(), dst.V16B(), src.V16B());
      break;
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      __ Not(dst.V16B(), src.V16B());  // lanes do not matter
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector binary operations.
static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecAdd(HVecAdd* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

// Lane-wise addition: ADD for integral lanes, FADD for FP lanes.
void InstructionCodeGeneratorARM64::VisitVecAdd(HVecAdd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Add(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Add(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Add(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Add(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fadd(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fadd(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

// Lane-wise saturating addition: UQADD for unsigned lanes, SQADD for
// signed; only 8- and 16-bit lane types are supported.
void InstructionCodeGeneratorARM64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Uqadd(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Sqadd(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Uqadd(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Sqadd(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

// Lane-wise halving addition, (a + b) >> 1 without intermediate overflow:
// rounded variants (URHADD/SRHADD) add 1 before the shift, truncating
// variants (UHADD/SHADD) do not.
void InstructionCodeGeneratorARM64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      instruction->IsRounded()
          ? __ Urhadd(dst.V16B(), lhs.V16B(), rhs.V16B())
          : __ Uhadd(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      instruction->IsRounded()
          ? __ Srhadd(dst.V16B(), lhs.V16B(), rhs.V16B())
          : __ Shadd(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      instruction->IsRounded()
          ? __ Urhadd(dst.V8H(), lhs.V8H(), rhs.V8H())
          : __ Uhadd(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      instruction->IsRounded()
          ? __ Srhadd(dst.V8H(), lhs.V8H(), rhs.V8H())
          : __ Shadd(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecSub(HVecSub* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

// Lane-wise subtraction: SUB for integral lanes, FSUB for FP lanes.
void InstructionCodeGeneratorARM64::VisitVecSub(HVecSub* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Sub(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Sub(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Sub(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Sub(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fsub(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fsub(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecSaturationSub(HVecSaturationSub* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

// Lane-wise saturating subtraction: UQSUB for unsigned lanes, SQSUB for
// signed; only 8- and 16-bit lane types are supported.
void InstructionCodeGeneratorARM64::VisitVecSaturationSub(HVecSaturationSub* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Uqsub(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Sqsub(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Uqsub(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Sqsub(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecMul(HVecMul* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

// Lane-wise multiplication: MUL for integral lanes, FMUL for FP lanes.
// There is no kInt64 case: NEON has no 2D integer multiply.
void InstructionCodeGeneratorARM64::VisitVecMul(HVecMul* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Mul(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Mul(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Mul(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fmul(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fmul(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecDiv(HVecDiv* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

// Lane-wise division (FDIV); only FP lane types are supported.
void InstructionCodeGeneratorARM64::VisitVecDiv(HVecDiv* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fdiv(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fdiv(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecMin(HVecMin* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

// Lane-wise minimum: UMIN/SMIN by lane signedness, FMIN for FP lanes.
void InstructionCodeGeneratorARM64::VisitVecMin(HVecMin* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Umin(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Smin(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Umin(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Smin(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kUint32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Umin(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Smin(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fmin(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fmin(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecMax(HVecMax* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

// Lane-wise maximum: UMAX/SMAX by lane signedness, FMAX for FP lanes.
void InstructionCodeGeneratorARM64::VisitVecMax(HVecMax* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Umax(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Smax(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Umax(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Smax(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kUint32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Umax(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Smax(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fmax(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fmax(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecAnd(HVecAnd* instruction) {
  // TODO: Allow constants supported by BIC (vector, immediate).
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

// Bitwise AND over the full 128-bit register; lane type only gates which
// packed types are accepted.
void InstructionCodeGeneratorARM64::VisitVecAnd(HVecAnd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      __ And(dst.V16B(), lhs.V16B(), rhs.V16B());  // lanes do not matter
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// And-not is not implemented for ARM64 yet.
void LocationsBuilderARM64::VisitVecAndNot(HVecAndNot* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
}

void InstructionCodeGeneratorARM64::VisitVecAndNot(HVecAndNot* instruction) {
  // TODO: Use BIC (vector, register).
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
}

void LocationsBuilderARM64::VisitVecOr(HVecOr* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

// Bitwise OR over the full 128-bit register.
void InstructionCodeGeneratorARM64::VisitVecOr(HVecOr* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      __ Orr(dst.V16B(), lhs.V16B(), rhs.V16B());  // lanes do not matter
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecXor(HVecXor* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

// Bitwise XOR over the full 128-bit register.
void InstructionCodeGeneratorARM64::VisitVecXor(HVecXor* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      __ Eor(dst.V16B(), lhs.V16B(), rhs.V16B());  // lanes do not matter
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector shift operations. The shift
// distance (input 1) must be a constant — it is encoded as an immediate.
static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecShl(HVecShl* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}

// Lane-wise shift left by an immediate (SHL).
void InstructionCodeGeneratorARM64::VisitVecShl(HVecShl* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Shl(dst.V16B(), lhs.V16B(), value);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Shl(dst.V8H(), lhs.V8H(), value);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Shl(dst.V4S(), lhs.V4S(), value);
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Shl(dst.V2D(), lhs.V2D(), value);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecShr(HVecShr* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}

// Lane-wise arithmetic shift right by an immediate. Note SSHR (signed) is
// emitted for the kUint8/kUint16 lane types here as well.
void InstructionCodeGeneratorARM64::VisitVecShr(HVecShr* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Sshr(dst.V16B(), lhs.V16B(), value);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Sshr(dst.V8H(), lhs.V8H(), value);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Sshr(dst.V4S(), lhs.V4S(), value);
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Sshr(dst.V2D(), lhs.V2D(), value);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecUShr(HVecUShr* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}

// Lane-wise logical (unsigned) shift right by an immediate (USHR).
void InstructionCodeGeneratorARM64::VisitVecUShr(HVecUShr* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Ushr(dst.V16B(), lhs.V16B(), value);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Ushr(dst.V8H(), lhs.V8H(), value);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Ushr(dst.V4S(), lhs.V4S(), value);
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Ushr(dst.V2D(), lhs.V2D(), value);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Sets up locations for assembling a vector from scalars. NOTE(review):
// this definition continues past the end of the visible chunk.
void LocationsBuilderARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);

  DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented

  HInstruction* input = instruction->InputAt(0);
  bool is_zero = IsZeroBitPattern(input);

  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
                                    : Location::RequiresRegister());
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, is_zero ?
Location::ConstantLocation(input->AsConstant()) 989 : Location::RequiresFpuRegister()); 990 locations->SetOut(Location::RequiresFpuRegister()); 991 break; 992 default: 993 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); 994 UNREACHABLE(); 995 } 996 } 997 998 void InstructionCodeGeneratorARM64::VisitVecSetScalars(HVecSetScalars* instruction) { 999 LocationSummary* locations = instruction->GetLocations(); 1000 VRegister dst = VRegisterFrom(locations->Out()); 1001 1002 DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented 1003 1004 // Zero out all other elements first. 1005 __ Movi(dst.V16B(), 0); 1006 1007 // Shorthand for any type of zero. 1008 if (IsZeroBitPattern(instruction->InputAt(0))) { 1009 return; 1010 } 1011 1012 // Set required elements. 1013 switch (instruction->GetPackedType()) { 1014 case DataType::Type::kBool: 1015 case DataType::Type::kUint8: 1016 case DataType::Type::kInt8: 1017 DCHECK_EQ(16u, instruction->GetVectorLength()); 1018 __ Mov(dst.V16B(), 0, InputRegisterAt(instruction, 0)); 1019 break; 1020 case DataType::Type::kUint16: 1021 case DataType::Type::kInt16: 1022 DCHECK_EQ(8u, instruction->GetVectorLength()); 1023 __ Mov(dst.V8H(), 0, InputRegisterAt(instruction, 0)); 1024 break; 1025 case DataType::Type::kInt32: 1026 DCHECK_EQ(4u, instruction->GetVectorLength()); 1027 __ Mov(dst.V4S(), 0, InputRegisterAt(instruction, 0)); 1028 break; 1029 case DataType::Type::kInt64: 1030 DCHECK_EQ(2u, instruction->GetVectorLength()); 1031 __ Mov(dst.V2D(), 0, InputRegisterAt(instruction, 0)); 1032 break; 1033 default: 1034 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); 1035 UNREACHABLE(); 1036 } 1037 } 1038 1039 // Helper to set up locations for vector accumulations. 
// Accumulations take three vector inputs (acc, left, right) and update the
// accumulator in place, hence the SameAsFirstInput output constraint.
static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::RequiresFpuRegister());
      locations->SetInAt(2, Location::RequiresFpuRegister());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
  CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
}

// Some early revisions of the Cortex-A53 have an erratum (835769) whereby it is possible for a
// 64-bit scalar multiply-accumulate instruction in AArch64 state to generate an incorrect result.
// However vector MultiplyAccumulate instruction is not affected.
// Emits MLA/MLS for acc += left * right (or acc -= left * right), per lane.
// See the erratum note above: the vector forms are not affected by 835769.
void InstructionCodeGeneratorARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister acc = VRegisterFrom(locations->InAt(0));
  VRegister left = VRegisterFrom(locations->InAt(1));
  VRegister right = VRegisterFrom(locations->InAt(2));

  DCHECK(locations->InAt(0).Equals(locations->Out()));

  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      if (instruction->GetOpKind() == HInstruction::kAdd) {
        __ Mla(acc.V16B(), left.V16B(), right.V16B());
      } else {
        __ Mls(acc.V16B(), left.V16B(), right.V16B());
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      if (instruction->GetOpKind() == HInstruction::kAdd) {
        __ Mla(acc.V8H(), left.V8H(), right.V8H());
      } else {
        __ Mls(acc.V8H(), left.V8H(), right.V8H());
      }
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      if (instruction->GetOpKind() == HInstruction::kAdd) {
        __ Mla(acc.V4S(), left.V4S(), right.V4S());
      } else {
        __ Mls(acc.V4S(), left.V4S(), right.V4S());
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
  CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
  // Some conversions require temporary registers.
  // The number of temps mirrors the widening sequences emitted in the codegen
  // below (how many intermediate SXTL results must be live at once).
  LocationSummary* locations = instruction->GetLocations();
  HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
  HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
  DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
            HVecOperation::ToSignedType(b->GetPackedType()));
  switch (a->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      switch (instruction->GetPackedType()) {
        case DataType::Type::kInt64:
          locations->AddTemp(Location::RequiresFpuRegister());
          locations->AddTemp(Location::RequiresFpuRegister());
          FALLTHROUGH_INTENDED;
        case DataType::Type::kInt32:
          locations->AddTemp(Location::RequiresFpuRegister());
          locations->AddTemp(Location::RequiresFpuRegister());
          break;
        default:
          break;
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      if (instruction->GetPackedType() == DataType::Type::kInt64) {
        locations->AddTemp(Location::RequiresFpuRegister());
        locations->AddTemp(Location::RequiresFpuRegister());
      }
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      // Same-width SAD needs one temp for the Sub/Abs intermediate.
      if (instruction->GetPackedType() == a->GetPackedType()) {
        locations->AddTemp(Location::RequiresFpuRegister());
      }
      break;
    default:
      break;
  }
}

// Sum-of-absolute-differences accumulation. Uses SABAL/SABAL2 where a
// widening form exists, and SXTL/SXTL2 to pre-widen operands when the
// accumulator is more than one step wider than the inputs.
void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister acc = VRegisterFrom(locations->InAt(0));
  VRegister left = VRegisterFrom(locations->InAt(1));
  VRegister right = VRegisterFrom(locations->InAt(2));

  DCHECK(locations->InAt(0).Equals(locations->Out()));

  // Handle all feasible acc_T += sad(a_S, b_S) type combinations (T x S).
  HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
  HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
  DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
            HVecOperation::ToSignedType(b->GetPackedType()));
  switch (a->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, a->GetVectorLength());
      switch (instruction->GetPackedType()) {
        case DataType::Type::kInt16:
          DCHECK_EQ(8u, instruction->GetVectorLength());
          // One widening step: 8-bit inputs accumulated into 16-bit lanes.
          __ Sabal(acc.V8H(), left.V8B(), right.V8B());
          __ Sabal2(acc.V8H(), left.V16B(), right.V16B());
          break;
        case DataType::Type::kInt32: {
          DCHECK_EQ(4u, instruction->GetVectorLength());
          // Two widening steps: sign-extend 8->16 first, then SABAL 16->32.
          VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
          VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
          __ Sxtl(tmp1.V8H(), left.V8B());
          __ Sxtl(tmp2.V8H(), right.V8B());
          __ Sabal(acc.V4S(), tmp1.V4H(), tmp2.V4H());
          __ Sabal2(acc.V4S(), tmp1.V8H(), tmp2.V8H());
          __ Sxtl2(tmp1.V8H(), left.V16B());
          __ Sxtl2(tmp2.V8H(), right.V16B());
          __ Sabal(acc.V4S(), tmp1.V4H(), tmp2.V4H());
          __ Sabal2(acc.V4S(), tmp1.V8H(), tmp2.V8H());
          break;
        }
        case DataType::Type::kInt64: {
          DCHECK_EQ(2u, instruction->GetVectorLength());
          // Three widening steps: 8->16->32, then SABAL 32->64, for each
          // quarter of the 16-byte inputs.
          VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
          VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
          VRegister tmp3 = VRegisterFrom(locations->GetTemp(2));
          VRegister tmp4 = VRegisterFrom(locations->GetTemp(3));
          __ Sxtl(tmp1.V8H(), left.V8B());
          __ Sxtl(tmp2.V8H(), right.V8B());
          __ Sxtl(tmp3.V4S(), tmp1.V4H());
          __ Sxtl(tmp4.V4S(), tmp2.V4H());
          __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
          __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
          __ Sxtl2(tmp3.V4S(), tmp1.V8H());
          __ Sxtl2(tmp4.V4S(), tmp2.V8H());
          __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
          __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
          __ Sxtl2(tmp1.V8H(), left.V16B());
          __ Sxtl2(tmp2.V8H(), right.V16B());
          __ Sxtl(tmp3.V4S(), tmp1.V4H());
          __ Sxtl(tmp4.V4S(), tmp2.V4H());
          __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
          __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
          __ Sxtl2(tmp3.V4S(), tmp1.V8H());
          __ Sxtl2(tmp4.V4S(), tmp2.V8H());
          __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
          __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
          break;
        }
        default:
          LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
          UNREACHABLE();
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, a->GetVectorLength());
      switch (instruction->GetPackedType()) {
        case DataType::Type::kInt32:
          DCHECK_EQ(4u, instruction->GetVectorLength());
          __ Sabal(acc.V4S(), left.V4H(), right.V4H());
          __ Sabal2(acc.V4S(), left.V8H(), right.V8H());
          break;
        case DataType::Type::kInt64: {
          DCHECK_EQ(2u, instruction->GetVectorLength());
          VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
          VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
          __ Sxtl(tmp1.V4S(), left.V4H());
          __ Sxtl(tmp2.V4S(), right.V4H());
          __ Sabal(acc.V2D(), tmp1.V2S(), tmp2.V2S());
          __ Sabal2(acc.V2D(), tmp1.V4S(), tmp2.V4S());
          __ Sxtl2(tmp1.V4S(), left.V8H());
          __ Sxtl2(tmp2.V4S(), right.V8H());
          __ Sabal(acc.V2D(), tmp1.V2S(), tmp2.V2S());
          __ Sabal2(acc.V2D(), tmp1.V4S(), tmp2.V4S());
          break;
        }
        default:
          LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
          UNREACHABLE();
      }
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, a->GetVectorLength());
      switch (instruction->GetPackedType()) {
        case DataType::Type::kInt32: {
          DCHECK_EQ(4u, instruction->GetVectorLength());
          // Same width: no SABAL form applies, so compute |left - right|
          // explicitly and add it into the accumulator.
          VRegister tmp = VRegisterFrom(locations->GetTemp(0));
          __ Sub(tmp.V4S(), left.V4S(), right.V4S());
          __ Abs(tmp.V4S(), tmp.V4S());
          __ Add(acc.V4S(), acc.V4S(), tmp.V4S());
          break;
        }
        case DataType::Type::kInt64:
          DCHECK_EQ(2u, instruction->GetVectorLength());
          __ Sabal(acc.V2D(), left.V2S(), right.V2S());
          __ Sabal2(acc.V2D(), left.V4S(), right.V4S());
          break;
        default:
          LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
          UNREACHABLE();
      }
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, a->GetVectorLength());
      switch (instruction->GetPackedType()) {
        case DataType::Type::kInt64: {
          DCHECK_EQ(2u, instruction->GetVectorLength());
          VRegister tmp = VRegisterFrom(locations->GetTemp(0));
          __ Sub(tmp.V2D(), left.V2D(), right.V2D());
          __ Abs(tmp.V2D(), tmp.V2D());
          __ Add(acc.V2D(), acc.V2D(), tmp.V2D());
          break;
        }
        default:
          LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
          UNREACHABLE();
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
  }
}

void LocationsBuilderARM64::VisitVecDotProd(HVecDotProd* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  DCHECK(instruction->GetPackedType() == DataType::Type::kInt32);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  locations->SetInAt(2, Location::RequiresFpuRegister());
  locations->SetOut(Location::SameAsFirstInput());

  // For Int8 and Uint8 general case we need a temp register.
  if ((DataType::Size(instruction->InputAt(1)->AsVecOperation()->GetPackedType()) == 1) &&
      !ShouldEmitDotProductInstructions(codegen_)) {
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

// Dot product accumulation into a 4 x Int32 accumulator. Uses the Armv8.4-a
// UDOT/SDOT instructions when enabled, otherwise falls back to widening
// multiply (UMULL/SMULL) plus widening add (UADDW/SADDW) sequences.
void InstructionCodeGeneratorARM64::VisitVecDotProd(HVecDotProd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  VRegister acc = VRegisterFrom(locations->InAt(0));
  VRegister left = VRegisterFrom(locations->InAt(1));
  VRegister right = VRegisterFrom(locations->InAt(2));
  HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
  HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
  DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
            HVecOperation::ToSignedType(b->GetPackedType()));
  DCHECK_EQ(instruction->GetPackedType(), DataType::Type::kInt32);
  DCHECK_EQ(4u, instruction->GetVectorLength());

  size_t inputs_data_size = DataType::Size(a->GetPackedType());
  switch (inputs_data_size) {
    case 1u: {
      DCHECK_EQ(16u, a->GetVectorLength());
      if (instruction->IsZeroExtending()) {
        if (ShouldEmitDotProductInstructions(codegen_)) {
          __ Udot(acc.V4S(), left.V16B(), right.V16B());
        } else {
          // Fallback: widen 8x8->16 products, then accumulate into 32-bit lanes.
          VRegister tmp = VRegisterFrom(locations->GetTemp(0));
          __ Umull(tmp.V8H(), left.V8B(), right.V8B());
          __ Uaddw(acc.V4S(), acc.V4S(), tmp.V4H());
          __ Uaddw2(acc.V4S(), acc.V4S(), tmp.V8H());

          __ Umull2(tmp.V8H(), left.V16B(), right.V16B());
          __ Uaddw(acc.V4S(), acc.V4S(), tmp.V4H());
          __ Uaddw2(acc.V4S(), acc.V4S(), tmp.V8H());
        }
      } else {
        if (ShouldEmitDotProductInstructions(codegen_)) {
          __ Sdot(acc.V4S(), left.V16B(), right.V16B());
        } else {
          // Signed variant of the fallback above.
          VRegister tmp = VRegisterFrom(locations->GetTemp(0));
          __ Smull(tmp.V8H(), left.V8B(), right.V8B());
          __ Saddw(acc.V4S(), acc.V4S(), tmp.V4H());
          __ Saddw2(acc.V4S(), acc.V4S(), tmp.V8H());

          __ Smull2(tmp.V8H(), left.V16B(), right.V16B());
          __ Saddw(acc.V4S(), acc.V4S(), tmp.V4H());
          __ Saddw2(acc.V4S(), acc.V4S(), tmp.V8H());
        }
      }
      break;
    }
    case 2u:
      DCHECK_EQ(8u, a->GetVectorLength());
      // 16-bit inputs: a single widening multiply-accumulate suffices.
      if (instruction->IsZeroExtending()) {
        __ Umlal(acc.V4S(), left.V4H(), right.V4H());
        __ Umlal2(acc.V4S(), left.V8H(), right.V8H());
      } else {
        __ Smlal(acc.V4S(), left.V4H(), right.V4H());
        __ Smlal2(acc.V4S(), left.V8H(), right.V8H());
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type size: " << inputs_data_size;
  }
}

// Helper to set up locations for vector memory operations.
static void CreateVecMemLocations(ArenaAllocator* allocator,
                                  HVecMemoryOperation* instruction,
                                  bool is_load) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      // Input 0 is the base (array/string reference), input 1 the index.
      locations->SetInAt(0, Location::RequiresRegister());
      locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
      if (is_load) {
        locations->SetOut(Location::RequiresFpuRegister());
      } else {
        locations->SetInAt(2, Location::RequiresFpuRegister());
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector memory operations. Returns the memory operand and,
// if used, sets the output parameter scratch to a temporary register used in this operand,
// so that the client can release it right after the memory operand use.
// Computes the memory operand for a vector load/store (see comment above).
MemOperand InstructionCodeGeneratorARM64::VecAddress(
    HVecMemoryOperation* instruction,
    UseScratchRegisterScope* temps_scope,
    size_t size,
    bool is_string_char_at,
    /*out*/ Register* scratch) {
  LocationSummary* locations = instruction->GetLocations();
  Register base = InputRegisterAt(instruction, 0);

  if (instruction->InputAt(1)->IsIntermediateAddressIndex()) {
    DCHECK(!is_string_char_at);
    return MemOperand(base.X(), InputRegisterAt(instruction, 1).X());
  }

  Location index = locations->InAt(1);
  // Offset of the element data within the object: string value vs array data.
  uint32_t offset = is_string_char_at
      ? mirror::String::ValueOffset().Uint32Value()
      : mirror::Array::DataOffset(size).Uint32Value();
  size_t shift = ComponentSizeShiftWidth(size);

  // HIntermediateAddress optimization is only applied for scalar ArrayGet and ArraySet.
  DCHECK(!instruction->InputAt(0)->IsIntermediateAddress());

  if (index.IsConstant()) {
    // Constant index: fold it into the immediate offset.
    offset += Int64FromLocation(index) << shift;
    return HeapOperand(base, offset);
  } else {
    // Variable index: base + (index << shift) computed into a scratch
    // register; the caller releases *scratch after using the operand.
    *scratch = temps_scope->AcquireSameSizeAs(base);
    __ Add(*scratch, base, Operand(WRegisterFrom(index), LSL, shift));
    return HeapOperand(*scratch, offset);
  }
}

void LocationsBuilderARM64::VisitVecLoad(HVecLoad* instruction) {
  CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true);
}

// Emits a vector load, with special handling for loads out of a possibly
// compressed java.lang.String value array.
void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  size_t size = DataType::Size(instruction->GetPackedType());
  VRegister reg = VRegisterFrom(locations->Out());
  UseScratchRegisterScope temps(GetVIXLAssembler());
  Register scratch;

  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt16:  // (short) s.charAt(.) can yield HVecLoad/Int16/StringCharAt.
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      // Special handling of compressed/uncompressed string load.
      if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
        vixl::aarch64::Label uncompressed_load, done;
        // Test compression bit.
        static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
                      "Expecting 0=compressed, 1=uncompressed");
        uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
        Register length = temps.AcquireW();
        __ Ldr(length, HeapOperand(InputRegisterAt(instruction, 0), count_offset));
        __ Tbnz(length.W(), 0, &uncompressed_load);
        temps.Release(length);  // no longer needed
        // Zero extend 8 compressed bytes into 8 chars.
        __ Ldr(DRegisterFrom(locations->Out()).V8B(),
               VecAddress(instruction, &temps, 1, /*is_string_char_at*/ true, &scratch));
        __ Uxtl(reg.V8H(), reg.V8B());
        __ B(&done);
        if (scratch.IsValid()) {
          temps.Release(scratch);  // if used, no longer needed
        }
        // Load 8 direct uncompressed chars.
        __ Bind(&uncompressed_load);
        __ Ldr(reg, VecAddress(instruction, &temps, size, /*is_string_char_at*/ true, &scratch));
        __ Bind(&done);
        return;
      }
      FALLTHROUGH_INTENDED;
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 16u);
      __ Ldr(reg, VecAddress(instruction, &temps, size, instruction->IsStringCharAt(), &scratch));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecStore(HVecStore* instruction) {
  CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false);
}

// Emits a vector store of input 2 to the address formed from base (input 0)
// and index (input 1); stores never target compressed strings.
void InstructionCodeGeneratorARM64::VisitVecStore(HVecStore* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  size_t size = DataType::Size(instruction->GetPackedType());
  VRegister reg = VRegisterFrom(locations->InAt(2));
  UseScratchRegisterScope temps(GetVIXLAssembler());
  Register scratch;

  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 16u);
      __ Str(reg, VecAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
1519 1520 #undef __ 1521 1522 } // namespace arm64 1523 } // namespace art 1524