/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "code_generator_arm64.h"

#include "mirror/array-inl.h"
#include "mirror/string.h"

using namespace vixl::aarch64;  // NOLINT(build/namespaces)

namespace art {
namespace arm64 {

using helpers::ARM64EncodableConstantOrRegister;
using helpers::Arm64CanEncodeConstantAsImmediate;
using helpers::DRegisterFrom;
using helpers::HeapOperand;
using helpers::InputRegisterAt;
using helpers::Int64ConstantFrom;
using helpers::OutputRegister;
using helpers::VRegisterFrom;
using helpers::WRegisterFrom;
using helpers::XRegisterFrom;

#define __ GetVIXLAssembler()->

// Sets up locations for broadcasting a scalar into all lanes of a SIMD register.
// Integral inputs may be encodable constants or core registers; FP inputs are
// either an encodable FP constant or an FP register.
void LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  HInstruction* input = instruction->InputAt(0);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, ARM64EncodableConstantOrRegister(input, instruction));
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      if (input->IsConstant() &&
          Arm64CanEncodeConstantAsImmediate(input->AsConstant(), instruction)) {
        // Constant can be materialized directly with Fmov; no input register needed.
        locations->SetInAt(0, Location::ConstantLocation(input->AsConstant()));
        locations->SetOut(Location::RequiresFpuRegister());
      } else {
        locations->SetInAt(0, Location::RequiresFpuRegister());
        locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

// Emits the broadcast: Movi/Fmov for encodable constants, Dup otherwise.
void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  Location src_loc = locations->InAt(0);
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Movi(dst.V16B(), Int64ConstantFrom(src_loc));
      } else {
        __ Dup(dst.V16B(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Movi(dst.V8H(), Int64ConstantFrom(src_loc));
      } else {
        __ Dup(dst.V8H(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Movi(dst.V4S(), Int64ConstantFrom(src_loc));
      } else {
        __ Dup(dst.V4S(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Movi(dst.V2D(), Int64ConstantFrom(src_loc));
      } else {
        __ Dup(dst.V2D(), XRegisterFrom(src_loc));
      }
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Fmov(dst.V4S(), src_loc.GetConstant()->AsFloatConstant()->GetValue());
      } else {
        // Broadcast lane 0 of the source FP register.
        __ Dup(dst.V4S(), VRegisterFrom(src_loc).V4S(), 0);
      }
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Fmov(dst.V2D(), src_loc.GetConstant()->AsDoubleConstant()->GetValue());
      } else {
        __ Dup(dst.V2D(), VRegisterFrom(src_loc).V2D(), 0);
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

// Sets up locations for extracting lane 0 of a vector as a scalar. Integral
// results go to a core register; FP results alias the input register (lane 0
// of an FP vector register is the scalar register, so no move is needed).
void LocationsBuilderARM64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

// Emits the lane-0 extraction. Note: only kInt32/kInt64 integral extraction is
// implemented here; narrower integral types fall through to the FATAL default.
void InstructionCodeGeneratorARM64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Umov(OutputRegister(instruction), src.V4S(), 0);
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Umov(OutputRegister(instruction), src.V2D(), 0);
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 4u);
      DCHECK(locations->InAt(0).Equals(locations->Out()));  // no code required
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

// Helper to set up locations for vector unary operations.
static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      // Boolean-not writes the destination (Movi) before reading the source
      // (Eor) — see VisitVecNot — so dst must not alias src.
      locations->SetOut(Location::RequiresFpuRegister(),
                        instruction->IsVecNot() ? Location::kOutputOverlap
                                                : Location::kNoOutputOverlap);
      break;
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecReduce(HVecReduce* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

// Reduces a vector to a scalar held in lane 0 of the destination:
// kInt32 supports sum/min/max (Addv/Sminv/Smaxv); kInt64 supports sum only (Addp).
void InstructionCodeGeneratorARM64::VisitVecReduce(HVecReduce* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = DRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      switch (instruction->GetKind()) {
        case HVecReduce::kSum:
          __ Addv(dst.S(), src.V4S());
          break;
        case HVecReduce::kMin:
          __ Sminv(dst.S(), src.V4S());
          break;
        case HVecReduce::kMax:
          __ Smaxv(dst.S(), src.V4S());
          break;
      }
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      switch (instruction->GetKind()) {
        case HVecReduce::kSum:
          __ Addp(dst.D(), src.V2D());
          break;
        default:
          LOG(FATAL) << "Unsupported SIMD min/max";
          UNREACHABLE();
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecCnv(HVecCnv* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

// Vector conversion; only int32 -> float32 (Scvtf) is implemented.
void InstructionCodeGeneratorARM64::VisitVecCnv(HVecCnv* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  DataType::Type from = instruction->GetInputType();
  DataType::Type to = instruction->GetResultType();
  if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) {
    DCHECK_EQ(4u, instruction->GetVectorLength());
    __ Scvtf(dst.V4S(), src.V4S());
  } else {
    LOG(FATAL) << "Unsupported SIMD type";
  }
}

void LocationsBuilderARM64::VisitVecNeg(HVecNeg* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

// Lane-wise negation: Neg for integral lanes, Fneg for FP lanes.
void InstructionCodeGeneratorARM64::VisitVecNeg(HVecNeg* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Neg(dst.V16B(), src.V16B());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Neg(dst.V8H(), src.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Neg(dst.V4S(), src.V4S());
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Neg(dst.V2D(), src.V2D());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fneg(dst.V4S(), src.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fneg(dst.V2D(), src.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecAbs(HVecAbs* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

// Lane-wise absolute value: Abs for (signed) integral lanes, Fabs for FP lanes.
void InstructionCodeGeneratorARM64::VisitVecAbs(HVecAbs* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Abs(dst.V16B(), src.V16B());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Abs(dst.V8H(), src.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Abs(dst.V4S(), src.V4S());
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Abs(dst.V2D(), src.V2D());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fabs(dst.V4S(), src.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fabs(dst.V2D(), src.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecNot(HVecNot* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

// Lane-wise bitwise negation. Booleans are stored as 0/1, so boolean-not is
// computed as (x XOR 1) rather than a full bitwise NOT.
void InstructionCodeGeneratorARM64::VisitVecNot(HVecNot* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:  // special case boolean-not
      DCHECK_EQ(16u, instruction->GetVectorLength());
      // dst is written before src is read; CreateVecUnOpLocations requests
      // kOutputOverlap for VecNot so dst cannot alias src here.
      __ Movi(dst.V16B(), 1);
      __ Eor(dst.V16B(), dst.V16B(), src.V16B());
      break;
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      __ Not(dst.V16B(), src.V16B());  // lanes do not matter
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

// Helper to set up locations for vector binary operations.
// Shared location setup for two-input, one-output vector operations:
// both inputs and the output live in FPU (SIMD) registers.
static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecAdd(HVecAdd* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

// Lane-wise addition: Add for integral lanes, Fadd for FP lanes.
void InstructionCodeGeneratorARM64::VisitVecAdd(HVecAdd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Add(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Add(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Add(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Add(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fadd(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fadd(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

// Lane-wise halving add, i.e. (a + b) >> 1 without intermediate overflow.
// Signedness of the packed type picks S*/U* forms; IsRounded() picks the
// rounding (*RHADD) vs truncating (*HADD) variant.
void InstructionCodeGeneratorARM64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      instruction->IsRounded()
          ? __ Urhadd(dst.V16B(), lhs.V16B(), rhs.V16B())
          : __ Uhadd(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      instruction->IsRounded()
          ? __ Srhadd(dst.V16B(), lhs.V16B(), rhs.V16B())
          : __ Shadd(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      instruction->IsRounded()
          ? __ Urhadd(dst.V8H(), lhs.V8H(), rhs.V8H())
          : __ Uhadd(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      instruction->IsRounded()
          ? __ Srhadd(dst.V8H(), lhs.V8H(), rhs.V8H())
          : __ Shadd(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecSub(HVecSub* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

// Lane-wise subtraction: Sub for integral lanes, Fsub for FP lanes.
void InstructionCodeGeneratorARM64::VisitVecSub(HVecSub* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Sub(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Sub(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Sub(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Sub(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fsub(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fsub(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecMul(HVecMul* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

// Lane-wise multiplication. Note there is no kInt64 case: 2x64-bit integer
// multiply is not handled here and falls through to the FATAL default.
void InstructionCodeGeneratorARM64::VisitVecMul(HVecMul* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Mul(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Mul(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Mul(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fmul(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fmul(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecDiv(HVecDiv* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

// Lane-wise division; only FP division is supported (Fdiv).
void InstructionCodeGeneratorARM64::VisitVecDiv(HVecDiv* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fdiv(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fdiv(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecMin(HVecMin* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

// Lane-wise minimum: Umin/Smin by signedness of the packed type, Fmin for FP.
// NOTE(review): this switch handles kUint32, but CreateVecBinOpLocations does
// not list kUint32 — confirm a kUint32 min/max can actually reach codegen.
void InstructionCodeGeneratorARM64::VisitVecMin(HVecMin* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Umin(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Smin(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Umin(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Smin(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kUint32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Umin(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Smin(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fmin(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fmin(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecMax(HVecMax* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

// Lane-wise maximum: Umax/Smax by signedness of the packed type, Fmax for FP.
// NOTE(review): same kUint32 locations-builder mismatch as VisitVecMin above.
void InstructionCodeGeneratorARM64::VisitVecMax(HVecMax* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Umax(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Smax(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Umax(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Smax(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kUint32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Umax(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Smax(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fmax(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fmax(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecAnd(HVecAnd* instruction) {
  // TODO: Allow constants supported by BIC (vector, immediate).
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

// Bitwise AND of the full 128-bit registers; lane layout is irrelevant.
void InstructionCodeGeneratorARM64::VisitVecAnd(HVecAnd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      __ And(dst.V16B(), lhs.V16B(), rhs.V16B());  // lanes do not matter
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

// AndNot is not implemented on ARM64 yet; both visitors abort.
void LocationsBuilderARM64::VisitVecAndNot(HVecAndNot* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
}

void InstructionCodeGeneratorARM64::VisitVecAndNot(HVecAndNot* instruction) {
  // TODO: Use BIC (vector, register).
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
}

void LocationsBuilderARM64::VisitVecOr(HVecOr* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

// Bitwise OR of the full 128-bit registers; lane layout is irrelevant.
void InstructionCodeGeneratorARM64::VisitVecOr(HVecOr* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      __ Orr(dst.V16B(), lhs.V16B(), rhs.V16B());  // lanes do not matter
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecXor(HVecXor* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

// Bitwise XOR of the full 128-bit registers; lane layout is irrelevant.
void InstructionCodeGeneratorARM64::VisitVecXor(HVecXor* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      __ Eor(dst.V16B(), lhs.V16B(), rhs.V16B());  // lanes do not matter
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

// Helper to set up locations for vector shift operations.
// The shift distance (input 1) must be a constant; it is encoded as an
// immediate in the shift instruction.
static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecShl(HVecShl* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}

// Lane-wise shift left by an immediate (Shl).
void InstructionCodeGeneratorARM64::VisitVecShl(HVecShl* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Shl(dst.V16B(), lhs.V16B(), value);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Shl(dst.V8H(), lhs.V8H(), value);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Shl(dst.V4S(), lhs.V4S(), value);
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Shl(dst.V2D(), lhs.V2D(), value);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecShr(HVecShr* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}

// Lane-wise arithmetic (signed) shift right by an immediate (Sshr).
void InstructionCodeGeneratorARM64::VisitVecShr(HVecShr* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Sshr(dst.V16B(), lhs.V16B(), value);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Sshr(dst.V8H(), lhs.V8H(), value);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Sshr(dst.V4S(), lhs.V4S(), value);
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Sshr(dst.V2D(), lhs.V2D(), value);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecUShr(HVecUShr* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}

// Lane-wise logical (unsigned) shift right by an immediate (Ushr).
void InstructionCodeGeneratorARM64::VisitVecUShr(HVecUShr* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Ushr(dst.V16B(), lhs.V16B(), value);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Ushr(dst.V8H(), lhs.V8H(), value);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Ushr(dst.V4S(), lhs.V4S(), value);
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Ushr(dst.V2D(), lhs.V2D(), value);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

// Sets up locations for assembling a vector from scalars. Only the
// single-input form is implemented; a zero input stays a constant so codegen
// can materialize the whole vector with a single Movi.
void LocationsBuilderARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);

  DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented

  HInstruction* input = instruction->InputAt(0);
  bool is_zero = IsZeroBitPattern(input);

  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
                                    : Location::RequiresRegister());
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
                                    : Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

// Zeroes the destination, then inserts the single scalar into lane 0
// (unless the input itself is a zero bit pattern, in which case the Movi
// already produced the final value).
void InstructionCodeGeneratorARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister dst = VRegisterFrom(locations->Out());

  DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented

  // Zero out all other elements first.
  __ Movi(dst.V16B(), 0);

  // Shorthand for any type of zero.
  if (IsZeroBitPattern(instruction->InputAt(0))) {
    return;
  }

  // Set required elements.
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Mov(dst.V16B(), 0, InputRegisterAt(instruction, 0));
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Mov(dst.V8H(), 0, InputRegisterAt(instruction, 0));
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Mov(dst.V4S(), 0, InputRegisterAt(instruction, 0));
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Mov(dst.V2D(), 0, InputRegisterAt(instruction, 0));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

// Helper to set up locations for vector accumulations.
966 static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) { 967 LocationSummary* locations = new (allocator) LocationSummary(instruction); 968 switch (instruction->GetPackedType()) { 969 case DataType::Type::kUint8: 970 case DataType::Type::kInt8: 971 case DataType::Type::kUint16: 972 case DataType::Type::kInt16: 973 case DataType::Type::kInt32: 974 case DataType::Type::kInt64: 975 locations->SetInAt(0, Location::RequiresFpuRegister()); 976 locations->SetInAt(1, Location::RequiresFpuRegister()); 977 locations->SetInAt(2, Location::RequiresFpuRegister()); 978 locations->SetOut(Location::SameAsFirstInput()); 979 break; 980 default: 981 LOG(FATAL) << "Unsupported SIMD type"; 982 UNREACHABLE(); 983 } 984 } 985 986 void LocationsBuilderARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { 987 CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction); 988 } 989 990 // Some early revisions of the Cortex-A53 have an erratum (835769) whereby it is possible for a 991 // 64-bit scalar multiply-accumulate instruction in AArch64 state to generate an incorrect result. 992 // However vector MultiplyAccumulate instruction is not affected. 
993 void InstructionCodeGeneratorARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { 994 LocationSummary* locations = instruction->GetLocations(); 995 VRegister acc = VRegisterFrom(locations->InAt(0)); 996 VRegister left = VRegisterFrom(locations->InAt(1)); 997 VRegister right = VRegisterFrom(locations->InAt(2)); 998 999 DCHECK(locations->InAt(0).Equals(locations->Out())); 1000 1001 switch (instruction->GetPackedType()) { 1002 case DataType::Type::kUint8: 1003 case DataType::Type::kInt8: 1004 DCHECK_EQ(16u, instruction->GetVectorLength()); 1005 if (instruction->GetOpKind() == HInstruction::kAdd) { 1006 __ Mla(acc.V16B(), left.V16B(), right.V16B()); 1007 } else { 1008 __ Mls(acc.V16B(), left.V16B(), right.V16B()); 1009 } 1010 break; 1011 case DataType::Type::kUint16: 1012 case DataType::Type::kInt16: 1013 DCHECK_EQ(8u, instruction->GetVectorLength()); 1014 if (instruction->GetOpKind() == HInstruction::kAdd) { 1015 __ Mla(acc.V8H(), left.V8H(), right.V8H()); 1016 } else { 1017 __ Mls(acc.V8H(), left.V8H(), right.V8H()); 1018 } 1019 break; 1020 case DataType::Type::kInt32: 1021 DCHECK_EQ(4u, instruction->GetVectorLength()); 1022 if (instruction->GetOpKind() == HInstruction::kAdd) { 1023 __ Mla(acc.V4S(), left.V4S(), right.V4S()); 1024 } else { 1025 __ Mls(acc.V4S(), left.V4S(), right.V4S()); 1026 } 1027 break; 1028 default: 1029 LOG(FATAL) << "Unsupported SIMD type"; 1030 UNREACHABLE(); 1031 } 1032 } 1033 1034 void LocationsBuilderARM64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { 1035 CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction); 1036 // Some conversions require temporary registers. 
1037 LocationSummary* locations = instruction->GetLocations(); 1038 HVecOperation* a = instruction->InputAt(1)->AsVecOperation(); 1039 HVecOperation* b = instruction->InputAt(2)->AsVecOperation(); 1040 DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()), 1041 HVecOperation::ToSignedType(b->GetPackedType())); 1042 switch (a->GetPackedType()) { 1043 case DataType::Type::kUint8: 1044 case DataType::Type::kInt8: 1045 switch (instruction->GetPackedType()) { 1046 case DataType::Type::kInt64: 1047 locations->AddTemp(Location::RequiresFpuRegister()); 1048 locations->AddTemp(Location::RequiresFpuRegister()); 1049 FALLTHROUGH_INTENDED; 1050 case DataType::Type::kInt32: 1051 locations->AddTemp(Location::RequiresFpuRegister()); 1052 locations->AddTemp(Location::RequiresFpuRegister()); 1053 break; 1054 default: 1055 break; 1056 } 1057 break; 1058 case DataType::Type::kUint16: 1059 case DataType::Type::kInt16: 1060 if (instruction->GetPackedType() == DataType::Type::kInt64) { 1061 locations->AddTemp(Location::RequiresFpuRegister()); 1062 locations->AddTemp(Location::RequiresFpuRegister()); 1063 } 1064 break; 1065 case DataType::Type::kInt32: 1066 case DataType::Type::kInt64: 1067 if (instruction->GetPackedType() == a->GetPackedType()) { 1068 locations->AddTemp(Location::RequiresFpuRegister()); 1069 } 1070 break; 1071 default: 1072 break; 1073 } 1074 } 1075 1076 void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { 1077 LocationSummary* locations = instruction->GetLocations(); 1078 VRegister acc = VRegisterFrom(locations->InAt(0)); 1079 VRegister left = VRegisterFrom(locations->InAt(1)); 1080 VRegister right = VRegisterFrom(locations->InAt(2)); 1081 1082 DCHECK(locations->InAt(0).Equals(locations->Out())); 1083 1084 // Handle all feasible acc_T += sad(a_S, b_S) type combinations (T x S). 
1085 HVecOperation* a = instruction->InputAt(1)->AsVecOperation(); 1086 HVecOperation* b = instruction->InputAt(2)->AsVecOperation(); 1087 DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()), 1088 HVecOperation::ToSignedType(b->GetPackedType())); 1089 switch (a->GetPackedType()) { 1090 case DataType::Type::kUint8: 1091 case DataType::Type::kInt8: 1092 DCHECK_EQ(16u, a->GetVectorLength()); 1093 switch (instruction->GetPackedType()) { 1094 case DataType::Type::kInt16: 1095 DCHECK_EQ(8u, instruction->GetVectorLength()); 1096 __ Sabal(acc.V8H(), left.V8B(), right.V8B()); 1097 __ Sabal2(acc.V8H(), left.V16B(), right.V16B()); 1098 break; 1099 case DataType::Type::kInt32: { 1100 DCHECK_EQ(4u, instruction->GetVectorLength()); 1101 VRegister tmp1 = VRegisterFrom(locations->GetTemp(0)); 1102 VRegister tmp2 = VRegisterFrom(locations->GetTemp(1)); 1103 __ Sxtl(tmp1.V8H(), left.V8B()); 1104 __ Sxtl(tmp2.V8H(), right.V8B()); 1105 __ Sabal(acc.V4S(), tmp1.V4H(), tmp2.V4H()); 1106 __ Sabal2(acc.V4S(), tmp1.V8H(), tmp2.V8H()); 1107 __ Sxtl2(tmp1.V8H(), left.V16B()); 1108 __ Sxtl2(tmp2.V8H(), right.V16B()); 1109 __ Sabal(acc.V4S(), tmp1.V4H(), tmp2.V4H()); 1110 __ Sabal2(acc.V4S(), tmp1.V8H(), tmp2.V8H()); 1111 break; 1112 } 1113 case DataType::Type::kInt64: { 1114 DCHECK_EQ(2u, instruction->GetVectorLength()); 1115 VRegister tmp1 = VRegisterFrom(locations->GetTemp(0)); 1116 VRegister tmp2 = VRegisterFrom(locations->GetTemp(1)); 1117 VRegister tmp3 = VRegisterFrom(locations->GetTemp(2)); 1118 VRegister tmp4 = VRegisterFrom(locations->GetTemp(3)); 1119 __ Sxtl(tmp1.V8H(), left.V8B()); 1120 __ Sxtl(tmp2.V8H(), right.V8B()); 1121 __ Sxtl(tmp3.V4S(), tmp1.V4H()); 1122 __ Sxtl(tmp4.V4S(), tmp2.V4H()); 1123 __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S()); 1124 __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S()); 1125 __ Sxtl2(tmp3.V4S(), tmp1.V8H()); 1126 __ Sxtl2(tmp4.V4S(), tmp2.V8H()); 1127 __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S()); 1128 __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S()); 1129 
__ Sxtl2(tmp1.V8H(), left.V16B()); 1130 __ Sxtl2(tmp2.V8H(), right.V16B()); 1131 __ Sxtl(tmp3.V4S(), tmp1.V4H()); 1132 __ Sxtl(tmp4.V4S(), tmp2.V4H()); 1133 __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S()); 1134 __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S()); 1135 __ Sxtl2(tmp3.V4S(), tmp1.V8H()); 1136 __ Sxtl2(tmp4.V4S(), tmp2.V8H()); 1137 __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S()); 1138 __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S()); 1139 break; 1140 } 1141 default: 1142 LOG(FATAL) << "Unsupported SIMD type"; 1143 UNREACHABLE(); 1144 } 1145 break; 1146 case DataType::Type::kUint16: 1147 case DataType::Type::kInt16: 1148 DCHECK_EQ(8u, a->GetVectorLength()); 1149 switch (instruction->GetPackedType()) { 1150 case DataType::Type::kInt32: 1151 DCHECK_EQ(4u, instruction->GetVectorLength()); 1152 __ Sabal(acc.V4S(), left.V4H(), right.V4H()); 1153 __ Sabal2(acc.V4S(), left.V8H(), right.V8H()); 1154 break; 1155 case DataType::Type::kInt64: { 1156 DCHECK_EQ(2u, instruction->GetVectorLength()); 1157 VRegister tmp1 = VRegisterFrom(locations->GetTemp(0)); 1158 VRegister tmp2 = VRegisterFrom(locations->GetTemp(1)); 1159 __ Sxtl(tmp1.V4S(), left.V4H()); 1160 __ Sxtl(tmp2.V4S(), right.V4H()); 1161 __ Sabal(acc.V2D(), tmp1.V2S(), tmp2.V2S()); 1162 __ Sabal2(acc.V2D(), tmp1.V4S(), tmp2.V4S()); 1163 __ Sxtl2(tmp1.V4S(), left.V8H()); 1164 __ Sxtl2(tmp2.V4S(), right.V8H()); 1165 __ Sabal(acc.V2D(), tmp1.V2S(), tmp2.V2S()); 1166 __ Sabal2(acc.V2D(), tmp1.V4S(), tmp2.V4S()); 1167 break; 1168 } 1169 default: 1170 LOG(FATAL) << "Unsupported SIMD type"; 1171 UNREACHABLE(); 1172 } 1173 break; 1174 case DataType::Type::kInt32: 1175 DCHECK_EQ(4u, a->GetVectorLength()); 1176 switch (instruction->GetPackedType()) { 1177 case DataType::Type::kInt32: { 1178 DCHECK_EQ(4u, instruction->GetVectorLength()); 1179 VRegister tmp = VRegisterFrom(locations->GetTemp(0)); 1180 __ Sub(tmp.V4S(), left.V4S(), right.V4S()); 1181 __ Abs(tmp.V4S(), tmp.V4S()); 1182 __ Add(acc.V4S(), acc.V4S(), tmp.V4S()); 1183 break; 
1184 } 1185 case DataType::Type::kInt64: 1186 DCHECK_EQ(2u, instruction->GetVectorLength()); 1187 __ Sabal(acc.V2D(), left.V2S(), right.V2S()); 1188 __ Sabal2(acc.V2D(), left.V4S(), right.V4S()); 1189 break; 1190 default: 1191 LOG(FATAL) << "Unsupported SIMD type"; 1192 UNREACHABLE(); 1193 } 1194 break; 1195 case DataType::Type::kInt64: 1196 DCHECK_EQ(2u, a->GetVectorLength()); 1197 switch (instruction->GetPackedType()) { 1198 case DataType::Type::kInt64: { 1199 DCHECK_EQ(2u, instruction->GetVectorLength()); 1200 VRegister tmp = VRegisterFrom(locations->GetTemp(0)); 1201 __ Sub(tmp.V2D(), left.V2D(), right.V2D()); 1202 __ Abs(tmp.V2D(), tmp.V2D()); 1203 __ Add(acc.V2D(), acc.V2D(), tmp.V2D()); 1204 break; 1205 } 1206 default: 1207 LOG(FATAL) << "Unsupported SIMD type"; 1208 UNREACHABLE(); 1209 } 1210 break; 1211 default: 1212 LOG(FATAL) << "Unsupported SIMD type"; 1213 } 1214 } 1215 1216 // Helper to set up locations for vector memory operations. 1217 static void CreateVecMemLocations(ArenaAllocator* allocator, 1218 HVecMemoryOperation* instruction, 1219 bool is_load) { 1220 LocationSummary* locations = new (allocator) LocationSummary(instruction); 1221 switch (instruction->GetPackedType()) { 1222 case DataType::Type::kBool: 1223 case DataType::Type::kUint8: 1224 case DataType::Type::kInt8: 1225 case DataType::Type::kUint16: 1226 case DataType::Type::kInt16: 1227 case DataType::Type::kInt32: 1228 case DataType::Type::kInt64: 1229 case DataType::Type::kFloat32: 1230 case DataType::Type::kFloat64: 1231 locations->SetInAt(0, Location::RequiresRegister()); 1232 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); 1233 if (is_load) { 1234 locations->SetOut(Location::RequiresFpuRegister()); 1235 } else { 1236 locations->SetInAt(2, Location::RequiresFpuRegister()); 1237 } 1238 break; 1239 default: 1240 LOG(FATAL) << "Unsupported SIMD type"; 1241 UNREACHABLE(); 1242 } 1243 } 1244 1245 // Helper to set up locations for vector memory operations. 
// Returns the memory operand for the given vector memory instruction and,
// if used, sets the output parameter scratch to a temporary register used in this operand,
// so that the client can release it right after the memory operand use.
MemOperand InstructionCodeGeneratorARM64::VecAddress(
    HVecMemoryOperation* instruction,
    UseScratchRegisterScope* temps_scope,
    size_t size,
    bool is_string_char_at,
    /*out*/ Register* scratch) {
  LocationSummary* locations = instruction->GetLocations();
  Register base = InputRegisterAt(instruction, 0);

  // If the index input already folds offset + scaled index (an
  // HIntermediateAddressIndex), address directly as base + index.
  if (instruction->InputAt(1)->IsIntermediateAddressIndex()) {
    DCHECK(!is_string_char_at);
    return MemOperand(base.X(), InputRegisterAt(instruction, 1).X());
  }

  Location index = locations->InAt(1);
  // Offset of element 0: String value data for char-at, Array data otherwise.
  uint32_t offset = is_string_char_at
      ? mirror::String::ValueOffset().Uint32Value()
      : mirror::Array::DataOffset(size).Uint32Value();
  size_t shift = ComponentSizeShiftWidth(size);

  // HIntermediateAddress optimization is only applied for scalar ArrayGet and ArraySet.
  DCHECK(!instruction->InputAt(0)->IsIntermediateAddress());

  if (index.IsConstant()) {
    // Constant index: fold the scaled index into the immediate offset.
    offset += Int64ConstantFrom(index) << shift;
    return HeapOperand(base, offset);
  } else {
    // Variable index: compute base + (index << shift) into a scratch register.
    // The caller releases *scratch once the returned operand has been used.
    *scratch = temps_scope->AcquireSameSizeAs(base);
    __ Add(*scratch, base, Operand(WRegisterFrom(index), LSL, shift));
    return HeapOperand(*scratch, offset);
  }
}

void LocationsBuilderARM64::VisitVecLoad(HVecLoad* instruction) {
  CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true);
}

// Generates a vector load. kUint16 gets special treatment when it is a
// String.charAt in a compressed-string build: the low bit of the String's
// count field selects between an 8-byte load + zero-extension (compressed)
// and a full 16-byte char load (uncompressed).
void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  size_t size = DataType::Size(instruction->GetPackedType());
  VRegister reg = VRegisterFrom(locations->Out());
  UseScratchRegisterScope temps(GetVIXLAssembler());
  Register scratch;

  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      // Special handling of compressed/uncompressed string load.
      if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
        vixl::aarch64::Label uncompressed_load, done;
        // Test compression bit.
        static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
                      "Expecting 0=compressed, 1=uncompressed");
        uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
        Register length = temps.AcquireW();
        __ Ldr(length, HeapOperand(InputRegisterAt(instruction, 0), count_offset));
        __ Tbnz(length.W(), 0, &uncompressed_load);
        temps.Release(length);  // no longer needed
        // Zero extend 8 compressed bytes into 8 chars.
        // Note the element size 1 passed to VecAddress: compressed chars are bytes.
        __ Ldr(DRegisterFrom(locations->Out()).V8B(),
               VecAddress(instruction, &temps, 1, /*is_string_char_at*/ true, &scratch));
        __ Uxtl(reg.V8H(), reg.V8B());
        __ B(&done);
        if (scratch.IsValid()) {
          temps.Release(scratch);  // if used, no longer needed
        }
        // Load 8 direct uncompressed chars.
        __ Bind(&uncompressed_load);
        __ Ldr(reg, VecAddress(instruction, &temps, size, /*is_string_char_at*/ true, &scratch));
        __ Bind(&done);
        return;
      }
      FALLTHROUGH_INTENDED;
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 16u);
      __ Ldr(reg, VecAddress(instruction, &temps, size, instruction->IsStringCharAt(), &scratch));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecStore(HVecStore* instruction) {
  CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false);
}

// Generates a vector store of the value in input 2 to base (input 0) plus
// index (input 1); no string special-casing applies to stores.
void InstructionCodeGeneratorARM64::VisitVecStore(HVecStore* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  size_t size = DataType::Size(instruction->GetPackedType());
  VRegister reg = VRegisterFrom(locations->InAt(2));
  UseScratchRegisterScope temps(GetVIXLAssembler());
  Register scratch;

  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 16u);
      __ Str(reg, VecAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

#undef __

}  // namespace arm64
}  // namespace art