1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "code_generator_x86_64.h" 18 #include "mirror/array-inl.h" 19 20 namespace art { 21 namespace x86_64 { 22 23 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. 24 #define __ down_cast<X86_64Assembler*>(GetAssembler())-> // NOLINT 25 26 void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { 27 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); 28 switch (instruction->GetPackedType()) { 29 case Primitive::kPrimBoolean: 30 case Primitive::kPrimByte: 31 case Primitive::kPrimChar: 32 case Primitive::kPrimShort: 33 case Primitive::kPrimInt: 34 case Primitive::kPrimLong: 35 locations->SetInAt(0, Location::RequiresRegister()); 36 locations->SetOut(Location::RequiresFpuRegister()); 37 break; 38 case Primitive::kPrimFloat: 39 case Primitive::kPrimDouble: 40 locations->SetInAt(0, Location::RequiresFpuRegister()); 41 locations->SetOut(Location::SameAsFirstInput()); 42 break; 43 default: 44 LOG(FATAL) << "Unsupported SIMD type"; 45 UNREACHABLE(); 46 } 47 } 48 49 void InstructionCodeGeneratorX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { 50 LocationSummary* locations = instruction->GetLocations(); 51 XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>(); 52 switch (instruction->GetPackedType()) { 53 case Primitive::kPrimBoolean: 54 case Primitive::kPrimByte: 55 DCHECK_EQ(16u, instruction->GetVectorLength()); 56 __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>()); 57 __ punpcklbw(reg, reg); 58 __ punpcklwd(reg, reg); 59 __ pshufd(reg, reg, Immediate(0)); 60 break; 61 case Primitive::kPrimChar: 62 case Primitive::kPrimShort: 63 DCHECK_EQ(8u, instruction->GetVectorLength()); 64 __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>()); 65 __ punpcklwd(reg, reg); 66 __ pshufd(reg, reg, Immediate(0)); 67 break; 68 case Primitive::kPrimInt: 69 DCHECK_EQ(4u, instruction->GetVectorLength()); 70 __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>()); 71 __ pshufd(reg, reg, Immediate(0)); 72 break; 73 case Primitive::kPrimLong: 74 DCHECK_EQ(2u, instruction->GetVectorLength()); 75 __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>()); // is 64-bit 76 __ punpcklqdq(reg, reg); 77 break; 78 case Primitive::kPrimFloat: 79 DCHECK(locations->InAt(0).Equals(locations->Out())); 80 DCHECK_EQ(4u, instruction->GetVectorLength()); 81 __ shufps(reg, reg, Immediate(0)); 82 break; 83 case Primitive::kPrimDouble: 84 DCHECK(locations->InAt(0).Equals(locations->Out())); 85 DCHECK_EQ(2u, instruction->GetVectorLength()); 86 __ shufpd(reg, reg, Immediate(0)); 87 break; 88 default: 89 LOG(FATAL) << "Unsupported SIMD type"; 90 UNREACHABLE(); 91 } 92 } 93 94 void LocationsBuilderX86_64::VisitVecSetScalars(HVecSetScalars* instruction) { 95 LOG(FATAL) << "No SIMD for " << instruction->GetId(); 96 } 97 98 void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruction) { 99 LOG(FATAL) << "No SIMD for " << instruction->GetId(); 100 } 101 102 void LocationsBuilderX86_64::VisitVecSumReduce(HVecSumReduce* instruction) { 103 LOG(FATAL) << "No SIMD for " << instruction->GetId(); 104 } 105 106 void InstructionCodeGeneratorX86_64::VisitVecSumReduce(HVecSumReduce* instruction) { 107 LOG(FATAL) << "No SIMD for " << instruction->GetId(); 108 } 109 110 // Helper to set up locations for vector unary operations. 111 static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) { 112 LocationSummary* locations = new (arena) LocationSummary(instruction); 113 switch (instruction->GetPackedType()) { 114 case Primitive::kPrimBoolean: 115 case Primitive::kPrimByte: 116 case Primitive::kPrimChar: 117 case Primitive::kPrimShort: 118 case Primitive::kPrimInt: 119 case Primitive::kPrimLong: 120 case Primitive::kPrimFloat: 121 case Primitive::kPrimDouble: 122 locations->SetInAt(0, Location::RequiresFpuRegister()); 123 locations->SetOut(Location::RequiresFpuRegister()); 124 break; 125 default: 126 LOG(FATAL) << "Unsupported SIMD type"; 127 UNREACHABLE(); 128 } 129 } 130 131 void LocationsBuilderX86_64::VisitVecCnv(HVecCnv* instruction) { 132 CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); 133 } 134 135 void InstructionCodeGeneratorX86_64::VisitVecCnv(HVecCnv* instruction) { 136 LocationSummary* locations = instruction->GetLocations(); 137 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); 138 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 139 Primitive::Type from = instruction->GetInputType(); 140 Primitive::Type to = instruction->GetResultType(); 141 if (from == Primitive::kPrimInt && to == Primitive::kPrimFloat) { 142 DCHECK_EQ(4u, instruction->GetVectorLength()); 143 __ cvtdq2ps(dst, src); 144 } else { 145 LOG(FATAL) << "Unsupported SIMD type"; 146 } 147 } 148 149 void LocationsBuilderX86_64::VisitVecNeg(HVecNeg* instruction) { 150 CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); 151 } 152 153 void InstructionCodeGeneratorX86_64::VisitVecNeg(HVecNeg* instruction) { 154 LocationSummary* locations = instruction->GetLocations(); 155 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); 156 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 157 switch (instruction->GetPackedType()) { 158 case Primitive::kPrimByte: 159 DCHECK_EQ(16u, instruction->GetVectorLength()); 160 __ pxor(dst, dst); 161 __ psubb(dst, src); 162 break; 163 case Primitive::kPrimChar: 164 case Primitive::kPrimShort: 165 DCHECK_EQ(8u, instruction->GetVectorLength()); 166 __ pxor(dst, dst); 167 __ psubw(dst, src); 168 break; 169 case Primitive::kPrimInt: 170 DCHECK_EQ(4u, instruction->GetVectorLength()); 171 __ pxor(dst, dst); 172 __ psubd(dst, src); 173 break; 174 case Primitive::kPrimLong: 175 DCHECK_EQ(2u, instruction->GetVectorLength()); 176 __ pxor(dst, dst); 177 __ psubq(dst, src); 178 break; 179 case Primitive::kPrimFloat: 180 DCHECK_EQ(4u, instruction->GetVectorLength()); 181 __ xorps(dst, dst); 182 __ subps(dst, src); 183 break; 184 case Primitive::kPrimDouble: 185 DCHECK_EQ(2u, instruction->GetVectorLength()); 186 __ xorpd(dst, dst); 187 __ subpd(dst, src); 188 break; 189 default: 190 LOG(FATAL) << "Unsupported SIMD type"; 191 UNREACHABLE(); 192 } 193 } 194 195 void LocationsBuilderX86_64::VisitVecAbs(HVecAbs* instruction) { 196 CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); 197 // Integral-abs requires a temporary for the comparison. 198 if (instruction->GetPackedType() == Primitive::kPrimInt) { 199 instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister()); 200 } 201 } 202 203 void InstructionCodeGeneratorX86_64::VisitVecAbs(HVecAbs* instruction) { 204 LocationSummary* locations = instruction->GetLocations(); 205 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); 206 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 207 switch (instruction->GetPackedType()) { 208 case Primitive::kPrimInt: { 209 DCHECK_EQ(4u, instruction->GetVectorLength()); 210 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 211 __ movaps(dst, src); 212 __ pxor(tmp, tmp); 213 __ pcmpgtd(tmp, dst); 214 __ pxor(dst, tmp); 215 __ psubd(dst, tmp); 216 break; 217 } 218 case Primitive::kPrimFloat: 219 DCHECK_EQ(4u, instruction->GetVectorLength()); 220 __ pcmpeqb(dst, dst); // all ones 221 __ psrld(dst, Immediate(1)); 222 __ andps(dst, src); 223 break; 224 case Primitive::kPrimDouble: 225 DCHECK_EQ(2u, instruction->GetVectorLength()); 226 __ pcmpeqb(dst, dst); // all ones 227 __ psrlq(dst, Immediate(1)); 228 __ andpd(dst, src); 229 break; 230 default: 231 LOG(FATAL) << "Unsupported SIMD type"; 232 UNREACHABLE(); 233 } 234 } 235 236 void LocationsBuilderX86_64::VisitVecNot(HVecNot* instruction) { 237 CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); 238 // Boolean-not requires a temporary to construct the 16 x one. 239 if (instruction->GetPackedType() == Primitive::kPrimBoolean) { 240 instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister()); 241 } 242 } 243 244 void InstructionCodeGeneratorX86_64::VisitVecNot(HVecNot* instruction) { 245 LocationSummary* locations = instruction->GetLocations(); 246 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); 247 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 248 switch (instruction->GetPackedType()) { 249 case Primitive::kPrimBoolean: { // special case boolean-not 250 DCHECK_EQ(16u, instruction->GetVectorLength()); 251 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 252 __ pxor(dst, dst); 253 __ pcmpeqb(tmp, tmp); // all ones 254 __ psubb(dst, tmp); // 16 x one 255 __ pxor(dst, src); 256 break; 257 } 258 case Primitive::kPrimByte: 259 case Primitive::kPrimChar: 260 case Primitive::kPrimShort: 261 case Primitive::kPrimInt: 262 case Primitive::kPrimLong: 263 DCHECK_LE(2u, instruction->GetVectorLength()); 264 DCHECK_LE(instruction->GetVectorLength(), 16u); 265 __ pcmpeqb(dst, dst); // all ones 266 __ pxor(dst, src); 267 break; 268 case Primitive::kPrimFloat: 269 DCHECK_EQ(4u, instruction->GetVectorLength()); 270 __ pcmpeqb(dst, dst); // all ones 271 __ xorps(dst, src); 272 break; 273 case Primitive::kPrimDouble: 274 DCHECK_EQ(2u, instruction->GetVectorLength()); 275 __ pcmpeqb(dst, dst); // all ones 276 __ xorpd(dst, src); 277 break; 278 default: 279 LOG(FATAL) << "Unsupported SIMD type"; 280 UNREACHABLE(); 281 } 282 } 283 284 // Helper to set up locations for vector binary operations. 285 static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { 286 LocationSummary* locations = new (arena) LocationSummary(instruction); 287 switch (instruction->GetPackedType()) { 288 case Primitive::kPrimBoolean: 289 case Primitive::kPrimByte: 290 case Primitive::kPrimChar: 291 case Primitive::kPrimShort: 292 case Primitive::kPrimInt: 293 case Primitive::kPrimLong: 294 case Primitive::kPrimFloat: 295 case Primitive::kPrimDouble: 296 locations->SetInAt(0, Location::RequiresFpuRegister()); 297 locations->SetInAt(1, Location::RequiresFpuRegister()); 298 locations->SetOut(Location::SameAsFirstInput()); 299 break; 300 default: 301 LOG(FATAL) << "Unsupported SIMD type"; 302 UNREACHABLE(); 303 } 304 } 305 306 void LocationsBuilderX86_64::VisitVecAdd(HVecAdd* instruction) { 307 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); 308 } 309 310 void InstructionCodeGeneratorX86_64::VisitVecAdd(HVecAdd* instruction) { 311 LocationSummary* locations = instruction->GetLocations(); 312 DCHECK(locations->InAt(0).Equals(locations->Out())); 313 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 314 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 315 switch (instruction->GetPackedType()) { 316 case Primitive::kPrimByte: 317 DCHECK_EQ(16u, instruction->GetVectorLength()); 318 __ paddb(dst, src); 319 break; 320 case Primitive::kPrimChar: 321 case Primitive::kPrimShort: 322 DCHECK_EQ(8u, instruction->GetVectorLength()); 323 __ paddw(dst, src); 324 break; 325 case Primitive::kPrimInt: 326 DCHECK_EQ(4u, instruction->GetVectorLength()); 327 __ paddd(dst, src); 328 break; 329 case Primitive::kPrimLong: 330 DCHECK_EQ(2u, instruction->GetVectorLength()); 331 __ paddq(dst, src); 332 break; 333 case Primitive::kPrimFloat: 334 DCHECK_EQ(4u, instruction->GetVectorLength()); 335 __ addps(dst, src); 336 break; 337 case Primitive::kPrimDouble: 338 DCHECK_EQ(2u, instruction->GetVectorLength()); 339 __ addpd(dst, src); 340 break; 341 default: 342 LOG(FATAL) << "Unsupported SIMD type"; 343 UNREACHABLE(); 344 } 345 } 346 347 void LocationsBuilderX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { 348 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); 349 } 350 351 void InstructionCodeGeneratorX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { 352 LocationSummary* locations = instruction->GetLocations(); 353 DCHECK(locations->InAt(0).Equals(locations->Out())); 354 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 355 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 356 357 DCHECK(instruction->IsRounded()); 358 DCHECK(instruction->IsUnsigned()); 359 360 switch (instruction->GetPackedType()) { 361 case Primitive::kPrimByte: 362 DCHECK_EQ(16u, instruction->GetVectorLength()); 363 __ pavgb(dst, src); 364 return; 365 case Primitive::kPrimChar: 366 case Primitive::kPrimShort: 367 DCHECK_EQ(8u, instruction->GetVectorLength()); 368 __ pavgw(dst, src); 369 return; 370 default: 371 LOG(FATAL) << "Unsupported SIMD type"; 372 UNREACHABLE(); 373 } 374 } 375 376 void LocationsBuilderX86_64::VisitVecSub(HVecSub* instruction) { 377 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); 378 } 379 380 void InstructionCodeGeneratorX86_64::VisitVecSub(HVecSub* instruction) { 381 LocationSummary* locations = instruction->GetLocations(); 382 DCHECK(locations->InAt(0).Equals(locations->Out())); 383 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 384 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 385 switch (instruction->GetPackedType()) { 386 case Primitive::kPrimByte: 387 DCHECK_EQ(16u, instruction->GetVectorLength()); 388 __ psubb(dst, src); 389 break; 390 case Primitive::kPrimChar: 391 case Primitive::kPrimShort: 392 DCHECK_EQ(8u, instruction->GetVectorLength()); 393 __ psubw(dst, src); 394 break; 395 case Primitive::kPrimInt: 396 DCHECK_EQ(4u, instruction->GetVectorLength()); 397 __ psubd(dst, src); 398 break; 399 case Primitive::kPrimLong: 400 DCHECK_EQ(2u, instruction->GetVectorLength()); 401 __ psubq(dst, src); 402 break; 403 case Primitive::kPrimFloat: 404 DCHECK_EQ(4u, instruction->GetVectorLength()); 405 __ subps(dst, src); 406 break; 407 case Primitive::kPrimDouble: 408 DCHECK_EQ(2u, instruction->GetVectorLength()); 409 __ subpd(dst, src); 410 break; 411 default: 412 LOG(FATAL) << "Unsupported SIMD type"; 413 UNREACHABLE(); 414 } 415 } 416 417 void LocationsBuilderX86_64::VisitVecMul(HVecMul* instruction) { 418 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); 419 } 420 421 void InstructionCodeGeneratorX86_64::VisitVecMul(HVecMul* instruction) { 422 LocationSummary* locations = instruction->GetLocations(); 423 DCHECK(locations->InAt(0).Equals(locations->Out())); 424 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 425 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 426 switch (instruction->GetPackedType()) { 427 case Primitive::kPrimChar: 428 case Primitive::kPrimShort: 429 DCHECK_EQ(8u, instruction->GetVectorLength()); 430 __ pmullw(dst, src); 431 break; 432 case Primitive::kPrimInt: 433 DCHECK_EQ(4u, instruction->GetVectorLength()); 434 __ pmulld(dst, src); 435 break; 436 case Primitive::kPrimFloat: 437 DCHECK_EQ(4u, instruction->GetVectorLength()); 438 __ mulps(dst, src); 439 break; 440 case Primitive::kPrimDouble: 441 DCHECK_EQ(2u, instruction->GetVectorLength()); 442 __ mulpd(dst, src); 443 break; 444 default: 445 LOG(FATAL) << "Unsupported SIMD type"; 446 UNREACHABLE(); 447 } 448 } 449 450 void LocationsBuilderX86_64::VisitVecDiv(HVecDiv* instruction) { 451 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); 452 } 453 454 void InstructionCodeGeneratorX86_64::VisitVecDiv(HVecDiv* instruction) { 455 LocationSummary* locations = instruction->GetLocations(); 456 DCHECK(locations->InAt(0).Equals(locations->Out())); 457 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 458 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 459 switch (instruction->GetPackedType()) { 460 case Primitive::kPrimFloat: 461 DCHECK_EQ(4u, instruction->GetVectorLength()); 462 __ divps(dst, src); 463 break; 464 case Primitive::kPrimDouble: 465 DCHECK_EQ(2u, instruction->GetVectorLength()); 466 __ divpd(dst, src); 467 break; 468 default: 469 LOG(FATAL) << "Unsupported SIMD type"; 470 UNREACHABLE(); 471 } 472 } 473 474 void LocationsBuilderX86_64::VisitVecMin(HVecMin* instruction) { 475 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); 476 } 477 478 void InstructionCodeGeneratorX86_64::VisitVecMin(HVecMin* instruction) { 479 LocationSummary* locations = instruction->GetLocations(); 480 DCHECK(locations->InAt(0).Equals(locations->Out())); 481 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 482 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 483 switch (instruction->GetPackedType()) { 484 case Primitive::kPrimByte: 485 DCHECK_EQ(16u, instruction->GetVectorLength()); 486 if (instruction->IsUnsigned()) { 487 __ pminub(dst, src); 488 } else { 489 __ pminsb(dst, src); 490 } 491 break; 492 case Primitive::kPrimChar: 493 case Primitive::kPrimShort: 494 DCHECK_EQ(8u, instruction->GetVectorLength()); 495 if (instruction->IsUnsigned()) { 496 __ pminuw(dst, src); 497 } else { 498 __ pminsw(dst, src); 499 } 500 break; 501 case Primitive::kPrimInt: 502 DCHECK_EQ(4u, instruction->GetVectorLength()); 503 if (instruction->IsUnsigned()) { 504 __ pminud(dst, src); 505 } else { 506 __ pminsd(dst, src); 507 } 508 break; 509 // Next cases are sloppy wrt 0.0 vs -0.0. 510 case Primitive::kPrimFloat: 511 DCHECK_EQ(4u, instruction->GetVectorLength()); 512 DCHECK(!instruction->IsUnsigned()); 513 __ minps(dst, src); 514 break; 515 case Primitive::kPrimDouble: 516 DCHECK_EQ(2u, instruction->GetVectorLength()); 517 DCHECK(!instruction->IsUnsigned()); 518 __ minpd(dst, src); 519 break; 520 default: 521 LOG(FATAL) << "Unsupported SIMD type"; 522 UNREACHABLE(); 523 } 524 } 525 526 void LocationsBuilderX86_64::VisitVecMax(HVecMax* instruction) { 527 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); 528 } 529 530 void InstructionCodeGeneratorX86_64::VisitVecMax(HVecMax* instruction) { 531 LocationSummary* locations = instruction->GetLocations(); 532 DCHECK(locations->InAt(0).Equals(locations->Out())); 533 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 534 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 535 switch (instruction->GetPackedType()) { 536 case Primitive::kPrimByte: 537 DCHECK_EQ(16u, instruction->GetVectorLength()); 538 if (instruction->IsUnsigned()) { 539 __ pmaxub(dst, src); 540 } else { 541 __ pmaxsb(dst, src); 542 } 543 break; 544 case Primitive::kPrimChar: 545 case Primitive::kPrimShort: 546 DCHECK_EQ(8u, instruction->GetVectorLength()); 547 if (instruction->IsUnsigned()) { 548 __ pmaxuw(dst, src); 549 } else { 550 __ pmaxsw(dst, src); 551 } 552 break; 553 case Primitive::kPrimInt: 554 DCHECK_EQ(4u, instruction->GetVectorLength()); 555 if (instruction->IsUnsigned()) { 556 __ pmaxud(dst, src); 557 } else { 558 __ pmaxsd(dst, src); 559 } 560 break; 561 // Next cases are sloppy wrt 0.0 vs -0.0. 562 case Primitive::kPrimFloat: 563 DCHECK_EQ(4u, instruction->GetVectorLength()); 564 DCHECK(!instruction->IsUnsigned()); 565 __ maxps(dst, src); 566 break; 567 case Primitive::kPrimDouble: 568 DCHECK_EQ(2u, instruction->GetVectorLength()); 569 DCHECK(!instruction->IsUnsigned()); 570 __ maxpd(dst, src); 571 break; 572 default: 573 LOG(FATAL) << "Unsupported SIMD type"; 574 UNREACHABLE(); 575 } 576 } 577 578 void LocationsBuilderX86_64::VisitVecAnd(HVecAnd* instruction) { 579 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); 580 } 581 582 void InstructionCodeGeneratorX86_64::VisitVecAnd(HVecAnd* instruction) { 583 LocationSummary* locations = instruction->GetLocations(); 584 DCHECK(locations->InAt(0).Equals(locations->Out())); 585 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 586 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 587 switch (instruction->GetPackedType()) { 588 case Primitive::kPrimBoolean: 589 case Primitive::kPrimByte: 590 case Primitive::kPrimChar: 591 case Primitive::kPrimShort: 592 case Primitive::kPrimInt: 593 case Primitive::kPrimLong: 594 DCHECK_LE(2u, instruction->GetVectorLength()); 595 DCHECK_LE(instruction->GetVectorLength(), 16u); 596 __ pand(dst, src); 597 break; 598 case Primitive::kPrimFloat: 599 DCHECK_EQ(4u, instruction->GetVectorLength()); 600 __ andps(dst, src); 601 break; 602 case Primitive::kPrimDouble: 603 DCHECK_EQ(2u, instruction->GetVectorLength()); 604 __ andpd(dst, src); 605 break; 606 default: 607 LOG(FATAL) << "Unsupported SIMD type"; 608 UNREACHABLE(); 609 } 610 } 611 612 void LocationsBuilderX86_64::VisitVecAndNot(HVecAndNot* instruction) { 613 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); 614 } 615 616 void InstructionCodeGeneratorX86_64::VisitVecAndNot(HVecAndNot* instruction) { 617 LocationSummary* locations = instruction->GetLocations(); 618 DCHECK(locations->InAt(0).Equals(locations->Out())); 619 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 620 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 621 switch (instruction->GetPackedType()) { 622 case Primitive::kPrimBoolean: 623 case Primitive::kPrimByte: 624 case Primitive::kPrimChar: 625 case Primitive::kPrimShort: 626 case Primitive::kPrimInt: 627 case Primitive::kPrimLong: 628 DCHECK_LE(2u, instruction->GetVectorLength()); 629 DCHECK_LE(instruction->GetVectorLength(), 16u); 630 __ pandn(dst, src); 631 break; 632 case Primitive::kPrimFloat: 633 DCHECK_EQ(4u, instruction->GetVectorLength()); 634 __ andnps(dst, src); 635 break; 636 case Primitive::kPrimDouble: 637 DCHECK_EQ(2u, instruction->GetVectorLength()); 638 __ andnpd(dst, src); 639 break; 640 default: 641 LOG(FATAL) << "Unsupported SIMD type"; 642 UNREACHABLE(); 643 } 644 } 645 646 void LocationsBuilderX86_64::VisitVecOr(HVecOr* instruction) { 647 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); 648 } 649 650 void InstructionCodeGeneratorX86_64::VisitVecOr(HVecOr* instruction) { 651 LocationSummary* locations = instruction->GetLocations(); 652 DCHECK(locations->InAt(0).Equals(locations->Out())); 653 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 654 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 655 switch (instruction->GetPackedType()) { 656 case Primitive::kPrimBoolean: 657 case Primitive::kPrimByte: 658 case Primitive::kPrimChar: 659 case Primitive::kPrimShort: 660 case Primitive::kPrimInt: 661 case Primitive::kPrimLong: 662 DCHECK_LE(2u, instruction->GetVectorLength()); 663 DCHECK_LE(instruction->GetVectorLength(), 16u); 664 __ por(dst, src); 665 break; 666 case Primitive::kPrimFloat: 667 DCHECK_EQ(4u, instruction->GetVectorLength()); 668 __ orps(dst, src); 669 break; 670 case Primitive::kPrimDouble: 671 DCHECK_EQ(2u, instruction->GetVectorLength()); 672 __ orpd(dst, src); 673 break; 674 default: 675 LOG(FATAL) << "Unsupported SIMD type"; 676 UNREACHABLE(); 677 } 678 } 679 680 void LocationsBuilderX86_64::VisitVecXor(HVecXor* instruction) { 681 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); 682 } 683 684 void InstructionCodeGeneratorX86_64::VisitVecXor(HVecXor* instruction) { 685 LocationSummary* locations = instruction->GetLocations(); 686 DCHECK(locations->InAt(0).Equals(locations->Out())); 687 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 688 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 689 switch (instruction->GetPackedType()) { 690 case Primitive::kPrimBoolean: 691 case Primitive::kPrimByte: 692 case Primitive::kPrimChar: 693 case Primitive::kPrimShort: 694 case Primitive::kPrimInt: 695 case Primitive::kPrimLong: 696 DCHECK_LE(2u, instruction->GetVectorLength()); 697 DCHECK_LE(instruction->GetVectorLength(), 16u); 698 __ pxor(dst, src); 699 break; 700 case Primitive::kPrimFloat: 701 DCHECK_EQ(4u, instruction->GetVectorLength()); 702 __ xorps(dst, src); 703 break; 704 case Primitive::kPrimDouble: 705 DCHECK_EQ(2u, instruction->GetVectorLength()); 706 __ xorpd(dst, src); 707 break; 708 default: 709 LOG(FATAL) << "Unsupported SIMD type"; 710 UNREACHABLE(); 711 } 712 } 713 714 // Helper to set up locations for vector shift operations. 715 static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { 716 LocationSummary* locations = new (arena) LocationSummary(instruction); 717 switch (instruction->GetPackedType()) { 718 case Primitive::kPrimChar: 719 case Primitive::kPrimShort: 720 case Primitive::kPrimInt: 721 case Primitive::kPrimLong: 722 locations->SetInAt(0, Location::RequiresFpuRegister()); 723 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); 724 locations->SetOut(Location::SameAsFirstInput()); 725 break; 726 default: 727 LOG(FATAL) << "Unsupported SIMD type"; 728 UNREACHABLE(); 729 } 730 } 731 732 void LocationsBuilderX86_64::VisitVecShl(HVecShl* instruction) { 733 CreateVecShiftLocations(GetGraph()->GetArena(), instruction); 734 } 735 736 void InstructionCodeGeneratorX86_64::VisitVecShl(HVecShl* instruction) { 737 LocationSummary* locations = instruction->GetLocations(); 738 DCHECK(locations->InAt(0).Equals(locations->Out())); 739 int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); 740 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 741 switch (instruction->GetPackedType()) { 742 case Primitive::kPrimChar: 743 case Primitive::kPrimShort: 744 DCHECK_EQ(8u, instruction->GetVectorLength()); 745 __ psllw(dst, Immediate(static_cast<int8_t>(value))); 746 break; 747 case Primitive::kPrimInt: 748 DCHECK_EQ(4u, instruction->GetVectorLength()); 749 __ pslld(dst, Immediate(static_cast<int8_t>(value))); 750 break; 751 case Primitive::kPrimLong: 752 DCHECK_EQ(2u, instruction->GetVectorLength()); 753 __ psllq(dst, Immediate(static_cast<int8_t>(value))); 754 break; 755 default: 756 LOG(FATAL) << "Unsupported SIMD type"; 757 UNREACHABLE(); 758 } 759 } 760 761 void LocationsBuilderX86_64::VisitVecShr(HVecShr* instruction) { 762 CreateVecShiftLocations(GetGraph()->GetArena(), instruction); 763 } 764 765 void InstructionCodeGeneratorX86_64::VisitVecShr(HVecShr* instruction) { 766 LocationSummary* locations = instruction->GetLocations(); 767 DCHECK(locations->InAt(0).Equals(locations->Out())); 768 int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); 769 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 770 switch (instruction->GetPackedType()) { 771 case Primitive::kPrimChar: 772 case Primitive::kPrimShort: 773 DCHECK_EQ(8u, instruction->GetVectorLength()); 774 __ psraw(dst, Immediate(static_cast<int8_t>(value))); 775 break; 776 case Primitive::kPrimInt: 777 DCHECK_EQ(4u, instruction->GetVectorLength()); 778 __ psrad(dst, Immediate(static_cast<int8_t>(value))); 779 break; 780 default: 781 LOG(FATAL) << "Unsupported SIMD type"; 782 UNREACHABLE(); 783 } 784 } 785 786 void LocationsBuilderX86_64::VisitVecUShr(HVecUShr* instruction) { 787 CreateVecShiftLocations(GetGraph()->GetArena(), instruction); 788 } 789 790 void InstructionCodeGeneratorX86_64::VisitVecUShr(HVecUShr* instruction) { 791 LocationSummary* locations = instruction->GetLocations(); 792 DCHECK(locations->InAt(0).Equals(locations->Out())); 793 int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); 794 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 795 switch (instruction->GetPackedType()) { 796 case Primitive::kPrimChar: 797 case Primitive::kPrimShort: 798 DCHECK_EQ(8u, instruction->GetVectorLength()); 799 __ psrlw(dst, Immediate(static_cast<int8_t>(value))); 800 break; 801 case Primitive::kPrimInt: 802 DCHECK_EQ(4u, instruction->GetVectorLength()); 803 __ psrld(dst, Immediate(static_cast<int8_t>(value))); 804 break; 805 case Primitive::kPrimLong: 806 DCHECK_EQ(2u, instruction->GetVectorLength()); 807 __ psrlq(dst, Immediate(static_cast<int8_t>(value))); 808 break; 809 default: 810 LOG(FATAL) << "Unsupported SIMD type"; 811 UNREACHABLE(); 812 } 813 } 814 815 void LocationsBuilderX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { 816 LOG(FATAL) << "No SIMD for " << instr->GetId(); 817 } 818 819 void InstructionCodeGeneratorX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { 820 LOG(FATAL) << "No SIMD for " << instr->GetId(); 821 } 822 823 // Helper to set up locations for vector memory operations. 824 static void CreateVecMemLocations(ArenaAllocator* arena, 825 HVecMemoryOperation* instruction, 826 bool is_load) { 827 LocationSummary* locations = new (arena) LocationSummary(instruction); 828 switch (instruction->GetPackedType()) { 829 case Primitive::kPrimBoolean: 830 case Primitive::kPrimByte: 831 case Primitive::kPrimChar: 832 case Primitive::kPrimShort: 833 case Primitive::kPrimInt: 834 case Primitive::kPrimLong: 835 case Primitive::kPrimFloat: 836 case Primitive::kPrimDouble: 837 locations->SetInAt(0, Location::RequiresRegister()); 838 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); 839 if (is_load) { 840 locations->SetOut(Location::RequiresFpuRegister()); 841 } else { 842 locations->SetInAt(2, Location::RequiresFpuRegister()); 843 } 844 break; 845 default: 846 LOG(FATAL) << "Unsupported SIMD type"; 847 UNREACHABLE(); 848 } 849 } 850 851 // Helper to construct address for vector memory operations. 852 static Address VecAddress(LocationSummary* locations, size_t size, bool is_string_char_at) { 853 Location base = locations->InAt(0); 854 Location index = locations->InAt(1); 855 ScaleFactor scale = TIMES_1; 856 switch (size) { 857 case 2: scale = TIMES_2; break; 858 case 4: scale = TIMES_4; break; 859 case 8: scale = TIMES_8; break; 860 default: break; 861 } 862 uint32_t offset = is_string_char_at 863 ? mirror::String::ValueOffset().Uint32Value() 864 : mirror::Array::DataOffset(size).Uint32Value(); 865 return CodeGeneratorX86_64::ArrayAddress(base.AsRegister<CpuRegister>(), index, scale, offset); 866 } 867 868 void LocationsBuilderX86_64::VisitVecLoad(HVecLoad* instruction) { 869 CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ true); 870 // String load requires a temporary for the compressed load. 871 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { 872 instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister()); 873 } 874 } 875 876 void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) { 877 LocationSummary* locations = instruction->GetLocations(); 878 size_t size = Primitive::ComponentSize(instruction->GetPackedType()); 879 Address address = VecAddress(locations, size, instruction->IsStringCharAt()); 880 XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>(); 881 bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16); 882 switch (instruction->GetPackedType()) { 883 case Primitive::kPrimChar: 884 DCHECK_EQ(8u, instruction->GetVectorLength()); 885 // Special handling of compressed/uncompressed string load. 886 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { 887 NearLabel done, not_compressed; 888 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 889 // Test compression bit. 890 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, 891 "Expecting 0=compressed, 1=uncompressed"); 892 uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); 893 __ testb(Address(locations->InAt(0).AsRegister<CpuRegister>(), count_offset), Immediate(1)); 894 __ j(kNotZero, ¬_compressed); 895 // Zero extend 8 compressed bytes into 8 chars. 896 __ movsd(reg, VecAddress(locations, 1, /*is_string_char_at*/ true)); 897 __ pxor(tmp, tmp); 898 __ punpcklbw(reg, tmp); 899 __ jmp(&done); 900 // Load 8 direct uncompressed chars. 901 __ Bind(¬_compressed); 902 is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address); 903 __ Bind(&done); 904 return; 905 } 906 FALLTHROUGH_INTENDED; 907 case Primitive::kPrimBoolean: 908 case Primitive::kPrimByte: 909 case Primitive::kPrimShort: 910 case Primitive::kPrimInt: 911 case Primitive::kPrimLong: 912 DCHECK_LE(2u, instruction->GetVectorLength()); 913 DCHECK_LE(instruction->GetVectorLength(), 16u); 914 is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address); 915 break; 916 case Primitive::kPrimFloat: 917 DCHECK_EQ(4u, instruction->GetVectorLength()); 918 is_aligned16 ? __ movaps(reg, address) : __ movups(reg, address); 919 break; 920 case Primitive::kPrimDouble: 921 DCHECK_EQ(2u, instruction->GetVectorLength()); 922 is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address); 923 break; 924 default: 925 LOG(FATAL) << "Unsupported SIMD type"; 926 UNREACHABLE(); 927 } 928 } 929 930 void LocationsBuilderX86_64::VisitVecStore(HVecStore* instruction) { 931 CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ false); 932 } 933 934 void InstructionCodeGeneratorX86_64::VisitVecStore(HVecStore* instruction) { 935 LocationSummary* locations = instruction->GetLocations(); 936 size_t size = Primitive::ComponentSize(instruction->GetPackedType()); 937 Address address = VecAddress(locations, size, /*is_string_char_at*/ false); 938 XmmRegister reg = locations->InAt(2).AsFpuRegister<XmmRegister>(); 939 bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16); 940 switch (instruction->GetPackedType()) { 941 case Primitive::kPrimBoolean: 942 case Primitive::kPrimByte: 943 case Primitive::kPrimChar: 944 case Primitive::kPrimShort: 945 case Primitive::kPrimInt: 946 case Primitive::kPrimLong: 947 DCHECK_LE(2u, instruction->GetVectorLength()); 948 DCHECK_LE(instruction->GetVectorLength(), 16u); 949 is_aligned16 ? __ movdqa(address, reg) : __ movdqu(address, reg); 950 break; 951 case Primitive::kPrimFloat: 952 DCHECK_EQ(4u, instruction->GetVectorLength()); 953 is_aligned16 ? __ movaps(address, reg) : __ movups(address, reg); 954 break; 955 case Primitive::kPrimDouble: 956 DCHECK_EQ(2u, instruction->GetVectorLength()); 957 is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg); 958 break; 959 default: 960 LOG(FATAL) << "Unsupported SIMD type"; 961 UNREACHABLE(); 962 } 963 } 964 965 #undef __ 966 967 } // namespace x86_64 968 } // namespace art 969