1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "code_generator_x86.h" 18 #include "mirror/array-inl.h" 19 20 namespace art { 21 namespace x86 { 22 23 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. 24 #define __ down_cast<X86Assembler*>(GetAssembler())-> // NOLINT 25 26 void LocationsBuilderX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { 27 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); 28 switch (instruction->GetPackedType()) { 29 case Primitive::kPrimLong: 30 // Long needs extra temporary to load the register pair. 31 locations->AddTemp(Location::RequiresFpuRegister()); 32 FALLTHROUGH_INTENDED; 33 case Primitive::kPrimBoolean: 34 case Primitive::kPrimByte: 35 case Primitive::kPrimChar: 36 case Primitive::kPrimShort: 37 case Primitive::kPrimInt: 38 locations->SetInAt(0, Location::RequiresRegister()); 39 locations->SetOut(Location::RequiresFpuRegister()); 40 break; 41 case Primitive::kPrimFloat: 42 case Primitive::kPrimDouble: 43 locations->SetInAt(0, Location::RequiresFpuRegister()); 44 locations->SetOut(Location::SameAsFirstInput()); 45 break; 46 default: 47 LOG(FATAL) << "Unsupported SIMD type"; 48 UNREACHABLE(); 49 } 50 } 51 52 void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { 53 LocationSummary* locations = instruction->GetLocations(); 54 XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>(); 55 switch (instruction->GetPackedType()) { 56 case Primitive::kPrimBoolean: 57 case Primitive::kPrimByte: 58 DCHECK_EQ(16u, instruction->GetVectorLength()); 59 __ movd(reg, locations->InAt(0).AsRegister<Register>()); 60 __ punpcklbw(reg, reg); 61 __ punpcklwd(reg, reg); 62 __ pshufd(reg, reg, Immediate(0)); 63 break; 64 case Primitive::kPrimChar: 65 case Primitive::kPrimShort: 66 DCHECK_EQ(8u, instruction->GetVectorLength()); 67 __ movd(reg, locations->InAt(0).AsRegister<Register>()); 68 __ punpcklwd(reg, reg); 69 __ pshufd(reg, reg, Immediate(0)); 70 break; 71 case Primitive::kPrimInt: 72 DCHECK_EQ(4u, instruction->GetVectorLength()); 73 __ movd(reg, locations->InAt(0).AsRegister<Register>()); 74 __ pshufd(reg, reg, Immediate(0)); 75 break; 76 case Primitive::kPrimLong: { 77 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 78 DCHECK_EQ(2u, instruction->GetVectorLength()); 79 __ movd(reg, locations->InAt(0).AsRegisterPairLow<Register>()); 80 __ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>()); 81 __ punpckldq(reg, tmp); 82 __ punpcklqdq(reg, reg); 83 break; 84 } 85 case Primitive::kPrimFloat: 86 DCHECK(locations->InAt(0).Equals(locations->Out())); 87 DCHECK_EQ(4u, instruction->GetVectorLength()); 88 __ shufps(reg, reg, Immediate(0)); 89 break; 90 case Primitive::kPrimDouble: 91 DCHECK(locations->InAt(0).Equals(locations->Out())); 92 DCHECK_EQ(2u, instruction->GetVectorLength()); 93 __ shufpd(reg, reg, Immediate(0)); 94 break; 95 default: 96 LOG(FATAL) << "Unsupported SIMD type"; 97 UNREACHABLE(); 98 } 99 } 100 101 void LocationsBuilderX86::VisitVecSetScalars(HVecSetScalars* instruction) { 102 LOG(FATAL) << "No SIMD for " << instruction->GetId(); 103 } 104 105 void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction) { 106 LOG(FATAL) << "No SIMD for " << instruction->GetId(); 107 } 108 109 void LocationsBuilderX86::VisitVecSumReduce(HVecSumReduce* instruction) { 110 LOG(FATAL) << "No SIMD for " << instruction->GetId(); 111 } 112 113 void InstructionCodeGeneratorX86::VisitVecSumReduce(HVecSumReduce* instruction) { 114 LOG(FATAL) << "No SIMD for " << instruction->GetId(); 115 } 116 117 // Helper to set up locations for vector unary operations. 118 static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) { 119 LocationSummary* locations = new (arena) LocationSummary(instruction); 120 switch (instruction->GetPackedType()) { 121 case Primitive::kPrimBoolean: 122 case Primitive::kPrimByte: 123 case Primitive::kPrimChar: 124 case Primitive::kPrimShort: 125 case Primitive::kPrimInt: 126 case Primitive::kPrimLong: 127 case Primitive::kPrimFloat: 128 case Primitive::kPrimDouble: 129 locations->SetInAt(0, Location::RequiresFpuRegister()); 130 locations->SetOut(Location::RequiresFpuRegister()); 131 break; 132 default: 133 LOG(FATAL) << "Unsupported SIMD type"; 134 UNREACHABLE(); 135 } 136 } 137 138 void LocationsBuilderX86::VisitVecCnv(HVecCnv* instruction) { 139 CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); 140 } 141 142 void InstructionCodeGeneratorX86::VisitVecCnv(HVecCnv* instruction) { 143 LocationSummary* locations = instruction->GetLocations(); 144 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); 145 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 146 Primitive::Type from = instruction->GetInputType(); 147 Primitive::Type to = instruction->GetResultType(); 148 if (from == Primitive::kPrimInt && to == Primitive::kPrimFloat) { 149 DCHECK_EQ(4u, instruction->GetVectorLength()); 150 __ cvtdq2ps(dst, src); 151 } else { 152 LOG(FATAL) << "Unsupported SIMD type"; 153 } 154 } 155 156 void LocationsBuilderX86::VisitVecNeg(HVecNeg* instruction) { 157 CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); 158 } 159 160 void InstructionCodeGeneratorX86::VisitVecNeg(HVecNeg* instruction) { 161 LocationSummary* locations = instruction->GetLocations(); 162 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); 163 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 164 switch (instruction->GetPackedType()) { 165 case Primitive::kPrimByte: 166 DCHECK_EQ(16u, instruction->GetVectorLength()); 167 __ pxor(dst, dst); 168 __ psubb(dst, src); 169 break; 170 case Primitive::kPrimChar: 171 case Primitive::kPrimShort: 172 DCHECK_EQ(8u, instruction->GetVectorLength()); 173 __ pxor(dst, dst); 174 __ psubw(dst, src); 175 break; 176 case Primitive::kPrimInt: 177 DCHECK_EQ(4u, instruction->GetVectorLength()); 178 __ pxor(dst, dst); 179 __ psubd(dst, src); 180 break; 181 case Primitive::kPrimLong: 182 DCHECK_EQ(2u, instruction->GetVectorLength()); 183 __ pxor(dst, dst); 184 __ psubq(dst, src); 185 break; 186 case Primitive::kPrimFloat: 187 DCHECK_EQ(4u, instruction->GetVectorLength()); 188 __ xorps(dst, dst); 189 __ subps(dst, src); 190 break; 191 case Primitive::kPrimDouble: 192 DCHECK_EQ(2u, instruction->GetVectorLength()); 193 __ xorpd(dst, dst); 194 __ subpd(dst, src); 195 break; 196 default: 197 LOG(FATAL) << "Unsupported SIMD type"; 198 UNREACHABLE(); 199 } 200 } 201 202 void LocationsBuilderX86::VisitVecAbs(HVecAbs* instruction) { 203 CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); 204 // Integral-abs requires a temporary for the comparison. 205 if (instruction->GetPackedType() == Primitive::kPrimInt) { 206 instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister()); 207 } 208 } 209 210 void InstructionCodeGeneratorX86::VisitVecAbs(HVecAbs* instruction) { 211 LocationSummary* locations = instruction->GetLocations(); 212 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); 213 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 214 switch (instruction->GetPackedType()) { 215 case Primitive::kPrimInt: { 216 DCHECK_EQ(4u, instruction->GetVectorLength()); 217 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 218 __ movaps(dst, src); 219 __ pxor(tmp, tmp); 220 __ pcmpgtd(tmp, dst); 221 __ pxor(dst, tmp); 222 __ psubd(dst, tmp); 223 break; 224 } 225 case Primitive::kPrimFloat: 226 DCHECK_EQ(4u, instruction->GetVectorLength()); 227 __ pcmpeqb(dst, dst); // all ones 228 __ psrld(dst, Immediate(1)); 229 __ andps(dst, src); 230 break; 231 case Primitive::kPrimDouble: 232 DCHECK_EQ(2u, instruction->GetVectorLength()); 233 __ pcmpeqb(dst, dst); // all ones 234 __ psrlq(dst, Immediate(1)); 235 __ andpd(dst, src); 236 break; 237 default: 238 LOG(FATAL) << "Unsupported SIMD type"; 239 UNREACHABLE(); 240 } 241 } 242 243 void LocationsBuilderX86::VisitVecNot(HVecNot* instruction) { 244 CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); 245 // Boolean-not requires a temporary to construct the 16 x one. 246 if (instruction->GetPackedType() == Primitive::kPrimBoolean) { 247 instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister()); 248 } 249 } 250 251 void InstructionCodeGeneratorX86::VisitVecNot(HVecNot* instruction) { 252 LocationSummary* locations = instruction->GetLocations(); 253 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); 254 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 255 switch (instruction->GetPackedType()) { 256 case Primitive::kPrimBoolean: { // special case boolean-not 257 DCHECK_EQ(16u, instruction->GetVectorLength()); 258 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 259 __ pxor(dst, dst); 260 __ pcmpeqb(tmp, tmp); // all ones 261 __ psubb(dst, tmp); // 16 x one 262 __ pxor(dst, src); 263 break; 264 } 265 case Primitive::kPrimByte: 266 case Primitive::kPrimChar: 267 case Primitive::kPrimShort: 268 case Primitive::kPrimInt: 269 case Primitive::kPrimLong: 270 DCHECK_LE(2u, instruction->GetVectorLength()); 271 DCHECK_LE(instruction->GetVectorLength(), 16u); 272 __ pcmpeqb(dst, dst); // all ones 273 __ pxor(dst, src); 274 break; 275 case Primitive::kPrimFloat: 276 DCHECK_EQ(4u, instruction->GetVectorLength()); 277 __ pcmpeqb(dst, dst); // all ones 278 __ xorps(dst, src); 279 break; 280 case Primitive::kPrimDouble: 281 DCHECK_EQ(2u, instruction->GetVectorLength()); 282 __ pcmpeqb(dst, dst); // all ones 283 __ xorpd(dst, src); 284 break; 285 default: 286 LOG(FATAL) << "Unsupported SIMD type"; 287 UNREACHABLE(); 288 } 289 } 290 291 // Helper to set up locations for vector binary operations. 292 static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { 293 LocationSummary* locations = new (arena) LocationSummary(instruction); 294 switch (instruction->GetPackedType()) { 295 case Primitive::kPrimBoolean: 296 case Primitive::kPrimByte: 297 case Primitive::kPrimChar: 298 case Primitive::kPrimShort: 299 case Primitive::kPrimInt: 300 case Primitive::kPrimLong: 301 case Primitive::kPrimFloat: 302 case Primitive::kPrimDouble: 303 locations->SetInAt(0, Location::RequiresFpuRegister()); 304 locations->SetInAt(1, Location::RequiresFpuRegister()); 305 locations->SetOut(Location::SameAsFirstInput()); 306 break; 307 default: 308 LOG(FATAL) << "Unsupported SIMD type"; 309 UNREACHABLE(); 310 } 311 } 312 313 void LocationsBuilderX86::VisitVecAdd(HVecAdd* instruction) { 314 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); 315 } 316 317 void InstructionCodeGeneratorX86::VisitVecAdd(HVecAdd* instruction) { 318 LocationSummary* locations = instruction->GetLocations(); 319 DCHECK(locations->InAt(0).Equals(locations->Out())); 320 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 321 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 322 switch (instruction->GetPackedType()) { 323 case Primitive::kPrimByte: 324 DCHECK_EQ(16u, instruction->GetVectorLength()); 325 __ paddb(dst, src); 326 break; 327 case Primitive::kPrimChar: 328 case Primitive::kPrimShort: 329 DCHECK_EQ(8u, instruction->GetVectorLength()); 330 __ paddw(dst, src); 331 break; 332 case Primitive::kPrimInt: 333 DCHECK_EQ(4u, instruction->GetVectorLength()); 334 __ paddd(dst, src); 335 break; 336 case Primitive::kPrimLong: 337 DCHECK_EQ(2u, instruction->GetVectorLength()); 338 __ paddq(dst, src); 339 break; 340 case Primitive::kPrimFloat: 341 DCHECK_EQ(4u, instruction->GetVectorLength()); 342 __ addps(dst, src); 343 break; 344 case Primitive::kPrimDouble: 345 DCHECK_EQ(2u, instruction->GetVectorLength()); 346 __ addpd(dst, src); 347 break; 348 default: 349 LOG(FATAL) << "Unsupported SIMD type"; 350 UNREACHABLE(); 351 } 352 } 353 354 void LocationsBuilderX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { 355 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); 356 } 357 358 void InstructionCodeGeneratorX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { 359 LocationSummary* locations = instruction->GetLocations(); 360 DCHECK(locations->InAt(0).Equals(locations->Out())); 361 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 362 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 363 364 DCHECK(instruction->IsRounded()); 365 DCHECK(instruction->IsUnsigned()); 366 367 switch (instruction->GetPackedType()) { 368 case Primitive::kPrimByte: 369 DCHECK_EQ(16u, instruction->GetVectorLength()); 370 __ pavgb(dst, src); 371 return; 372 case Primitive::kPrimChar: 373 case Primitive::kPrimShort: 374 DCHECK_EQ(8u, instruction->GetVectorLength()); 375 __ pavgw(dst, src); 376 return; 377 default: 378 LOG(FATAL) << "Unsupported SIMD type"; 379 UNREACHABLE(); 380 } 381 } 382 383 void LocationsBuilderX86::VisitVecSub(HVecSub* instruction) { 384 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); 385 } 386 387 void InstructionCodeGeneratorX86::VisitVecSub(HVecSub* instruction) { 388 LocationSummary* locations = instruction->GetLocations(); 389 DCHECK(locations->InAt(0).Equals(locations->Out())); 390 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 391 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 392 switch (instruction->GetPackedType()) { 393 case Primitive::kPrimByte: 394 DCHECK_EQ(16u, instruction->GetVectorLength()); 395 __ psubb(dst, src); 396 break; 397 case Primitive::kPrimChar: 398 case Primitive::kPrimShort: 399 DCHECK_EQ(8u, instruction->GetVectorLength()); 400 __ psubw(dst, src); 401 break; 402 case Primitive::kPrimInt: 403 DCHECK_EQ(4u, instruction->GetVectorLength()); 404 __ psubd(dst, src); 405 break; 406 case Primitive::kPrimLong: 407 DCHECK_EQ(2u, instruction->GetVectorLength()); 408 __ psubq(dst, src); 409 break; 410 case Primitive::kPrimFloat: 411 DCHECK_EQ(4u, instruction->GetVectorLength()); 412 __ subps(dst, src); 413 break; 414 case Primitive::kPrimDouble: 415 DCHECK_EQ(2u, instruction->GetVectorLength()); 416 __ subpd(dst, src); 417 break; 418 default: 419 LOG(FATAL) << "Unsupported SIMD type"; 420 UNREACHABLE(); 421 } 422 } 423 424 void LocationsBuilderX86::VisitVecMul(HVecMul* instruction) { 425 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); 426 } 427 428 void InstructionCodeGeneratorX86::VisitVecMul(HVecMul* instruction) { 429 LocationSummary* locations = instruction->GetLocations(); 430 DCHECK(locations->InAt(0).Equals(locations->Out())); 431 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 432 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 433 switch (instruction->GetPackedType()) { 434 case Primitive::kPrimChar: 435 case Primitive::kPrimShort: 436 DCHECK_EQ(8u, instruction->GetVectorLength()); 437 __ pmullw(dst, src); 438 break; 439 case Primitive::kPrimInt: 440 DCHECK_EQ(4u, instruction->GetVectorLength()); 441 __ pmulld(dst, src); 442 break; 443 case Primitive::kPrimFloat: 444 DCHECK_EQ(4u, instruction->GetVectorLength()); 445 __ mulps(dst, src); 446 break; 447 case Primitive::kPrimDouble: 448 DCHECK_EQ(2u, instruction->GetVectorLength()); 449 __ mulpd(dst, src); 450 break; 451 default: 452 LOG(FATAL) << "Unsupported SIMD type"; 453 UNREACHABLE(); 454 } 455 } 456 457 void LocationsBuilderX86::VisitVecDiv(HVecDiv* instruction) { 458 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); 459 } 460 461 void InstructionCodeGeneratorX86::VisitVecDiv(HVecDiv* instruction) { 462 LocationSummary* locations = instruction->GetLocations(); 463 DCHECK(locations->InAt(0).Equals(locations->Out())); 464 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 465 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 466 switch (instruction->GetPackedType()) { 467 case Primitive::kPrimFloat: 468 DCHECK_EQ(4u, instruction->GetVectorLength()); 469 __ divps(dst, src); 470 break; 471 case Primitive::kPrimDouble: 472 DCHECK_EQ(2u, instruction->GetVectorLength()); 473 __ divpd(dst, src); 474 break; 475 default: 476 LOG(FATAL) << "Unsupported SIMD type"; 477 UNREACHABLE(); 478 } 479 } 480 481 void LocationsBuilderX86::VisitVecMin(HVecMin* instruction) { 482 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); 483 } 484 485 void InstructionCodeGeneratorX86::VisitVecMin(HVecMin* instruction) { 486 LocationSummary* locations = instruction->GetLocations(); 487 DCHECK(locations->InAt(0).Equals(locations->Out())); 488 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 489 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 490 switch (instruction->GetPackedType()) { 491 case Primitive::kPrimByte: 492 DCHECK_EQ(16u, instruction->GetVectorLength()); 493 if (instruction->IsUnsigned()) { 494 __ pminub(dst, src); 495 } else { 496 __ pminsb(dst, src); 497 } 498 break; 499 case Primitive::kPrimChar: 500 case Primitive::kPrimShort: 501 DCHECK_EQ(8u, instruction->GetVectorLength()); 502 if (instruction->IsUnsigned()) { 503 __ pminuw(dst, src); 504 } else { 505 __ pminsw(dst, src); 506 } 507 break; 508 case Primitive::kPrimInt: 509 DCHECK_EQ(4u, instruction->GetVectorLength()); 510 if (instruction->IsUnsigned()) { 511 __ pminud(dst, src); 512 } else { 513 __ pminsd(dst, src); 514 } 515 break; 516 // Next cases are sloppy wrt 0.0 vs -0.0. 517 case Primitive::kPrimFloat: 518 DCHECK_EQ(4u, instruction->GetVectorLength()); 519 DCHECK(!instruction->IsUnsigned()); 520 __ minps(dst, src); 521 break; 522 case Primitive::kPrimDouble: 523 DCHECK_EQ(2u, instruction->GetVectorLength()); 524 DCHECK(!instruction->IsUnsigned()); 525 __ minpd(dst, src); 526 break; 527 default: 528 LOG(FATAL) << "Unsupported SIMD type"; 529 UNREACHABLE(); 530 } 531 } 532 533 void LocationsBuilderX86::VisitVecMax(HVecMax* instruction) { 534 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); 535 } 536 537 void InstructionCodeGeneratorX86::VisitVecMax(HVecMax* instruction) { 538 LocationSummary* locations = instruction->GetLocations(); 539 DCHECK(locations->InAt(0).Equals(locations->Out())); 540 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 541 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 542 switch (instruction->GetPackedType()) { 543 case Primitive::kPrimByte: 544 DCHECK_EQ(16u, instruction->GetVectorLength()); 545 if (instruction->IsUnsigned()) { 546 __ pmaxub(dst, src); 547 } else { 548 __ pmaxsb(dst, src); 549 } 550 break; 551 case Primitive::kPrimChar: 552 case Primitive::kPrimShort: 553 DCHECK_EQ(8u, instruction->GetVectorLength()); 554 if (instruction->IsUnsigned()) { 555 __ pmaxuw(dst, src); 556 } else { 557 __ pmaxsw(dst, src); 558 } 559 break; 560 case Primitive::kPrimInt: 561 DCHECK_EQ(4u, instruction->GetVectorLength()); 562 if (instruction->IsUnsigned()) { 563 __ pmaxud(dst, src); 564 } else { 565 __ pmaxsd(dst, src); 566 } 567 break; 568 // Next cases are sloppy wrt 0.0 vs -0.0. 569 case Primitive::kPrimFloat: 570 DCHECK_EQ(4u, instruction->GetVectorLength()); 571 DCHECK(!instruction->IsUnsigned()); 572 __ maxps(dst, src); 573 break; 574 case Primitive::kPrimDouble: 575 DCHECK_EQ(2u, instruction->GetVectorLength()); 576 DCHECK(!instruction->IsUnsigned()); 577 __ maxpd(dst, src); 578 break; 579 default: 580 LOG(FATAL) << "Unsupported SIMD type"; 581 UNREACHABLE(); 582 } 583 } 584 585 void LocationsBuilderX86::VisitVecAnd(HVecAnd* instruction) { 586 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); 587 } 588 589 void InstructionCodeGeneratorX86::VisitVecAnd(HVecAnd* instruction) { 590 LocationSummary* locations = instruction->GetLocations(); 591 DCHECK(locations->InAt(0).Equals(locations->Out())); 592 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 593 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 594 switch (instruction->GetPackedType()) { 595 case Primitive::kPrimBoolean: 596 case Primitive::kPrimByte: 597 case Primitive::kPrimChar: 598 case Primitive::kPrimShort: 599 case Primitive::kPrimInt: 600 case Primitive::kPrimLong: 601 DCHECK_LE(2u, instruction->GetVectorLength()); 602 DCHECK_LE(instruction->GetVectorLength(), 16u); 603 __ pand(dst, src); 604 break; 605 case Primitive::kPrimFloat: 606 DCHECK_EQ(4u, instruction->GetVectorLength()); 607 __ andps(dst, src); 608 break; 609 case Primitive::kPrimDouble: 610 DCHECK_EQ(2u, instruction->GetVectorLength()); 611 __ andpd(dst, src); 612 break; 613 default: 614 LOG(FATAL) << "Unsupported SIMD type"; 615 UNREACHABLE(); 616 } 617 } 618 619 void LocationsBuilderX86::VisitVecAndNot(HVecAndNot* instruction) { 620 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); 621 } 622 623 void InstructionCodeGeneratorX86::VisitVecAndNot(HVecAndNot* instruction) { 624 LocationSummary* locations = instruction->GetLocations(); 625 DCHECK(locations->InAt(0).Equals(locations->Out())); 626 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 627 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 628 switch (instruction->GetPackedType()) { 629 case Primitive::kPrimBoolean: 630 case Primitive::kPrimByte: 631 case Primitive::kPrimChar: 632 case Primitive::kPrimShort: 633 case Primitive::kPrimInt: 634 case Primitive::kPrimLong: 635 DCHECK_LE(2u, instruction->GetVectorLength()); 636 DCHECK_LE(instruction->GetVectorLength(), 16u); 637 __ pandn(dst, src); 638 break; 639 case Primitive::kPrimFloat: 640 DCHECK_EQ(4u, instruction->GetVectorLength()); 641 __ andnps(dst, src); 642 break; 643 case Primitive::kPrimDouble: 644 DCHECK_EQ(2u, instruction->GetVectorLength()); 645 __ andnpd(dst, src); 646 break; 647 default: 648 LOG(FATAL) << "Unsupported SIMD type"; 649 UNREACHABLE(); 650 } 651 } 652 653 void LocationsBuilderX86::VisitVecOr(HVecOr* instruction) { 654 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); 655 } 656 657 void InstructionCodeGeneratorX86::VisitVecOr(HVecOr* instruction) { 658 LocationSummary* locations = instruction->GetLocations(); 659 DCHECK(locations->InAt(0).Equals(locations->Out())); 660 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 661 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 662 switch (instruction->GetPackedType()) { 663 case Primitive::kPrimBoolean: 664 case Primitive::kPrimByte: 665 case Primitive::kPrimChar: 666 case Primitive::kPrimShort: 667 case Primitive::kPrimInt: 668 case Primitive::kPrimLong: 669 DCHECK_LE(2u, instruction->GetVectorLength()); 670 DCHECK_LE(instruction->GetVectorLength(), 16u); 671 __ por(dst, src); 672 break; 673 case Primitive::kPrimFloat: 674 DCHECK_EQ(4u, instruction->GetVectorLength()); 675 __ orps(dst, src); 676 break; 677 case Primitive::kPrimDouble: 678 DCHECK_EQ(2u, instruction->GetVectorLength()); 679 __ orpd(dst, src); 680 break; 681 default: 682 LOG(FATAL) << "Unsupported SIMD type"; 683 UNREACHABLE(); 684 } 685 } 686 687 void LocationsBuilderX86::VisitVecXor(HVecXor* instruction) { 688 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); 689 } 690 691 void InstructionCodeGeneratorX86::VisitVecXor(HVecXor* instruction) { 692 LocationSummary* locations = instruction->GetLocations(); 693 DCHECK(locations->InAt(0).Equals(locations->Out())); 694 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 695 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 696 switch (instruction->GetPackedType()) { 697 case Primitive::kPrimBoolean: 698 case Primitive::kPrimByte: 699 case Primitive::kPrimChar: 700 case Primitive::kPrimShort: 701 case Primitive::kPrimInt: 702 case Primitive::kPrimLong: 703 DCHECK_LE(2u, instruction->GetVectorLength()); 704 DCHECK_LE(instruction->GetVectorLength(), 16u); 705 __ pxor(dst, src); 706 break; 707 case Primitive::kPrimFloat: 708 DCHECK_EQ(4u, instruction->GetVectorLength()); 709 __ xorps(dst, src); 710 break; 711 case Primitive::kPrimDouble: 712 DCHECK_EQ(2u, instruction->GetVectorLength()); 713 __ xorpd(dst, src); 714 break; 715 default: 716 LOG(FATAL) << "Unsupported SIMD type"; 717 UNREACHABLE(); 718 } 719 } 720 721 // Helper to set up locations for vector shift operations. 722 static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { 723 LocationSummary* locations = new (arena) LocationSummary(instruction); 724 switch (instruction->GetPackedType()) { 725 case Primitive::kPrimChar: 726 case Primitive::kPrimShort: 727 case Primitive::kPrimInt: 728 case Primitive::kPrimLong: 729 locations->SetInAt(0, Location::RequiresFpuRegister()); 730 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); 731 locations->SetOut(Location::SameAsFirstInput()); 732 break; 733 default: 734 LOG(FATAL) << "Unsupported SIMD type"; 735 UNREACHABLE(); 736 } 737 } 738 739 void LocationsBuilderX86::VisitVecShl(HVecShl* instruction) { 740 CreateVecShiftLocations(GetGraph()->GetArena(), instruction); 741 } 742 743 void InstructionCodeGeneratorX86::VisitVecShl(HVecShl* instruction) { 744 LocationSummary* locations = instruction->GetLocations(); 745 DCHECK(locations->InAt(0).Equals(locations->Out())); 746 int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); 747 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 748 switch (instruction->GetPackedType()) { 749 case Primitive::kPrimChar: 750 case Primitive::kPrimShort: 751 DCHECK_EQ(8u, instruction->GetVectorLength()); 752 __ psllw(dst, Immediate(static_cast<uint8_t>(value))); 753 break; 754 case Primitive::kPrimInt: 755 DCHECK_EQ(4u, instruction->GetVectorLength()); 756 __ pslld(dst, Immediate(static_cast<uint8_t>(value))); 757 break; 758 case Primitive::kPrimLong: 759 DCHECK_EQ(2u, instruction->GetVectorLength()); 760 __ psllq(dst, Immediate(static_cast<uint8_t>(value))); 761 break; 762 default: 763 LOG(FATAL) << "Unsupported SIMD type"; 764 UNREACHABLE(); 765 } 766 } 767 768 void LocationsBuilderX86::VisitVecShr(HVecShr* instruction) { 769 CreateVecShiftLocations(GetGraph()->GetArena(), instruction); 770 } 771 772 void InstructionCodeGeneratorX86::VisitVecShr(HVecShr* instruction) { 773 LocationSummary* locations = instruction->GetLocations(); 774 DCHECK(locations->InAt(0).Equals(locations->Out())); 775 int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); 776 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 777 switch (instruction->GetPackedType()) { 778 case Primitive::kPrimChar: 779 case Primitive::kPrimShort: 780 DCHECK_EQ(8u, instruction->GetVectorLength()); 781 __ psraw(dst, Immediate(static_cast<uint8_t>(value))); 782 break; 783 case Primitive::kPrimInt: 784 DCHECK_EQ(4u, instruction->GetVectorLength()); 785 __ psrad(dst, Immediate(static_cast<uint8_t>(value))); 786 break; 787 default: 788 LOG(FATAL) << "Unsupported SIMD type"; 789 UNREACHABLE(); 790 } 791 } 792 793 void LocationsBuilderX86::VisitVecUShr(HVecUShr* instruction) { 794 CreateVecShiftLocations(GetGraph()->GetArena(), instruction); 795 } 796 797 void InstructionCodeGeneratorX86::VisitVecUShr(HVecUShr* instruction) { 798 LocationSummary* locations = instruction->GetLocations(); 799 DCHECK(locations->InAt(0).Equals(locations->Out())); 800 int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); 801 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 802 switch (instruction->GetPackedType()) { 803 case Primitive::kPrimChar: 804 case Primitive::kPrimShort: 805 DCHECK_EQ(8u, instruction->GetVectorLength()); 806 __ psrlw(dst, Immediate(static_cast<uint8_t>(value))); 807 break; 808 case Primitive::kPrimInt: 809 DCHECK_EQ(4u, instruction->GetVectorLength()); 810 __ psrld(dst, Immediate(static_cast<uint8_t>(value))); 811 break; 812 case Primitive::kPrimLong: 813 DCHECK_EQ(2u, instruction->GetVectorLength()); 814 __ psrlq(dst, Immediate(static_cast<uint8_t>(value))); 815 break; 816 default: 817 LOG(FATAL) << "Unsupported SIMD type"; 818 UNREACHABLE(); 819 } 820 } 821 822 void LocationsBuilderX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { 823 LOG(FATAL) << "No SIMD for " << instr->GetId(); 824 } 825 826 void InstructionCodeGeneratorX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { 827 LOG(FATAL) << "No SIMD for " << instr->GetId(); 828 } 829 830 // Helper to set up locations for vector memory operations. 831 static void CreateVecMemLocations(ArenaAllocator* arena, 832 HVecMemoryOperation* instruction, 833 bool is_load) { 834 LocationSummary* locations = new (arena) LocationSummary(instruction); 835 switch (instruction->GetPackedType()) { 836 case Primitive::kPrimBoolean: 837 case Primitive::kPrimByte: 838 case Primitive::kPrimChar: 839 case Primitive::kPrimShort: 840 case Primitive::kPrimInt: 841 case Primitive::kPrimLong: 842 case Primitive::kPrimFloat: 843 case Primitive::kPrimDouble: 844 locations->SetInAt(0, Location::RequiresRegister()); 845 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); 846 if (is_load) { 847 locations->SetOut(Location::RequiresFpuRegister()); 848 } else { 849 locations->SetInAt(2, Location::RequiresFpuRegister()); 850 } 851 break; 852 default: 853 LOG(FATAL) << "Unsupported SIMD type"; 854 UNREACHABLE(); 855 } 856 } 857 858 // Helper to construct address for vector memory operations. 859 static Address VecAddress(LocationSummary* locations, size_t size, bool is_string_char_at) { 860 Location base = locations->InAt(0); 861 Location index = locations->InAt(1); 862 ScaleFactor scale = TIMES_1; 863 switch (size) { 864 case 2: scale = TIMES_2; break; 865 case 4: scale = TIMES_4; break; 866 case 8: scale = TIMES_8; break; 867 default: break; 868 } 869 uint32_t offset = is_string_char_at 870 ? mirror::String::ValueOffset().Uint32Value() 871 : mirror::Array::DataOffset(size).Uint32Value(); 872 return CodeGeneratorX86::ArrayAddress(base.AsRegister<Register>(), index, scale, offset); 873 } 874 875 void LocationsBuilderX86::VisitVecLoad(HVecLoad* instruction) { 876 CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ true); 877 // String load requires a temporary for the compressed load. 878 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { 879 instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister()); 880 } 881 } 882 883 void InstructionCodeGeneratorX86::VisitVecLoad(HVecLoad* instruction) { 884 LocationSummary* locations = instruction->GetLocations(); 885 size_t size = Primitive::ComponentSize(instruction->GetPackedType()); 886 Address address = VecAddress(locations, size, instruction->IsStringCharAt()); 887 XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>(); 888 bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16); 889 switch (instruction->GetPackedType()) { 890 case Primitive::kPrimChar: 891 DCHECK_EQ(8u, instruction->GetVectorLength()); 892 // Special handling of compressed/uncompressed string load. 893 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { 894 NearLabel done, not_compressed; 895 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 896 // Test compression bit. 897 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, 898 "Expecting 0=compressed, 1=uncompressed"); 899 uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); 900 __ testb(Address(locations->InAt(0).AsRegister<Register>(), count_offset), Immediate(1)); 901 __ j(kNotZero, ¬_compressed); 902 // Zero extend 8 compressed bytes into 8 chars. 903 __ movsd(reg, VecAddress(locations, 1, /*is_string_char_at*/ true)); 904 __ pxor(tmp, tmp); 905 __ punpcklbw(reg, tmp); 906 __ jmp(&done); 907 // Load 4 direct uncompressed chars. 908 __ Bind(¬_compressed); 909 is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address); 910 __ Bind(&done); 911 return; 912 } 913 FALLTHROUGH_INTENDED; 914 case Primitive::kPrimBoolean: 915 case Primitive::kPrimByte: 916 case Primitive::kPrimShort: 917 case Primitive::kPrimInt: 918 case Primitive::kPrimLong: 919 DCHECK_LE(2u, instruction->GetVectorLength()); 920 DCHECK_LE(instruction->GetVectorLength(), 16u); 921 is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address); 922 break; 923 case Primitive::kPrimFloat: 924 DCHECK_EQ(4u, instruction->GetVectorLength()); 925 is_aligned16 ? __ movaps(reg, address) : __ movups(reg, address); 926 break; 927 case Primitive::kPrimDouble: 928 DCHECK_EQ(2u, instruction->GetVectorLength()); 929 is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address); 930 break; 931 default: 932 LOG(FATAL) << "Unsupported SIMD type"; 933 UNREACHABLE(); 934 } 935 } 936 937 void LocationsBuilderX86::VisitVecStore(HVecStore* instruction) { 938 CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ false); 939 } 940 941 void InstructionCodeGeneratorX86::VisitVecStore(HVecStore* instruction) { 942 LocationSummary* locations = instruction->GetLocations(); 943 size_t size = Primitive::ComponentSize(instruction->GetPackedType()); 944 Address address = VecAddress(locations, size, /*is_string_char_at*/ false); 945 XmmRegister reg = locations->InAt(2).AsFpuRegister<XmmRegister>(); 946 bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16); 947 switch (instruction->GetPackedType()) { 948 case Primitive::kPrimBoolean: 949 case Primitive::kPrimByte: 950 case Primitive::kPrimChar: 951 case Primitive::kPrimShort: 952 case Primitive::kPrimInt: 953 case Primitive::kPrimLong: 954 DCHECK_LE(2u, instruction->GetVectorLength()); 955 DCHECK_LE(instruction->GetVectorLength(), 16u); 956 is_aligned16 ? __ movdqa(address, reg) : __ movdqu(address, reg); 957 break; 958 case Primitive::kPrimFloat: 959 DCHECK_EQ(4u, instruction->GetVectorLength()); 960 is_aligned16 ? __ movaps(address, reg) : __ movups(address, reg); 961 break; 962 case Primitive::kPrimDouble: 963 DCHECK_EQ(2u, instruction->GetVectorLength()); 964 is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg); 965 break; 966 default: 967 LOG(FATAL) << "Unsupported SIMD type"; 968 UNREACHABLE(); 969 } 970 } 971 972 #undef __ 973 974 } // namespace x86 975 } // namespace art 976