1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "code_generator_x86_64.h" 18 19 #include "mirror/array-inl.h" 20 #include "mirror/string.h" 21 22 namespace art { 23 namespace x86_64 { 24 25 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. 26 #define __ down_cast<X86_64Assembler*>(GetAssembler())-> // NOLINT 27 28 void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { 29 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); 30 HInstruction* input = instruction->InputAt(0); 31 bool is_zero = IsZeroBitPattern(input); 32 switch (instruction->GetPackedType()) { 33 case DataType::Type::kBool: 34 case DataType::Type::kUint8: 35 case DataType::Type::kInt8: 36 case DataType::Type::kUint16: 37 case DataType::Type::kInt16: 38 case DataType::Type::kInt32: 39 case DataType::Type::kInt64: 40 locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) 41 : Location::RequiresRegister()); 42 locations->SetOut(Location::RequiresFpuRegister()); 43 break; 44 case DataType::Type::kFloat32: 45 case DataType::Type::kFloat64: 46 locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) 47 : Location::RequiresFpuRegister()); 48 locations->SetOut(is_zero ? Location::RequiresFpuRegister() 49 : Location::SameAsFirstInput()); 50 break; 51 default: 52 LOG(FATAL) << "Unsupported SIMD type"; 53 UNREACHABLE(); 54 } 55 } 56 57 void InstructionCodeGeneratorX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { 58 LocationSummary* locations = instruction->GetLocations(); 59 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 60 61 // Shorthand for any type of zero. 62 if (IsZeroBitPattern(instruction->InputAt(0))) { 63 __ xorps(dst, dst); 64 return; 65 } 66 67 switch (instruction->GetPackedType()) { 68 case DataType::Type::kBool: 69 case DataType::Type::kUint8: 70 case DataType::Type::kInt8: 71 DCHECK_EQ(16u, instruction->GetVectorLength()); 72 __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ false); 73 __ punpcklbw(dst, dst); 74 __ punpcklwd(dst, dst); 75 __ pshufd(dst, dst, Immediate(0)); 76 break; 77 case DataType::Type::kUint16: 78 case DataType::Type::kInt16: 79 DCHECK_EQ(8u, instruction->GetVectorLength()); 80 __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ false); 81 __ punpcklwd(dst, dst); 82 __ pshufd(dst, dst, Immediate(0)); 83 break; 84 case DataType::Type::kInt32: 85 DCHECK_EQ(4u, instruction->GetVectorLength()); 86 __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ false); 87 __ pshufd(dst, dst, Immediate(0)); 88 break; 89 case DataType::Type::kInt64: 90 DCHECK_EQ(2u, instruction->GetVectorLength()); 91 __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ true); 92 __ punpcklqdq(dst, dst); 93 break; 94 case DataType::Type::kFloat32: 95 DCHECK_EQ(4u, instruction->GetVectorLength()); 96 DCHECK(locations->InAt(0).Equals(locations->Out())); 97 __ shufps(dst, dst, Immediate(0)); 98 break; 99 case DataType::Type::kFloat64: 100 DCHECK_EQ(2u, instruction->GetVectorLength()); 101 DCHECK(locations->InAt(0).Equals(locations->Out())); 102 __ shufpd(dst, dst, Immediate(0)); 103 break; 104 default: 105 LOG(FATAL) << "Unsupported SIMD type"; 106 UNREACHABLE(); 107 } 108 } 109 110 void LocationsBuilderX86_64::VisitVecExtractScalar(HVecExtractScalar* instruction) { 111 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); 112 switch (instruction->GetPackedType()) { 113 case DataType::Type::kBool: 114 case DataType::Type::kUint8: 115 case DataType::Type::kInt8: 116 case DataType::Type::kUint16: 117 case DataType::Type::kInt16: 118 case DataType::Type::kInt32: 119 case DataType::Type::kInt64: 120 locations->SetInAt(0, Location::RequiresFpuRegister()); 121 locations->SetOut(Location::RequiresRegister()); 122 break; 123 case DataType::Type::kFloat32: 124 case DataType::Type::kFloat64: 125 locations->SetInAt(0, Location::RequiresFpuRegister()); 126 locations->SetOut(Location::SameAsFirstInput()); 127 break; 128 default: 129 LOG(FATAL) << "Unsupported SIMD type"; 130 UNREACHABLE(); 131 } 132 } 133 134 void InstructionCodeGeneratorX86_64::VisitVecExtractScalar(HVecExtractScalar* instruction) { 135 LocationSummary* locations = instruction->GetLocations(); 136 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); 137 switch (instruction->GetPackedType()) { 138 case DataType::Type::kBool: 139 case DataType::Type::kUint8: 140 case DataType::Type::kInt8: 141 case DataType::Type::kUint16: 142 case DataType::Type::kInt16: // TODO: up to here, and? 143 LOG(FATAL) << "Unsupported SIMD type"; 144 UNREACHABLE(); 145 case DataType::Type::kInt32: 146 DCHECK_EQ(4u, instruction->GetVectorLength()); 147 __ movd(locations->Out().AsRegister<CpuRegister>(), src, /*64-bit*/ false); 148 break; 149 case DataType::Type::kInt64: 150 DCHECK_EQ(2u, instruction->GetVectorLength()); 151 __ movd(locations->Out().AsRegister<CpuRegister>(), src, /*64-bit*/ true); 152 break; 153 case DataType::Type::kFloat32: 154 case DataType::Type::kFloat64: 155 DCHECK_LE(2u, instruction->GetVectorLength()); 156 DCHECK_LE(instruction->GetVectorLength(), 4u); 157 DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required 158 break; 159 default: 160 LOG(FATAL) << "Unsupported SIMD type"; 161 UNREACHABLE(); 162 } 163 } 164 165 // Helper to set up locations for vector unary operations. 166 static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) { 167 LocationSummary* locations = new (allocator) LocationSummary(instruction); 168 switch (instruction->GetPackedType()) { 169 case DataType::Type::kBool: 170 case DataType::Type::kUint8: 171 case DataType::Type::kInt8: 172 case DataType::Type::kUint16: 173 case DataType::Type::kInt16: 174 case DataType::Type::kInt32: 175 case DataType::Type::kInt64: 176 case DataType::Type::kFloat32: 177 case DataType::Type::kFloat64: 178 locations->SetInAt(0, Location::RequiresFpuRegister()); 179 locations->SetOut(Location::RequiresFpuRegister()); 180 break; 181 default: 182 LOG(FATAL) << "Unsupported SIMD type"; 183 UNREACHABLE(); 184 } 185 } 186 187 void LocationsBuilderX86_64::VisitVecReduce(HVecReduce* instruction) { 188 CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); 189 // Long reduction or min/max require a temporary. 190 if (instruction->GetPackedType() == DataType::Type::kInt64 || 191 instruction->GetKind() == HVecReduce::kMin || 192 instruction->GetKind() == HVecReduce::kMax) { 193 instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister()); 194 } 195 } 196 197 void InstructionCodeGeneratorX86_64::VisitVecReduce(HVecReduce* instruction) { 198 LocationSummary* locations = instruction->GetLocations(); 199 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); 200 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 201 switch (instruction->GetPackedType()) { 202 case DataType::Type::kInt32: 203 DCHECK_EQ(4u, instruction->GetVectorLength()); 204 switch (instruction->GetKind()) { 205 case HVecReduce::kSum: 206 __ movaps(dst, src); 207 __ phaddd(dst, dst); 208 __ phaddd(dst, dst); 209 break; 210 case HVecReduce::kMin: { 211 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 212 __ movaps(tmp, src); 213 __ movaps(dst, src); 214 __ psrldq(tmp, Immediate(8)); 215 __ pminsd(dst, tmp); 216 __ psrldq(tmp, Immediate(4)); 217 __ pminsd(dst, tmp); 218 break; 219 } 220 case HVecReduce::kMax: { 221 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 222 __ movaps(tmp, src); 223 __ movaps(dst, src); 224 __ psrldq(tmp, Immediate(8)); 225 __ pmaxsd(dst, tmp); 226 __ psrldq(tmp, Immediate(4)); 227 __ pmaxsd(dst, tmp); 228 break; 229 } 230 } 231 break; 232 case DataType::Type::kInt64: { 233 DCHECK_EQ(2u, instruction->GetVectorLength()); 234 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 235 switch (instruction->GetKind()) { 236 case HVecReduce::kSum: 237 __ movaps(tmp, src); 238 __ movaps(dst, src); 239 __ punpckhqdq(tmp, tmp); 240 __ paddq(dst, tmp); 241 break; 242 case HVecReduce::kMin: 243 case HVecReduce::kMax: 244 LOG(FATAL) << "Unsupported SIMD type"; 245 } 246 break; 247 } 248 default: 249 LOG(FATAL) << "Unsupported SIMD type"; 250 UNREACHABLE(); 251 } 252 } 253 254 void LocationsBuilderX86_64::VisitVecCnv(HVecCnv* instruction) { 255 CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); 256 } 257 258 void InstructionCodeGeneratorX86_64::VisitVecCnv(HVecCnv* instruction) { 259 LocationSummary* locations = instruction->GetLocations(); 260 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); 261 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 262 DataType::Type from = instruction->GetInputType(); 263 DataType::Type to = instruction->GetResultType(); 264 if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) { 265 DCHECK_EQ(4u, instruction->GetVectorLength()); 266 __ cvtdq2ps(dst, src); 267 } else { 268 LOG(FATAL) << "Unsupported SIMD type"; 269 } 270 } 271 272 void LocationsBuilderX86_64::VisitVecNeg(HVecNeg* instruction) { 273 CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); 274 } 275 276 void InstructionCodeGeneratorX86_64::VisitVecNeg(HVecNeg* instruction) { 277 LocationSummary* locations = instruction->GetLocations(); 278 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); 279 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 280 switch (instruction->GetPackedType()) { 281 case DataType::Type::kUint8: 282 case DataType::Type::kInt8: 283 DCHECK_EQ(16u, instruction->GetVectorLength()); 284 __ pxor(dst, dst); 285 __ psubb(dst, src); 286 break; 287 case DataType::Type::kUint16: 288 case DataType::Type::kInt16: 289 DCHECK_EQ(8u, instruction->GetVectorLength()); 290 __ pxor(dst, dst); 291 __ psubw(dst, src); 292 break; 293 case DataType::Type::kInt32: 294 DCHECK_EQ(4u, instruction->GetVectorLength()); 295 __ pxor(dst, dst); 296 __ psubd(dst, src); 297 break; 298 case DataType::Type::kInt64: 299 DCHECK_EQ(2u, instruction->GetVectorLength()); 300 __ pxor(dst, dst); 301 __ psubq(dst, src); 302 break; 303 case DataType::Type::kFloat32: 304 DCHECK_EQ(4u, instruction->GetVectorLength()); 305 __ xorps(dst, dst); 306 __ subps(dst, src); 307 break; 308 case DataType::Type::kFloat64: 309 DCHECK_EQ(2u, instruction->GetVectorLength()); 310 __ xorpd(dst, dst); 311 __ subpd(dst, src); 312 break; 313 default: 314 LOG(FATAL) << "Unsupported SIMD type"; 315 UNREACHABLE(); 316 } 317 } 318 319 void LocationsBuilderX86_64::VisitVecAbs(HVecAbs* instruction) { 320 CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); 321 // Integral-abs requires a temporary for the comparison. 322 if (instruction->GetPackedType() == DataType::Type::kInt32) { 323 instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister()); 324 } 325 } 326 327 void InstructionCodeGeneratorX86_64::VisitVecAbs(HVecAbs* instruction) { 328 LocationSummary* locations = instruction->GetLocations(); 329 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); 330 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 331 switch (instruction->GetPackedType()) { 332 case DataType::Type::kInt32: { 333 DCHECK_EQ(4u, instruction->GetVectorLength()); 334 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 335 __ movaps(dst, src); 336 __ pxor(tmp, tmp); 337 __ pcmpgtd(tmp, dst); 338 __ pxor(dst, tmp); 339 __ psubd(dst, tmp); 340 break; 341 } 342 case DataType::Type::kFloat32: 343 DCHECK_EQ(4u, instruction->GetVectorLength()); 344 __ pcmpeqb(dst, dst); // all ones 345 __ psrld(dst, Immediate(1)); 346 __ andps(dst, src); 347 break; 348 case DataType::Type::kFloat64: 349 DCHECK_EQ(2u, instruction->GetVectorLength()); 350 __ pcmpeqb(dst, dst); // all ones 351 __ psrlq(dst, Immediate(1)); 352 __ andpd(dst, src); 353 break; 354 default: 355 LOG(FATAL) << "Unsupported SIMD type"; 356 UNREACHABLE(); 357 } 358 } 359 360 void LocationsBuilderX86_64::VisitVecNot(HVecNot* instruction) { 361 CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); 362 // Boolean-not requires a temporary to construct the 16 x one. 363 if (instruction->GetPackedType() == DataType::Type::kBool) { 364 instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister()); 365 } 366 } 367 368 void InstructionCodeGeneratorX86_64::VisitVecNot(HVecNot* instruction) { 369 LocationSummary* locations = instruction->GetLocations(); 370 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); 371 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 372 switch (instruction->GetPackedType()) { 373 case DataType::Type::kBool: { // special case boolean-not 374 DCHECK_EQ(16u, instruction->GetVectorLength()); 375 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 376 __ pxor(dst, dst); 377 __ pcmpeqb(tmp, tmp); // all ones 378 __ psubb(dst, tmp); // 16 x one 379 __ pxor(dst, src); 380 break; 381 } 382 case DataType::Type::kUint8: 383 case DataType::Type::kInt8: 384 case DataType::Type::kUint16: 385 case DataType::Type::kInt16: 386 case DataType::Type::kInt32: 387 case DataType::Type::kInt64: 388 DCHECK_LE(2u, instruction->GetVectorLength()); 389 DCHECK_LE(instruction->GetVectorLength(), 16u); 390 __ pcmpeqb(dst, dst); // all ones 391 __ pxor(dst, src); 392 break; 393 case DataType::Type::kFloat32: 394 DCHECK_EQ(4u, instruction->GetVectorLength()); 395 __ pcmpeqb(dst, dst); // all ones 396 __ xorps(dst, src); 397 break; 398 case DataType::Type::kFloat64: 399 DCHECK_EQ(2u, instruction->GetVectorLength()); 400 __ pcmpeqb(dst, dst); // all ones 401 __ xorpd(dst, src); 402 break; 403 default: 404 LOG(FATAL) << "Unsupported SIMD type"; 405 UNREACHABLE(); 406 } 407 } 408 409 // Helper to set up locations for vector binary operations. 410 static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) { 411 LocationSummary* locations = new (allocator) LocationSummary(instruction); 412 switch (instruction->GetPackedType()) { 413 case DataType::Type::kBool: 414 case DataType::Type::kUint8: 415 case DataType::Type::kInt8: 416 case DataType::Type::kUint16: 417 case DataType::Type::kInt16: 418 case DataType::Type::kInt32: 419 case DataType::Type::kInt64: 420 case DataType::Type::kFloat32: 421 case DataType::Type::kFloat64: 422 locations->SetInAt(0, Location::RequiresFpuRegister()); 423 locations->SetInAt(1, Location::RequiresFpuRegister()); 424 locations->SetOut(Location::SameAsFirstInput()); 425 break; 426 default: 427 LOG(FATAL) << "Unsupported SIMD type"; 428 UNREACHABLE(); 429 } 430 } 431 432 void LocationsBuilderX86_64::VisitVecAdd(HVecAdd* instruction) { 433 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); 434 } 435 436 void InstructionCodeGeneratorX86_64::VisitVecAdd(HVecAdd* instruction) { 437 LocationSummary* locations = instruction->GetLocations(); 438 DCHECK(locations->InAt(0).Equals(locations->Out())); 439 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 440 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 441 switch (instruction->GetPackedType()) { 442 case DataType::Type::kUint8: 443 case DataType::Type::kInt8: 444 DCHECK_EQ(16u, instruction->GetVectorLength()); 445 __ paddb(dst, src); 446 break; 447 case DataType::Type::kUint16: 448 case DataType::Type::kInt16: 449 DCHECK_EQ(8u, instruction->GetVectorLength()); 450 __ paddw(dst, src); 451 break; 452 case DataType::Type::kInt32: 453 DCHECK_EQ(4u, instruction->GetVectorLength()); 454 __ paddd(dst, src); 455 break; 456 case DataType::Type::kInt64: 457 DCHECK_EQ(2u, instruction->GetVectorLength()); 458 __ paddq(dst, src); 459 break; 460 case DataType::Type::kFloat32: 461 DCHECK_EQ(4u, instruction->GetVectorLength()); 462 __ addps(dst, src); 463 break; 464 case DataType::Type::kFloat64: 465 DCHECK_EQ(2u, instruction->GetVectorLength()); 466 __ addpd(dst, src); 467 break; 468 default: 469 LOG(FATAL) << "Unsupported SIMD type"; 470 UNREACHABLE(); 471 } 472 } 473 474 void LocationsBuilderX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { 475 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); 476 } 477 478 void InstructionCodeGeneratorX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { 479 LocationSummary* locations = instruction->GetLocations(); 480 DCHECK(locations->InAt(0).Equals(locations->Out())); 481 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 482 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 483 484 DCHECK(instruction->IsRounded()); 485 486 switch (instruction->GetPackedType()) { 487 case DataType::Type::kUint8: 488 DCHECK_EQ(16u, instruction->GetVectorLength()); 489 __ pavgb(dst, src); 490 return; 491 case DataType::Type::kUint16: 492 DCHECK_EQ(8u, instruction->GetVectorLength()); 493 __ pavgw(dst, src); 494 return; 495 default: 496 LOG(FATAL) << "Unsupported SIMD type"; 497 UNREACHABLE(); 498 } 499 } 500 501 void LocationsBuilderX86_64::VisitVecSub(HVecSub* instruction) { 502 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); 503 } 504 505 void InstructionCodeGeneratorX86_64::VisitVecSub(HVecSub* instruction) { 506 LocationSummary* locations = instruction->GetLocations(); 507 DCHECK(locations->InAt(0).Equals(locations->Out())); 508 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 509 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 510 switch (instruction->GetPackedType()) { 511 case DataType::Type::kUint8: 512 case DataType::Type::kInt8: 513 DCHECK_EQ(16u, instruction->GetVectorLength()); 514 __ psubb(dst, src); 515 break; 516 case DataType::Type::kUint16: 517 case DataType::Type::kInt16: 518 DCHECK_EQ(8u, instruction->GetVectorLength()); 519 __ psubw(dst, src); 520 break; 521 case DataType::Type::kInt32: 522 DCHECK_EQ(4u, instruction->GetVectorLength()); 523 __ psubd(dst, src); 524 break; 525 case DataType::Type::kInt64: 526 DCHECK_EQ(2u, instruction->GetVectorLength()); 527 __ psubq(dst, src); 528 break; 529 case DataType::Type::kFloat32: 530 DCHECK_EQ(4u, instruction->GetVectorLength()); 531 __ subps(dst, src); 532 break; 533 case DataType::Type::kFloat64: 534 DCHECK_EQ(2u, instruction->GetVectorLength()); 535 __ subpd(dst, src); 536 break; 537 default: 538 LOG(FATAL) << "Unsupported SIMD type"; 539 UNREACHABLE(); 540 } 541 } 542 543 void LocationsBuilderX86_64::VisitVecMul(HVecMul* instruction) { 544 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); 545 } 546 547 void InstructionCodeGeneratorX86_64::VisitVecMul(HVecMul* instruction) { 548 LocationSummary* locations = instruction->GetLocations(); 549 DCHECK(locations->InAt(0).Equals(locations->Out())); 550 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 551 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 552 switch (instruction->GetPackedType()) { 553 case DataType::Type::kUint16: 554 case DataType::Type::kInt16: 555 DCHECK_EQ(8u, instruction->GetVectorLength()); 556 __ pmullw(dst, src); 557 break; 558 case DataType::Type::kInt32: 559 DCHECK_EQ(4u, instruction->GetVectorLength()); 560 __ pmulld(dst, src); 561 break; 562 case DataType::Type::kFloat32: 563 DCHECK_EQ(4u, instruction->GetVectorLength()); 564 __ mulps(dst, src); 565 break; 566 case DataType::Type::kFloat64: 567 DCHECK_EQ(2u, instruction->GetVectorLength()); 568 __ mulpd(dst, src); 569 break; 570 default: 571 LOG(FATAL) << "Unsupported SIMD type"; 572 UNREACHABLE(); 573 } 574 } 575 576 void LocationsBuilderX86_64::VisitVecDiv(HVecDiv* instruction) { 577 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); 578 } 579 580 void InstructionCodeGeneratorX86_64::VisitVecDiv(HVecDiv* instruction) { 581 LocationSummary* locations = instruction->GetLocations(); 582 DCHECK(locations->InAt(0).Equals(locations->Out())); 583 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 584 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 585 switch (instruction->GetPackedType()) { 586 case DataType::Type::kFloat32: 587 DCHECK_EQ(4u, instruction->GetVectorLength()); 588 __ divps(dst, src); 589 break; 590 case DataType::Type::kFloat64: 591 DCHECK_EQ(2u, instruction->GetVectorLength()); 592 __ divpd(dst, src); 593 break; 594 default: 595 LOG(FATAL) << "Unsupported SIMD type"; 596 UNREACHABLE(); 597 } 598 } 599 600 void LocationsBuilderX86_64::VisitVecMin(HVecMin* instruction) { 601 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); 602 } 603 604 void InstructionCodeGeneratorX86_64::VisitVecMin(HVecMin* instruction) { 605 LocationSummary* locations = instruction->GetLocations(); 606 DCHECK(locations->InAt(0).Equals(locations->Out())); 607 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 608 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 609 switch (instruction->GetPackedType()) { 610 case DataType::Type::kUint8: 611 DCHECK_EQ(16u, instruction->GetVectorLength()); 612 __ pminub(dst, src); 613 break; 614 case DataType::Type::kInt8: 615 DCHECK_EQ(16u, instruction->GetVectorLength()); 616 __ pminsb(dst, src); 617 break; 618 case DataType::Type::kUint16: 619 DCHECK_EQ(8u, instruction->GetVectorLength()); 620 __ pminuw(dst, src); 621 break; 622 case DataType::Type::kInt16: 623 DCHECK_EQ(8u, instruction->GetVectorLength()); 624 __ pminsw(dst, src); 625 break; 626 case DataType::Type::kUint32: 627 DCHECK_EQ(4u, instruction->GetVectorLength()); 628 __ pminud(dst, src); 629 break; 630 case DataType::Type::kInt32: 631 DCHECK_EQ(4u, instruction->GetVectorLength()); 632 __ pminsd(dst, src); 633 break; 634 // Next cases are sloppy wrt 0.0 vs -0.0. 635 case DataType::Type::kFloat32: 636 DCHECK_EQ(4u, instruction->GetVectorLength()); 637 __ minps(dst, src); 638 break; 639 case DataType::Type::kFloat64: 640 DCHECK_EQ(2u, instruction->GetVectorLength()); 641 __ minpd(dst, src); 642 break; 643 default: 644 LOG(FATAL) << "Unsupported SIMD type"; 645 UNREACHABLE(); 646 } 647 } 648 649 void LocationsBuilderX86_64::VisitVecMax(HVecMax* instruction) { 650 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); 651 } 652 653 void InstructionCodeGeneratorX86_64::VisitVecMax(HVecMax* instruction) { 654 LocationSummary* locations = instruction->GetLocations(); 655 DCHECK(locations->InAt(0).Equals(locations->Out())); 656 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 657 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 658 switch (instruction->GetPackedType()) { 659 case DataType::Type::kUint8: 660 DCHECK_EQ(16u, instruction->GetVectorLength()); 661 __ pmaxub(dst, src); 662 break; 663 case DataType::Type::kInt8: 664 DCHECK_EQ(16u, instruction->GetVectorLength()); 665 __ pmaxsb(dst, src); 666 break; 667 case DataType::Type::kUint16: 668 DCHECK_EQ(8u, instruction->GetVectorLength()); 669 __ pmaxuw(dst, src); 670 break; 671 case DataType::Type::kInt16: 672 DCHECK_EQ(8u, instruction->GetVectorLength()); 673 __ pmaxsw(dst, src); 674 break; 675 case DataType::Type::kUint32: 676 DCHECK_EQ(4u, instruction->GetVectorLength()); 677 __ pmaxud(dst, src); 678 break; 679 case DataType::Type::kInt32: 680 DCHECK_EQ(4u, instruction->GetVectorLength()); 681 __ pmaxsd(dst, src); 682 break; 683 // Next cases are sloppy wrt 0.0 vs -0.0. 684 case DataType::Type::kFloat32: 685 DCHECK_EQ(4u, instruction->GetVectorLength()); 686 __ maxps(dst, src); 687 break; 688 case DataType::Type::kFloat64: 689 DCHECK_EQ(2u, instruction->GetVectorLength()); 690 __ maxpd(dst, src); 691 break; 692 default: 693 LOG(FATAL) << "Unsupported SIMD type"; 694 UNREACHABLE(); 695 } 696 } 697 698 void LocationsBuilderX86_64::VisitVecAnd(HVecAnd* instruction) { 699 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); 700 } 701 702 void InstructionCodeGeneratorX86_64::VisitVecAnd(HVecAnd* instruction) { 703 LocationSummary* locations = instruction->GetLocations(); 704 DCHECK(locations->InAt(0).Equals(locations->Out())); 705 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 706 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 707 switch (instruction->GetPackedType()) { 708 case DataType::Type::kBool: 709 case DataType::Type::kUint8: 710 case DataType::Type::kInt8: 711 case DataType::Type::kUint16: 712 case DataType::Type::kInt16: 713 case DataType::Type::kInt32: 714 case DataType::Type::kInt64: 715 DCHECK_LE(2u, instruction->GetVectorLength()); 716 DCHECK_LE(instruction->GetVectorLength(), 16u); 717 __ pand(dst, src); 718 break; 719 case DataType::Type::kFloat32: 720 DCHECK_EQ(4u, instruction->GetVectorLength()); 721 __ andps(dst, src); 722 break; 723 case DataType::Type::kFloat64: 724 DCHECK_EQ(2u, instruction->GetVectorLength()); 725 __ andpd(dst, src); 726 break; 727 default: 728 LOG(FATAL) << "Unsupported SIMD type"; 729 UNREACHABLE(); 730 } 731 } 732 733 void LocationsBuilderX86_64::VisitVecAndNot(HVecAndNot* instruction) { 734 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); 735 } 736 737 void InstructionCodeGeneratorX86_64::VisitVecAndNot(HVecAndNot* instruction) { 738 LocationSummary* locations = instruction->GetLocations(); 739 DCHECK(locations->InAt(0).Equals(locations->Out())); 740 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 741 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 742 switch (instruction->GetPackedType()) { 743 case DataType::Type::kBool: 744 case DataType::Type::kUint8: 745 case DataType::Type::kInt8: 746 case DataType::Type::kUint16: 747 case DataType::Type::kInt16: 748 case DataType::Type::kInt32: 749 case DataType::Type::kInt64: 750 DCHECK_LE(2u, instruction->GetVectorLength()); 751 DCHECK_LE(instruction->GetVectorLength(), 16u); 752 __ pandn(dst, src); 753 break; 754 case DataType::Type::kFloat32: 755 DCHECK_EQ(4u, instruction->GetVectorLength()); 756 __ andnps(dst, src); 757 break; 758 case DataType::Type::kFloat64: 759 DCHECK_EQ(2u, instruction->GetVectorLength()); 760 __ andnpd(dst, src); 761 break; 762 default: 763 LOG(FATAL) << "Unsupported SIMD type"; 764 UNREACHABLE(); 765 } 766 } 767 768 void LocationsBuilderX86_64::VisitVecOr(HVecOr* instruction) { 769 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); 770 } 771 772 void InstructionCodeGeneratorX86_64::VisitVecOr(HVecOr* instruction) { 773 LocationSummary* locations = instruction->GetLocations(); 774 DCHECK(locations->InAt(0).Equals(locations->Out())); 775 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 776 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 777 switch (instruction->GetPackedType()) { 778 case DataType::Type::kBool: 779 case DataType::Type::kUint8: 780 case DataType::Type::kInt8: 781 case DataType::Type::kUint16: 782 case DataType::Type::kInt16: 783 case DataType::Type::kInt32: 784 case DataType::Type::kInt64: 785 DCHECK_LE(2u, instruction->GetVectorLength()); 786 DCHECK_LE(instruction->GetVectorLength(), 16u); 787 __ por(dst, src); 788 break; 789 case DataType::Type::kFloat32: 790 DCHECK_EQ(4u, instruction->GetVectorLength()); 791 __ orps(dst, src); 792 break; 793 case DataType::Type::kFloat64: 794 DCHECK_EQ(2u, instruction->GetVectorLength()); 795 __ orpd(dst, src); 796 break; 797 default: 798 LOG(FATAL) << "Unsupported SIMD type"; 799 UNREACHABLE(); 800 } 801 } 802 803 void LocationsBuilderX86_64::VisitVecXor(HVecXor* instruction) { 804 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); 805 } 806 807 void InstructionCodeGeneratorX86_64::VisitVecXor(HVecXor* instruction) { 808 LocationSummary* locations = instruction->GetLocations(); 809 DCHECK(locations->InAt(0).Equals(locations->Out())); 810 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 811 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 812 switch (instruction->GetPackedType()) { 813 case DataType::Type::kBool: 814 case DataType::Type::kUint8: 815 case DataType::Type::kInt8: 816 case DataType::Type::kUint16: 817 case DataType::Type::kInt16: 818 case DataType::Type::kInt32: 819 case DataType::Type::kInt64: 820 DCHECK_LE(2u, instruction->GetVectorLength()); 821 DCHECK_LE(instruction->GetVectorLength(), 16u); 822 __ pxor(dst, src); 823 break; 824 case DataType::Type::kFloat32: 825 DCHECK_EQ(4u, instruction->GetVectorLength()); 826 __ xorps(dst, src); 827 break; 828 case DataType::Type::kFloat64: 829 DCHECK_EQ(2u, instruction->GetVectorLength()); 830 __ xorpd(dst, src); 831 break; 832 default: 833 LOG(FATAL) << "Unsupported SIMD type"; 834 UNREACHABLE(); 835 } 836 } 837 838 // Helper to set up locations for vector shift operations. 839 static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) { 840 LocationSummary* locations = new (allocator) LocationSummary(instruction); 841 switch (instruction->GetPackedType()) { 842 case DataType::Type::kUint16: 843 case DataType::Type::kInt16: 844 case DataType::Type::kInt32: 845 case DataType::Type::kInt64: 846 locations->SetInAt(0, Location::RequiresFpuRegister()); 847 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); 848 locations->SetOut(Location::SameAsFirstInput()); 849 break; 850 default: 851 LOG(FATAL) << "Unsupported SIMD type"; 852 UNREACHABLE(); 853 } 854 } 855 856 void LocationsBuilderX86_64::VisitVecShl(HVecShl* instruction) { 857 CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction); 858 } 859 860 void InstructionCodeGeneratorX86_64::VisitVecShl(HVecShl* instruction) { 861 LocationSummary* locations = instruction->GetLocations(); 862 DCHECK(locations->InAt(0).Equals(locations->Out())); 863 int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); 864 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 865 switch (instruction->GetPackedType()) { 866 case DataType::Type::kUint16: 867 case DataType::Type::kInt16: 868 DCHECK_EQ(8u, instruction->GetVectorLength()); 869 __ psllw(dst, Immediate(static_cast<int8_t>(value))); 870 break; 871 case DataType::Type::kInt32: 872 DCHECK_EQ(4u, instruction->GetVectorLength()); 873 __ pslld(dst, Immediate(static_cast<int8_t>(value))); 874 break; 875 case DataType::Type::kInt64: 876 DCHECK_EQ(2u, instruction->GetVectorLength()); 877 __ psllq(dst, Immediate(static_cast<int8_t>(value))); 878 break; 879 default: 880 LOG(FATAL) << "Unsupported SIMD type"; 881 UNREACHABLE(); 882 } 883 } 884 885 void LocationsBuilderX86_64::VisitVecShr(HVecShr* instruction) { 886 CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction); 887 } 888 889 void InstructionCodeGeneratorX86_64::VisitVecShr(HVecShr* instruction) { 890 LocationSummary* locations = instruction->GetLocations(); 891 DCHECK(locations->InAt(0).Equals(locations->Out())); 892 int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); 893 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 894 switch (instruction->GetPackedType()) { 895 case DataType::Type::kUint16: 896 case DataType::Type::kInt16: 897 DCHECK_EQ(8u, instruction->GetVectorLength()); 898 __ psraw(dst, Immediate(static_cast<int8_t>(value))); 899 break; 900 case DataType::Type::kInt32: 901 DCHECK_EQ(4u, instruction->GetVectorLength()); 902 __ psrad(dst, Immediate(static_cast<int8_t>(value))); 903 break; 904 default: 905 LOG(FATAL) << "Unsupported SIMD type"; 906 UNREACHABLE(); 907 } 908 } 909 910 void LocationsBuilderX86_64::VisitVecUShr(HVecUShr* instruction) { 911 CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction); 912 } 913 914 void InstructionCodeGeneratorX86_64::VisitVecUShr(HVecUShr* instruction) { 915 LocationSummary* locations = instruction->GetLocations(); 916 DCHECK(locations->InAt(0).Equals(locations->Out())); 917 int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); 918 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 919 switch (instruction->GetPackedType()) { 920 case DataType::Type::kUint16: 921 case DataType::Type::kInt16: 922 DCHECK_EQ(8u, instruction->GetVectorLength()); 923 __ psrlw(dst, Immediate(static_cast<int8_t>(value))); 924 break; 925 case DataType::Type::kInt32: 926 DCHECK_EQ(4u, instruction->GetVectorLength()); 927 __ psrld(dst, Immediate(static_cast<int8_t>(value))); 928 break; 929 case DataType::Type::kInt64: 930 DCHECK_EQ(2u, instruction->GetVectorLength()); 931 __ psrlq(dst, Immediate(static_cast<int8_t>(value))); 932 break; 933 default: 934 LOG(FATAL) << "Unsupported SIMD type"; 935 UNREACHABLE(); 936 } 937 } 938 939 void LocationsBuilderX86_64::VisitVecSetScalars(HVecSetScalars* instruction) { 940 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); 941 942 DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented 943 944 HInstruction* input = instruction->InputAt(0); 945 bool is_zero = IsZeroBitPattern(input); 946 947 switch (instruction->GetPackedType()) { 948 case DataType::Type::kBool: 949 case DataType::Type::kUint8: 950 case DataType::Type::kInt8: 951 case DataType::Type::kUint16: 952 case DataType::Type::kInt16: 953 case DataType::Type::kInt32: 954 case DataType::Type::kInt64: 955 locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) 956 : Location::RequiresRegister()); 957 locations->SetOut(Location::RequiresFpuRegister()); 958 break; 959 case DataType::Type::kFloat32: 960 case DataType::Type::kFloat64: 961 locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) 962 : Location::RequiresFpuRegister()); 963 locations->SetOut(Location::RequiresFpuRegister()); 964 break; 965 default: 966 LOG(FATAL) << "Unsupported SIMD type"; 967 UNREACHABLE(); 968 } 969 } 970 971 void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruction) { 972 LocationSummary* locations = instruction->GetLocations(); 973 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 974 975 DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented 976 977 // Zero out all other elements first. 978 __ xorps(dst, dst); 979 980 // Shorthand for any type of zero. 981 if (IsZeroBitPattern(instruction->InputAt(0))) { 982 return; 983 } 984 985 // Set required elements. 986 switch (instruction->GetPackedType()) { 987 case DataType::Type::kBool: 988 case DataType::Type::kUint8: 989 case DataType::Type::kInt8: 990 case DataType::Type::kUint16: 991 case DataType::Type::kInt16: // TODO: up to here, and? 992 LOG(FATAL) << "Unsupported SIMD type"; 993 UNREACHABLE(); 994 case DataType::Type::kInt32: 995 DCHECK_EQ(4u, instruction->GetVectorLength()); 996 __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>()); 997 break; 998 case DataType::Type::kInt64: 999 DCHECK_EQ(2u, instruction->GetVectorLength()); 1000 __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>()); // is 64-bit 1001 break; 1002 case DataType::Type::kFloat32: 1003 DCHECK_EQ(4u, instruction->GetVectorLength()); 1004 __ movss(dst, locations->InAt(0).AsFpuRegister<XmmRegister>()); 1005 break; 1006 case DataType::Type::kFloat64: 1007 DCHECK_EQ(2u, instruction->GetVectorLength()); 1008 __ movsd(dst, locations->InAt(0).AsFpuRegister<XmmRegister>()); 1009 break; 1010 default: 1011 LOG(FATAL) << "Unsupported SIMD type"; 1012 UNREACHABLE(); 1013 } 1014 } 1015 1016 // Helper to set up locations for vector accumulations. 1017 static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) { 1018 LocationSummary* locations = new (allocator) LocationSummary(instruction); 1019 switch (instruction->GetPackedType()) { 1020 case DataType::Type::kUint8: 1021 case DataType::Type::kInt8: 1022 case DataType::Type::kUint16: 1023 case DataType::Type::kInt16: 1024 case DataType::Type::kInt32: 1025 case DataType::Type::kInt64: 1026 locations->SetInAt(0, Location::RequiresFpuRegister()); 1027 locations->SetInAt(1, Location::RequiresFpuRegister()); 1028 locations->SetInAt(2, Location::RequiresFpuRegister()); 1029 locations->SetOut(Location::SameAsFirstInput()); 1030 break; 1031 default: 1032 LOG(FATAL) << "Unsupported SIMD type"; 1033 UNREACHABLE(); 1034 } 1035 } 1036 1037 void LocationsBuilderX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { 1038 CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction); 1039 } 1040 1041 void InstructionCodeGeneratorX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { 1042 // TODO: pmaddwd? 1043 LOG(FATAL) << "No SIMD for " << instruction->GetId(); 1044 } 1045 1046 void LocationsBuilderX86_64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { 1047 CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction); 1048 } 1049 1050 void InstructionCodeGeneratorX86_64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { 1051 // TODO: psadbw for unsigned? 1052 LOG(FATAL) << "No SIMD for " << instruction->GetId(); 1053 } 1054 1055 // Helper to set up locations for vector memory operations. 1056 static void CreateVecMemLocations(ArenaAllocator* allocator, 1057 HVecMemoryOperation* instruction, 1058 bool is_load) { 1059 LocationSummary* locations = new (allocator) LocationSummary(instruction); 1060 switch (instruction->GetPackedType()) { 1061 case DataType::Type::kBool: 1062 case DataType::Type::kUint8: 1063 case DataType::Type::kInt8: 1064 case DataType::Type::kUint16: 1065 case DataType::Type::kInt16: 1066 case DataType::Type::kInt32: 1067 case DataType::Type::kInt64: 1068 case DataType::Type::kFloat32: 1069 case DataType::Type::kFloat64: 1070 locations->SetInAt(0, Location::RequiresRegister()); 1071 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); 1072 if (is_load) { 1073 locations->SetOut(Location::RequiresFpuRegister()); 1074 } else { 1075 locations->SetInAt(2, Location::RequiresFpuRegister()); 1076 } 1077 break; 1078 default: 1079 LOG(FATAL) << "Unsupported SIMD type"; 1080 UNREACHABLE(); 1081 } 1082 } 1083 1084 // Helper to construct address for vector memory operations. 1085 static Address VecAddress(LocationSummary* locations, size_t size, bool is_string_char_at) { 1086 Location base = locations->InAt(0); 1087 Location index = locations->InAt(1); 1088 ScaleFactor scale = TIMES_1; 1089 switch (size) { 1090 case 2: scale = TIMES_2; break; 1091 case 4: scale = TIMES_4; break; 1092 case 8: scale = TIMES_8; break; 1093 default: break; 1094 } 1095 // Incorporate the string or array offset in the address computation. 1096 uint32_t offset = is_string_char_at 1097 ? mirror::String::ValueOffset().Uint32Value() 1098 : mirror::Array::DataOffset(size).Uint32Value(); 1099 return CodeGeneratorX86_64::ArrayAddress(base.AsRegister<CpuRegister>(), index, scale, offset); 1100 } 1101 1102 void LocationsBuilderX86_64::VisitVecLoad(HVecLoad* instruction) { 1103 CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true); 1104 // String load requires a temporary for the compressed load. 1105 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { 1106 instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister()); 1107 } 1108 } 1109 1110 void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) { 1111 LocationSummary* locations = instruction->GetLocations(); 1112 size_t size = DataType::Size(instruction->GetPackedType()); 1113 Address address = VecAddress(locations, size, instruction->IsStringCharAt()); 1114 XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>(); 1115 bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16); 1116 switch (instruction->GetPackedType()) { 1117 case DataType::Type::kUint16: 1118 DCHECK_EQ(8u, instruction->GetVectorLength()); 1119 // Special handling of compressed/uncompressed string load. 1120 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { 1121 NearLabel done, not_compressed; 1122 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 1123 // Test compression bit. 1124 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, 1125 "Expecting 0=compressed, 1=uncompressed"); 1126 uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); 1127 __ testb(Address(locations->InAt(0).AsRegister<CpuRegister>(), count_offset), Immediate(1)); 1128 __ j(kNotZero, ¬_compressed); 1129 // Zero extend 8 compressed bytes into 8 chars. 1130 __ movsd(reg, VecAddress(locations, 1, instruction->IsStringCharAt())); 1131 __ pxor(tmp, tmp); 1132 __ punpcklbw(reg, tmp); 1133 __ jmp(&done); 1134 // Load 8 direct uncompressed chars. 1135 __ Bind(¬_compressed); 1136 is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address); 1137 __ Bind(&done); 1138 return; 1139 } 1140 FALLTHROUGH_INTENDED; 1141 case DataType::Type::kBool: 1142 case DataType::Type::kUint8: 1143 case DataType::Type::kInt8: 1144 case DataType::Type::kInt16: 1145 case DataType::Type::kInt32: 1146 case DataType::Type::kInt64: 1147 DCHECK_LE(2u, instruction->GetVectorLength()); 1148 DCHECK_LE(instruction->GetVectorLength(), 16u); 1149 is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address); 1150 break; 1151 case DataType::Type::kFloat32: 1152 DCHECK_EQ(4u, instruction->GetVectorLength()); 1153 is_aligned16 ? __ movaps(reg, address) : __ movups(reg, address); 1154 break; 1155 case DataType::Type::kFloat64: 1156 DCHECK_EQ(2u, instruction->GetVectorLength()); 1157 is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address); 1158 break; 1159 default: 1160 LOG(FATAL) << "Unsupported SIMD type"; 1161 UNREACHABLE(); 1162 } 1163 } 1164 1165 void LocationsBuilderX86_64::VisitVecStore(HVecStore* instruction) { 1166 CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false); 1167 } 1168 1169 void InstructionCodeGeneratorX86_64::VisitVecStore(HVecStore* instruction) { 1170 LocationSummary* locations = instruction->GetLocations(); 1171 size_t size = DataType::Size(instruction->GetPackedType()); 1172 Address address = VecAddress(locations, size, /*is_string_char_at*/ false); 1173 XmmRegister reg = locations->InAt(2).AsFpuRegister<XmmRegister>(); 1174 bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16); 1175 switch (instruction->GetPackedType()) { 1176 case DataType::Type::kBool: 1177 case DataType::Type::kUint8: 1178 case DataType::Type::kInt8: 1179 case DataType::Type::kUint16: 1180 case DataType::Type::kInt16: 1181 case DataType::Type::kInt32: 1182 case DataType::Type::kInt64: 1183 DCHECK_LE(2u, instruction->GetVectorLength()); 1184 DCHECK_LE(instruction->GetVectorLength(), 16u); 1185 is_aligned16 ? __ movdqa(address, reg) : __ movdqu(address, reg); 1186 break; 1187 case DataType::Type::kFloat32: 1188 DCHECK_EQ(4u, instruction->GetVectorLength()); 1189 is_aligned16 ? __ movaps(address, reg) : __ movups(address, reg); 1190 break; 1191 case DataType::Type::kFloat64: 1192 DCHECK_EQ(2u, instruction->GetVectorLength()); 1193 is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg); 1194 break; 1195 default: 1196 LOG(FATAL) << "Unsupported SIMD type"; 1197 UNREACHABLE(); 1198 } 1199 } 1200 1201 #undef __ 1202 1203 } // namespace x86_64 1204 } // namespace art 1205