1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "code_generator_x86.h" 18 19 #include "mirror/array-inl.h" 20 #include "mirror/string.h" 21 22 namespace art { 23 namespace x86 { 24 25 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. 26 #define __ down_cast<X86Assembler*>(GetAssembler())-> // NOLINT 27 28 void LocationsBuilderX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { 29 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); 30 HInstruction* input = instruction->InputAt(0); 31 bool is_zero = IsZeroBitPattern(input); 32 switch (instruction->GetPackedType()) { 33 case DataType::Type::kInt64: 34 // Long needs extra temporary to load from the register pair. 35 if (!is_zero) { 36 locations->AddTemp(Location::RequiresFpuRegister()); 37 } 38 FALLTHROUGH_INTENDED; 39 case DataType::Type::kBool: 40 case DataType::Type::kUint8: 41 case DataType::Type::kInt8: 42 case DataType::Type::kUint16: 43 case DataType::Type::kInt16: 44 case DataType::Type::kInt32: 45 locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) 46 : Location::RequiresRegister()); 47 locations->SetOut(Location::RequiresFpuRegister()); 48 break; 49 case DataType::Type::kFloat32: 50 case DataType::Type::kFloat64: 51 locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) 52 : Location::RequiresFpuRegister()); 53 locations->SetOut(is_zero ? Location::RequiresFpuRegister() 54 : Location::SameAsFirstInput()); 55 break; 56 default: 57 LOG(FATAL) << "Unsupported SIMD type"; 58 UNREACHABLE(); 59 } 60 } 61 62 void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { 63 LocationSummary* locations = instruction->GetLocations(); 64 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 65 66 // Shorthand for any type of zero. 67 if (IsZeroBitPattern(instruction->InputAt(0))) { 68 __ xorps(dst, dst); 69 return; 70 } 71 72 switch (instruction->GetPackedType()) { 73 case DataType::Type::kBool: 74 case DataType::Type::kUint8: 75 case DataType::Type::kInt8: 76 DCHECK_EQ(16u, instruction->GetVectorLength()); 77 __ movd(dst, locations->InAt(0).AsRegister<Register>()); 78 __ punpcklbw(dst, dst); 79 __ punpcklwd(dst, dst); 80 __ pshufd(dst, dst, Immediate(0)); 81 break; 82 case DataType::Type::kUint16: 83 case DataType::Type::kInt16: 84 DCHECK_EQ(8u, instruction->GetVectorLength()); 85 __ movd(dst, locations->InAt(0).AsRegister<Register>()); 86 __ punpcklwd(dst, dst); 87 __ pshufd(dst, dst, Immediate(0)); 88 break; 89 case DataType::Type::kInt32: 90 DCHECK_EQ(4u, instruction->GetVectorLength()); 91 __ movd(dst, locations->InAt(0).AsRegister<Register>()); 92 __ pshufd(dst, dst, Immediate(0)); 93 break; 94 case DataType::Type::kInt64: { 95 DCHECK_EQ(2u, instruction->GetVectorLength()); 96 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 97 __ movd(dst, locations->InAt(0).AsRegisterPairLow<Register>()); 98 __ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>()); 99 __ punpckldq(dst, tmp); 100 __ punpcklqdq(dst, dst); 101 break; 102 } 103 case DataType::Type::kFloat32: 104 DCHECK_EQ(4u, instruction->GetVectorLength()); 105 DCHECK(locations->InAt(0).Equals(locations->Out())); 106 __ shufps(dst, dst, Immediate(0)); 107 break; 108 case DataType::Type::kFloat64: 109 DCHECK_EQ(2u, instruction->GetVectorLength()); 110 DCHECK(locations->InAt(0).Equals(locations->Out())); 111 __ shufpd(dst, dst, Immediate(0)); 112 break; 113 default: 114 LOG(FATAL) << "Unsupported SIMD type"; 115 UNREACHABLE(); 116 } 117 } 118 119 void LocationsBuilderX86::VisitVecExtractScalar(HVecExtractScalar* instruction) { 120 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); 121 switch (instruction->GetPackedType()) { 122 case DataType::Type::kInt64: 123 // Long needs extra temporary to store into the register pair. 124 locations->AddTemp(Location::RequiresFpuRegister()); 125 FALLTHROUGH_INTENDED; 126 case DataType::Type::kBool: 127 case DataType::Type::kUint8: 128 case DataType::Type::kInt8: 129 case DataType::Type::kUint16: 130 case DataType::Type::kInt16: 131 case DataType::Type::kInt32: 132 locations->SetInAt(0, Location::RequiresFpuRegister()); 133 locations->SetOut(Location::RequiresRegister()); 134 break; 135 case DataType::Type::kFloat32: 136 case DataType::Type::kFloat64: 137 locations->SetInAt(0, Location::RequiresFpuRegister()); 138 locations->SetOut(Location::SameAsFirstInput()); 139 break; 140 default: 141 LOG(FATAL) << "Unsupported SIMD type"; 142 UNREACHABLE(); 143 } 144 } 145 146 void InstructionCodeGeneratorX86::VisitVecExtractScalar(HVecExtractScalar* instruction) { 147 LocationSummary* locations = instruction->GetLocations(); 148 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); 149 switch (instruction->GetPackedType()) { 150 case DataType::Type::kBool: 151 case DataType::Type::kUint8: 152 case DataType::Type::kInt8: 153 case DataType::Type::kUint16: 154 case DataType::Type::kInt16: // TODO: up to here, and? 155 LOG(FATAL) << "Unsupported SIMD type"; 156 UNREACHABLE(); 157 case DataType::Type::kInt32: 158 DCHECK_LE(4u, instruction->GetVectorLength()); 159 DCHECK_LE(instruction->GetVectorLength(), 16u); 160 __ movd(locations->Out().AsRegister<Register>(), src); 161 break; 162 case DataType::Type::kInt64: { 163 DCHECK_EQ(2u, instruction->GetVectorLength()); 164 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 165 __ movd(locations->Out().AsRegisterPairLow<Register>(), src); 166 __ pshufd(tmp, src, Immediate(1)); 167 __ movd(locations->Out().AsRegisterPairHigh<Register>(), tmp); 168 break; 169 } 170 case DataType::Type::kFloat32: 171 case DataType::Type::kFloat64: 172 DCHECK_LE(2u, instruction->GetVectorLength()); 173 DCHECK_LE(instruction->GetVectorLength(), 4u); 174 DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required 175 break; 176 default: 177 LOG(FATAL) << "Unsupported SIMD type"; 178 UNREACHABLE(); 179 } 180 } 181 182 // Helper to set up locations for vector unary operations. 183 static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) { 184 LocationSummary* locations = new (allocator) LocationSummary(instruction); 185 switch (instruction->GetPackedType()) { 186 case DataType::Type::kBool: 187 case DataType::Type::kUint8: 188 case DataType::Type::kInt8: 189 case DataType::Type::kUint16: 190 case DataType::Type::kInt16: 191 case DataType::Type::kInt32: 192 case DataType::Type::kInt64: 193 case DataType::Type::kFloat32: 194 case DataType::Type::kFloat64: 195 locations->SetInAt(0, Location::RequiresFpuRegister()); 196 locations->SetOut(Location::RequiresFpuRegister()); 197 break; 198 default: 199 LOG(FATAL) << "Unsupported SIMD type"; 200 UNREACHABLE(); 201 } 202 } 203 204 void LocationsBuilderX86::VisitVecReduce(HVecReduce* instruction) { 205 CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); 206 // Long reduction or min/max require a temporary. 207 if (instruction->GetPackedType() == DataType::Type::kInt64 || 208 instruction->GetKind() == HVecReduce::kMin || 209 instruction->GetKind() == HVecReduce::kMax) { 210 instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister()); 211 } 212 } 213 214 void InstructionCodeGeneratorX86::VisitVecReduce(HVecReduce* instruction) { 215 LocationSummary* locations = instruction->GetLocations(); 216 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); 217 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 218 switch (instruction->GetPackedType()) { 219 case DataType::Type::kInt32: 220 DCHECK_EQ(4u, instruction->GetVectorLength()); 221 switch (instruction->GetKind()) { 222 case HVecReduce::kSum: 223 __ movaps(dst, src); 224 __ phaddd(dst, dst); 225 __ phaddd(dst, dst); 226 break; 227 case HVecReduce::kMin: { 228 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 229 __ movaps(tmp, src); 230 __ movaps(dst, src); 231 __ psrldq(tmp, Immediate(8)); 232 __ pminsd(dst, tmp); 233 __ psrldq(tmp, Immediate(4)); 234 __ pminsd(dst, tmp); 235 break; 236 } 237 case HVecReduce::kMax: { 238 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 239 __ movaps(tmp, src); 240 __ movaps(dst, src); 241 __ psrldq(tmp, Immediate(8)); 242 __ pmaxsd(dst, tmp); 243 __ psrldq(tmp, Immediate(4)); 244 __ pmaxsd(dst, tmp); 245 break; 246 } 247 } 248 break; 249 case DataType::Type::kInt64: { 250 DCHECK_EQ(2u, instruction->GetVectorLength()); 251 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 252 switch (instruction->GetKind()) { 253 case HVecReduce::kSum: 254 __ movaps(tmp, src); 255 __ movaps(dst, src); 256 __ punpckhqdq(tmp, tmp); 257 __ paddq(dst, tmp); 258 break; 259 case HVecReduce::kMin: 260 case HVecReduce::kMax: 261 LOG(FATAL) << "Unsupported SIMD type"; 262 } 263 break; 264 } 265 default: 266 LOG(FATAL) << "Unsupported SIMD type"; 267 UNREACHABLE(); 268 } 269 } 270 271 void LocationsBuilderX86::VisitVecCnv(HVecCnv* instruction) { 272 CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); 273 } 274 275 void InstructionCodeGeneratorX86::VisitVecCnv(HVecCnv* instruction) { 276 LocationSummary* locations = instruction->GetLocations(); 277 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); 278 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 279 DataType::Type from = instruction->GetInputType(); 280 DataType::Type to = instruction->GetResultType(); 281 if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) { 282 DCHECK_EQ(4u, instruction->GetVectorLength()); 283 __ cvtdq2ps(dst, src); 284 } else { 285 LOG(FATAL) << "Unsupported SIMD type"; 286 } 287 } 288 289 void LocationsBuilderX86::VisitVecNeg(HVecNeg* instruction) { 290 CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); 291 } 292 293 void InstructionCodeGeneratorX86::VisitVecNeg(HVecNeg* instruction) { 294 LocationSummary* locations = instruction->GetLocations(); 295 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); 296 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 297 switch (instruction->GetPackedType()) { 298 case DataType::Type::kUint8: 299 case DataType::Type::kInt8: 300 DCHECK_EQ(16u, instruction->GetVectorLength()); 301 __ pxor(dst, dst); 302 __ psubb(dst, src); 303 break; 304 case DataType::Type::kUint16: 305 case DataType::Type::kInt16: 306 DCHECK_EQ(8u, instruction->GetVectorLength()); 307 __ pxor(dst, dst); 308 __ psubw(dst, src); 309 break; 310 case DataType::Type::kInt32: 311 DCHECK_EQ(4u, instruction->GetVectorLength()); 312 __ pxor(dst, dst); 313 __ psubd(dst, src); 314 break; 315 case DataType::Type::kInt64: 316 DCHECK_EQ(2u, instruction->GetVectorLength()); 317 __ pxor(dst, dst); 318 __ psubq(dst, src); 319 break; 320 case DataType::Type::kFloat32: 321 DCHECK_EQ(4u, instruction->GetVectorLength()); 322 __ xorps(dst, dst); 323 __ subps(dst, src); 324 break; 325 case DataType::Type::kFloat64: 326 DCHECK_EQ(2u, instruction->GetVectorLength()); 327 __ xorpd(dst, dst); 328 __ subpd(dst, src); 329 break; 330 default: 331 LOG(FATAL) << "Unsupported SIMD type"; 332 UNREACHABLE(); 333 } 334 } 335 336 void LocationsBuilderX86::VisitVecAbs(HVecAbs* instruction) { 337 CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); 338 // Integral-abs requires a temporary for the comparison. 339 if (instruction->GetPackedType() == DataType::Type::kInt32) { 340 instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister()); 341 } 342 } 343 344 void InstructionCodeGeneratorX86::VisitVecAbs(HVecAbs* instruction) { 345 LocationSummary* locations = instruction->GetLocations(); 346 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); 347 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 348 switch (instruction->GetPackedType()) { 349 case DataType::Type::kInt32: { 350 DCHECK_EQ(4u, instruction->GetVectorLength()); 351 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 352 __ movaps(dst, src); 353 __ pxor(tmp, tmp); 354 __ pcmpgtd(tmp, dst); 355 __ pxor(dst, tmp); 356 __ psubd(dst, tmp); 357 break; 358 } 359 case DataType::Type::kFloat32: 360 DCHECK_EQ(4u, instruction->GetVectorLength()); 361 __ pcmpeqb(dst, dst); // all ones 362 __ psrld(dst, Immediate(1)); 363 __ andps(dst, src); 364 break; 365 case DataType::Type::kFloat64: 366 DCHECK_EQ(2u, instruction->GetVectorLength()); 367 __ pcmpeqb(dst, dst); // all ones 368 __ psrlq(dst, Immediate(1)); 369 __ andpd(dst, src); 370 break; 371 default: 372 LOG(FATAL) << "Unsupported SIMD type"; 373 UNREACHABLE(); 374 } 375 } 376 377 void LocationsBuilderX86::VisitVecNot(HVecNot* instruction) { 378 CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); 379 // Boolean-not requires a temporary to construct the 16 x one. 380 if (instruction->GetPackedType() == DataType::Type::kBool) { 381 instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister()); 382 } 383 } 384 385 void InstructionCodeGeneratorX86::VisitVecNot(HVecNot* instruction) { 386 LocationSummary* locations = instruction->GetLocations(); 387 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); 388 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 389 switch (instruction->GetPackedType()) { 390 case DataType::Type::kBool: { // special case boolean-not 391 DCHECK_EQ(16u, instruction->GetVectorLength()); 392 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 393 __ pxor(dst, dst); 394 __ pcmpeqb(tmp, tmp); // all ones 395 __ psubb(dst, tmp); // 16 x one 396 __ pxor(dst, src); 397 break; 398 } 399 case DataType::Type::kUint8: 400 case DataType::Type::kInt8: 401 case DataType::Type::kUint16: 402 case DataType::Type::kInt16: 403 case DataType::Type::kInt32: 404 case DataType::Type::kInt64: 405 DCHECK_LE(2u, instruction->GetVectorLength()); 406 DCHECK_LE(instruction->GetVectorLength(), 16u); 407 __ pcmpeqb(dst, dst); // all ones 408 __ pxor(dst, src); 409 break; 410 case DataType::Type::kFloat32: 411 DCHECK_EQ(4u, instruction->GetVectorLength()); 412 __ pcmpeqb(dst, dst); // all ones 413 __ xorps(dst, src); 414 break; 415 case DataType::Type::kFloat64: 416 DCHECK_EQ(2u, instruction->GetVectorLength()); 417 __ pcmpeqb(dst, dst); // all ones 418 __ xorpd(dst, src); 419 break; 420 default: 421 LOG(FATAL) << "Unsupported SIMD type"; 422 UNREACHABLE(); 423 } 424 } 425 426 // Helper to set up locations for vector binary operations. 427 static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) { 428 LocationSummary* locations = new (allocator) LocationSummary(instruction); 429 switch (instruction->GetPackedType()) { 430 case DataType::Type::kBool: 431 case DataType::Type::kUint8: 432 case DataType::Type::kInt8: 433 case DataType::Type::kUint16: 434 case DataType::Type::kInt16: 435 case DataType::Type::kInt32: 436 case DataType::Type::kInt64: 437 case DataType::Type::kFloat32: 438 case DataType::Type::kFloat64: 439 locations->SetInAt(0, Location::RequiresFpuRegister()); 440 locations->SetInAt(1, Location::RequiresFpuRegister()); 441 locations->SetOut(Location::SameAsFirstInput()); 442 break; 443 default: 444 LOG(FATAL) << "Unsupported SIMD type"; 445 UNREACHABLE(); 446 } 447 } 448 449 void LocationsBuilderX86::VisitVecAdd(HVecAdd* instruction) { 450 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); 451 } 452 453 void InstructionCodeGeneratorX86::VisitVecAdd(HVecAdd* instruction) { 454 LocationSummary* locations = instruction->GetLocations(); 455 DCHECK(locations->InAt(0).Equals(locations->Out())); 456 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 457 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 458 switch (instruction->GetPackedType()) { 459 case DataType::Type::kUint8: 460 case DataType::Type::kInt8: 461 DCHECK_EQ(16u, instruction->GetVectorLength()); 462 __ paddb(dst, src); 463 break; 464 case DataType::Type::kUint16: 465 case DataType::Type::kInt16: 466 DCHECK_EQ(8u, instruction->GetVectorLength()); 467 __ paddw(dst, src); 468 break; 469 case DataType::Type::kInt32: 470 DCHECK_EQ(4u, instruction->GetVectorLength()); 471 __ paddd(dst, src); 472 break; 473 case DataType::Type::kInt64: 474 DCHECK_EQ(2u, instruction->GetVectorLength()); 475 __ paddq(dst, src); 476 break; 477 case DataType::Type::kFloat32: 478 DCHECK_EQ(4u, instruction->GetVectorLength()); 479 __ addps(dst, src); 480 break; 481 case DataType::Type::kFloat64: 482 DCHECK_EQ(2u, instruction->GetVectorLength()); 483 __ addpd(dst, src); 484 break; 485 default: 486 LOG(FATAL) << "Unsupported SIMD type"; 487 UNREACHABLE(); 488 } 489 } 490 491 void LocationsBuilderX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { 492 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); 493 } 494 495 void InstructionCodeGeneratorX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { 496 LocationSummary* locations = instruction->GetLocations(); 497 DCHECK(locations->InAt(0).Equals(locations->Out())); 498 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 499 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 500 501 DCHECK(instruction->IsRounded()); 502 503 switch (instruction->GetPackedType()) { 504 case DataType::Type::kUint8: 505 DCHECK_EQ(16u, instruction->GetVectorLength()); 506 __ pavgb(dst, src); 507 return; 508 case DataType::Type::kUint16: 509 DCHECK_EQ(8u, instruction->GetVectorLength()); 510 __ pavgw(dst, src); 511 return; 512 default: 513 LOG(FATAL) << "Unsupported SIMD type"; 514 UNREACHABLE(); 515 } 516 } 517 518 void LocationsBuilderX86::VisitVecSub(HVecSub* instruction) { 519 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); 520 } 521 522 void InstructionCodeGeneratorX86::VisitVecSub(HVecSub* instruction) { 523 LocationSummary* locations = instruction->GetLocations(); 524 DCHECK(locations->InAt(0).Equals(locations->Out())); 525 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 526 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 527 switch (instruction->GetPackedType()) { 528 case DataType::Type::kUint8: 529 case DataType::Type::kInt8: 530 DCHECK_EQ(16u, instruction->GetVectorLength()); 531 __ psubb(dst, src); 532 break; 533 case DataType::Type::kUint16: 534 case DataType::Type::kInt16: 535 DCHECK_EQ(8u, instruction->GetVectorLength()); 536 __ psubw(dst, src); 537 break; 538 case DataType::Type::kInt32: 539 DCHECK_EQ(4u, instruction->GetVectorLength()); 540 __ psubd(dst, src); 541 break; 542 case DataType::Type::kInt64: 543 DCHECK_EQ(2u, instruction->GetVectorLength()); 544 __ psubq(dst, src); 545 break; 546 case DataType::Type::kFloat32: 547 DCHECK_EQ(4u, instruction->GetVectorLength()); 548 __ subps(dst, src); 549 break; 550 case DataType::Type::kFloat64: 551 DCHECK_EQ(2u, instruction->GetVectorLength()); 552 __ subpd(dst, src); 553 break; 554 default: 555 LOG(FATAL) << "Unsupported SIMD type"; 556 UNREACHABLE(); 557 } 558 } 559 560 void LocationsBuilderX86::VisitVecMul(HVecMul* instruction) { 561 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); 562 } 563 564 void InstructionCodeGeneratorX86::VisitVecMul(HVecMul* instruction) { 565 LocationSummary* locations = instruction->GetLocations(); 566 DCHECK(locations->InAt(0).Equals(locations->Out())); 567 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 568 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 569 switch (instruction->GetPackedType()) { 570 case DataType::Type::kUint16: 571 case DataType::Type::kInt16: 572 DCHECK_EQ(8u, instruction->GetVectorLength()); 573 __ pmullw(dst, src); 574 break; 575 case DataType::Type::kInt32: 576 DCHECK_EQ(4u, instruction->GetVectorLength()); 577 __ pmulld(dst, src); 578 break; 579 case DataType::Type::kFloat32: 580 DCHECK_EQ(4u, instruction->GetVectorLength()); 581 __ mulps(dst, src); 582 break; 583 case DataType::Type::kFloat64: 584 DCHECK_EQ(2u, instruction->GetVectorLength()); 585 __ mulpd(dst, src); 586 break; 587 default: 588 LOG(FATAL) << "Unsupported SIMD type"; 589 UNREACHABLE(); 590 } 591 } 592 593 void LocationsBuilderX86::VisitVecDiv(HVecDiv* instruction) { 594 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); 595 } 596 597 void InstructionCodeGeneratorX86::VisitVecDiv(HVecDiv* instruction) { 598 LocationSummary* locations = instruction->GetLocations(); 599 DCHECK(locations->InAt(0).Equals(locations->Out())); 600 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 601 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 602 switch (instruction->GetPackedType()) { 603 case DataType::Type::kFloat32: 604 DCHECK_EQ(4u, instruction->GetVectorLength()); 605 __ divps(dst, src); 606 break; 607 case DataType::Type::kFloat64: 608 DCHECK_EQ(2u, instruction->GetVectorLength()); 609 __ divpd(dst, src); 610 break; 611 default: 612 LOG(FATAL) << "Unsupported SIMD type"; 613 UNREACHABLE(); 614 } 615 } 616 617 void LocationsBuilderX86::VisitVecMin(HVecMin* instruction) { 618 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); 619 } 620 621 void InstructionCodeGeneratorX86::VisitVecMin(HVecMin* instruction) { 622 LocationSummary* locations = instruction->GetLocations(); 623 DCHECK(locations->InAt(0).Equals(locations->Out())); 624 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 625 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 626 switch (instruction->GetPackedType()) { 627 case DataType::Type::kUint8: 628 DCHECK_EQ(16u, instruction->GetVectorLength()); 629 __ pminub(dst, src); 630 break; 631 case DataType::Type::kInt8: 632 DCHECK_EQ(16u, instruction->GetVectorLength()); 633 __ pminsb(dst, src); 634 break; 635 case DataType::Type::kUint16: 636 DCHECK_EQ(8u, instruction->GetVectorLength()); 637 __ pminuw(dst, src); 638 break; 639 case DataType::Type::kInt16: 640 DCHECK_EQ(8u, instruction->GetVectorLength()); 641 __ pminsw(dst, src); 642 break; 643 case DataType::Type::kUint32: 644 DCHECK_EQ(4u, instruction->GetVectorLength()); 645 __ pminud(dst, src); 646 break; 647 case DataType::Type::kInt32: 648 DCHECK_EQ(4u, instruction->GetVectorLength()); 649 __ pminsd(dst, src); 650 break; 651 // Next cases are sloppy wrt 0.0 vs -0.0. 652 case DataType::Type::kFloat32: 653 DCHECK_EQ(4u, instruction->GetVectorLength()); 654 __ minps(dst, src); 655 break; 656 case DataType::Type::kFloat64: 657 DCHECK_EQ(2u, instruction->GetVectorLength()); 658 __ minpd(dst, src); 659 break; 660 default: 661 LOG(FATAL) << "Unsupported SIMD type"; 662 UNREACHABLE(); 663 } 664 } 665 666 void LocationsBuilderX86::VisitVecMax(HVecMax* instruction) { 667 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); 668 } 669 670 void InstructionCodeGeneratorX86::VisitVecMax(HVecMax* instruction) { 671 LocationSummary* locations = instruction->GetLocations(); 672 DCHECK(locations->InAt(0).Equals(locations->Out())); 673 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 674 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 675 switch (instruction->GetPackedType()) { 676 case DataType::Type::kUint8: 677 DCHECK_EQ(16u, instruction->GetVectorLength()); 678 __ pmaxub(dst, src); 679 break; 680 case DataType::Type::kInt8: 681 DCHECK_EQ(16u, instruction->GetVectorLength()); 682 __ pmaxsb(dst, src); 683 break; 684 case DataType::Type::kUint16: 685 DCHECK_EQ(8u, instruction->GetVectorLength()); 686 __ pmaxuw(dst, src); 687 break; 688 case DataType::Type::kInt16: 689 DCHECK_EQ(8u, instruction->GetVectorLength()); 690 __ pmaxsw(dst, src); 691 break; 692 case DataType::Type::kUint32: 693 DCHECK_EQ(4u, instruction->GetVectorLength()); 694 __ pmaxud(dst, src); 695 break; 696 case DataType::Type::kInt32: 697 DCHECK_EQ(4u, instruction->GetVectorLength()); 698 __ pmaxsd(dst, src); 699 break; 700 // Next cases are sloppy wrt 0.0 vs -0.0. 701 case DataType::Type::kFloat32: 702 DCHECK_EQ(4u, instruction->GetVectorLength()); 703 __ maxps(dst, src); 704 break; 705 case DataType::Type::kFloat64: 706 DCHECK_EQ(2u, instruction->GetVectorLength()); 707 __ maxpd(dst, src); 708 break; 709 default: 710 LOG(FATAL) << "Unsupported SIMD type"; 711 UNREACHABLE(); 712 } 713 } 714 715 void LocationsBuilderX86::VisitVecAnd(HVecAnd* instruction) { 716 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); 717 } 718 719 void InstructionCodeGeneratorX86::VisitVecAnd(HVecAnd* instruction) { 720 LocationSummary* locations = instruction->GetLocations(); 721 DCHECK(locations->InAt(0).Equals(locations->Out())); 722 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 723 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 724 switch (instruction->GetPackedType()) { 725 case DataType::Type::kBool: 726 case DataType::Type::kUint8: 727 case DataType::Type::kInt8: 728 case DataType::Type::kUint16: 729 case DataType::Type::kInt16: 730 case DataType::Type::kInt32: 731 case DataType::Type::kInt64: 732 DCHECK_LE(2u, instruction->GetVectorLength()); 733 DCHECK_LE(instruction->GetVectorLength(), 16u); 734 __ pand(dst, src); 735 break; 736 case DataType::Type::kFloat32: 737 DCHECK_EQ(4u, instruction->GetVectorLength()); 738 __ andps(dst, src); 739 break; 740 case DataType::Type::kFloat64: 741 DCHECK_EQ(2u, instruction->GetVectorLength()); 742 __ andpd(dst, src); 743 break; 744 default: 745 LOG(FATAL) << "Unsupported SIMD type"; 746 UNREACHABLE(); 747 } 748 } 749 750 void LocationsBuilderX86::VisitVecAndNot(HVecAndNot* instruction) { 751 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); 752 } 753 754 void InstructionCodeGeneratorX86::VisitVecAndNot(HVecAndNot* instruction) { 755 LocationSummary* locations = instruction->GetLocations(); 756 DCHECK(locations->InAt(0).Equals(locations->Out())); 757 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 758 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 759 switch (instruction->GetPackedType()) { 760 case DataType::Type::kBool: 761 case DataType::Type::kUint8: 762 case DataType::Type::kInt8: 763 case DataType::Type::kUint16: 764 case DataType::Type::kInt16: 765 case DataType::Type::kInt32: 766 case DataType::Type::kInt64: 767 DCHECK_LE(2u, instruction->GetVectorLength()); 768 DCHECK_LE(instruction->GetVectorLength(), 16u); 769 __ pandn(dst, src); 770 break; 771 case DataType::Type::kFloat32: 772 DCHECK_EQ(4u, instruction->GetVectorLength()); 773 __ andnps(dst, src); 774 break; 775 case DataType::Type::kFloat64: 776 DCHECK_EQ(2u, instruction->GetVectorLength()); 777 __ andnpd(dst, src); 778 break; 779 default: 780 LOG(FATAL) << "Unsupported SIMD type"; 781 UNREACHABLE(); 782 } 783 } 784 785 void LocationsBuilderX86::VisitVecOr(HVecOr* instruction) { 786 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); 787 } 788 789 void InstructionCodeGeneratorX86::VisitVecOr(HVecOr* instruction) { 790 LocationSummary* locations = instruction->GetLocations(); 791 DCHECK(locations->InAt(0).Equals(locations->Out())); 792 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 793 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 794 switch (instruction->GetPackedType()) { 795 case DataType::Type::kBool: 796 case DataType::Type::kUint8: 797 case DataType::Type::kInt8: 798 case DataType::Type::kUint16: 799 case DataType::Type::kInt16: 800 case DataType::Type::kInt32: 801 case DataType::Type::kInt64: 802 DCHECK_LE(2u, instruction->GetVectorLength()); 803 DCHECK_LE(instruction->GetVectorLength(), 16u); 804 __ por(dst, src); 805 break; 806 case DataType::Type::kFloat32: 807 DCHECK_EQ(4u, instruction->GetVectorLength()); 808 __ orps(dst, src); 809 break; 810 case DataType::Type::kFloat64: 811 DCHECK_EQ(2u, instruction->GetVectorLength()); 812 __ orpd(dst, src); 813 break; 814 default: 815 LOG(FATAL) << "Unsupported SIMD type"; 816 UNREACHABLE(); 817 } 818 } 819 820 void LocationsBuilderX86::VisitVecXor(HVecXor* instruction) { 821 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); 822 } 823 824 void InstructionCodeGeneratorX86::VisitVecXor(HVecXor* instruction) { 825 LocationSummary* locations = instruction->GetLocations(); 826 DCHECK(locations->InAt(0).Equals(locations->Out())); 827 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); 828 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 829 switch (instruction->GetPackedType()) { 830 case DataType::Type::kBool: 831 case DataType::Type::kUint8: 832 case DataType::Type::kInt8: 833 case DataType::Type::kUint16: 834 case DataType::Type::kInt16: 835 case DataType::Type::kInt32: 836 case DataType::Type::kInt64: 837 DCHECK_LE(2u, instruction->GetVectorLength()); 838 DCHECK_LE(instruction->GetVectorLength(), 16u); 839 __ pxor(dst, src); 840 break; 841 case DataType::Type::kFloat32: 842 DCHECK_EQ(4u, instruction->GetVectorLength()); 843 __ xorps(dst, src); 844 break; 845 case DataType::Type::kFloat64: 846 DCHECK_EQ(2u, instruction->GetVectorLength()); 847 __ xorpd(dst, src); 848 break; 849 default: 850 LOG(FATAL) << "Unsupported SIMD type"; 851 UNREACHABLE(); 852 } 853 } 854 855 // Helper to set up locations for vector shift operations. 856 static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) { 857 LocationSummary* locations = new (allocator) LocationSummary(instruction); 858 switch (instruction->GetPackedType()) { 859 case DataType::Type::kUint16: 860 case DataType::Type::kInt16: 861 case DataType::Type::kInt32: 862 case DataType::Type::kInt64: 863 locations->SetInAt(0, Location::RequiresFpuRegister()); 864 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); 865 locations->SetOut(Location::SameAsFirstInput()); 866 break; 867 default: 868 LOG(FATAL) << "Unsupported SIMD type"; 869 UNREACHABLE(); 870 } 871 } 872 873 void LocationsBuilderX86::VisitVecShl(HVecShl* instruction) { 874 CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction); 875 } 876 877 void InstructionCodeGeneratorX86::VisitVecShl(HVecShl* instruction) { 878 LocationSummary* locations = instruction->GetLocations(); 879 DCHECK(locations->InAt(0).Equals(locations->Out())); 880 int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); 881 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 882 switch (instruction->GetPackedType()) { 883 case DataType::Type::kUint16: 884 case DataType::Type::kInt16: 885 DCHECK_EQ(8u, instruction->GetVectorLength()); 886 __ psllw(dst, Immediate(static_cast<uint8_t>(value))); 887 break; 888 case DataType::Type::kInt32: 889 DCHECK_EQ(4u, instruction->GetVectorLength()); 890 __ pslld(dst, Immediate(static_cast<uint8_t>(value))); 891 break; 892 case DataType::Type::kInt64: 893 DCHECK_EQ(2u, instruction->GetVectorLength()); 894 __ psllq(dst, Immediate(static_cast<uint8_t>(value))); 895 break; 896 default: 897 LOG(FATAL) << "Unsupported SIMD type"; 898 UNREACHABLE(); 899 } 900 } 901 902 void LocationsBuilderX86::VisitVecShr(HVecShr* instruction) { 903 CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction); 904 } 905 906 void InstructionCodeGeneratorX86::VisitVecShr(HVecShr* instruction) { 907 LocationSummary* locations = instruction->GetLocations(); 908 DCHECK(locations->InAt(0).Equals(locations->Out())); 909 int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); 910 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 911 switch (instruction->GetPackedType()) { 912 case DataType::Type::kUint16: 913 case DataType::Type::kInt16: 914 DCHECK_EQ(8u, instruction->GetVectorLength()); 915 __ psraw(dst, Immediate(static_cast<uint8_t>(value))); 916 break; 917 case DataType::Type::kInt32: 918 DCHECK_EQ(4u, instruction->GetVectorLength()); 919 __ psrad(dst, Immediate(static_cast<uint8_t>(value))); 920 break; 921 default: 922 LOG(FATAL) << "Unsupported SIMD type"; 923 UNREACHABLE(); 924 } 925 } 926 927 void LocationsBuilderX86::VisitVecUShr(HVecUShr* instruction) { 928 CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction); 929 } 930 931 void InstructionCodeGeneratorX86::VisitVecUShr(HVecUShr* instruction) { 932 LocationSummary* locations = instruction->GetLocations(); 933 DCHECK(locations->InAt(0).Equals(locations->Out())); 934 int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); 935 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 936 switch (instruction->GetPackedType()) { 937 case DataType::Type::kUint16: 938 case DataType::Type::kInt16: 939 DCHECK_EQ(8u, instruction->GetVectorLength()); 940 __ psrlw(dst, Immediate(static_cast<uint8_t>(value))); 941 break; 942 case DataType::Type::kInt32: 943 DCHECK_EQ(4u, instruction->GetVectorLength()); 944 __ psrld(dst, Immediate(static_cast<uint8_t>(value))); 945 break; 946 case DataType::Type::kInt64: 947 DCHECK_EQ(2u, instruction->GetVectorLength()); 948 __ psrlq(dst, Immediate(static_cast<uint8_t>(value))); 949 break; 950 default: 951 LOG(FATAL) << "Unsupported SIMD type"; 952 UNREACHABLE(); 953 } 954 } 955 956 void LocationsBuilderX86::VisitVecSetScalars(HVecSetScalars* instruction) { 957 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); 958 959 DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented 960 961 HInstruction* input = instruction->InputAt(0); 962 bool is_zero = IsZeroBitPattern(input); 963 964 switch (instruction->GetPackedType()) { 965 case DataType::Type::kInt64: 966 // Long needs extra temporary to load from register pairs. 967 if (!is_zero) { 968 locations->AddTemp(Location::RequiresFpuRegister()); 969 } 970 FALLTHROUGH_INTENDED; 971 case DataType::Type::kBool: 972 case DataType::Type::kUint8: 973 case DataType::Type::kInt8: 974 case DataType::Type::kUint16: 975 case DataType::Type::kInt16: 976 case DataType::Type::kInt32: 977 locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) 978 : Location::RequiresRegister()); 979 locations->SetOut(Location::RequiresFpuRegister()); 980 break; 981 case DataType::Type::kFloat32: 982 case DataType::Type::kFloat64: 983 locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) 984 : Location::RequiresFpuRegister()); 985 locations->SetOut(Location::RequiresFpuRegister()); 986 break; 987 default: 988 LOG(FATAL) << "Unsupported SIMD type"; 989 UNREACHABLE(); 990 } 991 } 992 993 void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction) { 994 LocationSummary* locations = instruction->GetLocations(); 995 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); 996 997 DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented 998 999 // Zero out all other elements first. 1000 __ xorps(dst, dst); 1001 1002 // Shorthand for any type of zero. 1003 if (IsZeroBitPattern(instruction->InputAt(0))) { 1004 return; 1005 } 1006 1007 // Set required elements. 1008 switch (instruction->GetPackedType()) { 1009 case DataType::Type::kBool: 1010 case DataType::Type::kUint8: 1011 case DataType::Type::kInt8: 1012 case DataType::Type::kUint16: 1013 case DataType::Type::kInt16: // TODO: up to here, and? 1014 LOG(FATAL) << "Unsupported SIMD type"; 1015 UNREACHABLE(); 1016 case DataType::Type::kInt32: 1017 DCHECK_EQ(4u, instruction->GetVectorLength()); 1018 __ movd(dst, locations->InAt(0).AsRegister<Register>()); 1019 break; 1020 case DataType::Type::kInt64: { 1021 DCHECK_EQ(2u, instruction->GetVectorLength()); 1022 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 1023 __ xorps(tmp, tmp); 1024 __ movd(dst, locations->InAt(0).AsRegisterPairLow<Register>()); 1025 __ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>()); 1026 __ punpckldq(dst, tmp); 1027 break; 1028 } 1029 case DataType::Type::kFloat32: 1030 DCHECK_EQ(4u, instruction->GetVectorLength()); 1031 __ movss(dst, locations->InAt(1).AsFpuRegister<XmmRegister>()); 1032 break; 1033 case DataType::Type::kFloat64: 1034 DCHECK_EQ(2u, instruction->GetVectorLength()); 1035 __ movsd(dst, locations->InAt(1).AsFpuRegister<XmmRegister>()); 1036 break; 1037 default: 1038 LOG(FATAL) << "Unsupported SIMD type"; 1039 UNREACHABLE(); 1040 } 1041 } 1042 1043 // Helper to set up locations for vector accumulations. 1044 static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) { 1045 LocationSummary* locations = new (allocator) LocationSummary(instruction); 1046 switch (instruction->GetPackedType()) { 1047 case DataType::Type::kUint8: 1048 case DataType::Type::kInt8: 1049 case DataType::Type::kUint16: 1050 case DataType::Type::kInt16: 1051 case DataType::Type::kInt32: 1052 case DataType::Type::kInt64: 1053 locations->SetInAt(0, Location::RequiresFpuRegister()); 1054 locations->SetInAt(1, Location::RequiresFpuRegister()); 1055 locations->SetInAt(2, Location::RequiresFpuRegister()); 1056 locations->SetOut(Location::SameAsFirstInput()); 1057 break; 1058 default: 1059 LOG(FATAL) << "Unsupported SIMD type"; 1060 UNREACHABLE(); 1061 } 1062 } 1063 1064 void LocationsBuilderX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { 1065 CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction); 1066 } 1067 1068 void InstructionCodeGeneratorX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { 1069 // TODO: pmaddwd? 1070 LOG(FATAL) << "No SIMD for " << instruction->GetId(); 1071 } 1072 1073 void LocationsBuilderX86::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { 1074 CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction); 1075 } 1076 1077 void InstructionCodeGeneratorX86::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { 1078 // TODO: psadbw for unsigned? 1079 LOG(FATAL) << "No SIMD for " << instruction->GetId(); 1080 } 1081 1082 // Helper to set up locations for vector memory operations. 1083 static void CreateVecMemLocations(ArenaAllocator* allocator, 1084 HVecMemoryOperation* instruction, 1085 bool is_load) { 1086 LocationSummary* locations = new (allocator) LocationSummary(instruction); 1087 switch (instruction->GetPackedType()) { 1088 case DataType::Type::kBool: 1089 case DataType::Type::kUint8: 1090 case DataType::Type::kInt8: 1091 case DataType::Type::kUint16: 1092 case DataType::Type::kInt16: 1093 case DataType::Type::kInt32: 1094 case DataType::Type::kInt64: 1095 case DataType::Type::kFloat32: 1096 case DataType::Type::kFloat64: 1097 locations->SetInAt(0, Location::RequiresRegister()); 1098 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); 1099 if (is_load) { 1100 locations->SetOut(Location::RequiresFpuRegister()); 1101 } else { 1102 locations->SetInAt(2, Location::RequiresFpuRegister()); 1103 } 1104 break; 1105 default: 1106 LOG(FATAL) << "Unsupported SIMD type"; 1107 UNREACHABLE(); 1108 } 1109 } 1110 1111 // Helper to construct address for vector memory operations. 1112 static Address VecAddress(LocationSummary* locations, size_t size, bool is_string_char_at) { 1113 Location base = locations->InAt(0); 1114 Location index = locations->InAt(1); 1115 ScaleFactor scale = TIMES_1; 1116 switch (size) { 1117 case 2: scale = TIMES_2; break; 1118 case 4: scale = TIMES_4; break; 1119 case 8: scale = TIMES_8; break; 1120 default: break; 1121 } 1122 // Incorporate the string or array offset in the address computation. 1123 uint32_t offset = is_string_char_at 1124 ? mirror::String::ValueOffset().Uint32Value() 1125 : mirror::Array::DataOffset(size).Uint32Value(); 1126 return CodeGeneratorX86::ArrayAddress(base.AsRegister<Register>(), index, scale, offset); 1127 } 1128 1129 void LocationsBuilderX86::VisitVecLoad(HVecLoad* instruction) { 1130 CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true); 1131 // String load requires a temporary for the compressed load. 1132 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { 1133 instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister()); 1134 } 1135 } 1136 1137 void InstructionCodeGeneratorX86::VisitVecLoad(HVecLoad* instruction) { 1138 LocationSummary* locations = instruction->GetLocations(); 1139 size_t size = DataType::Size(instruction->GetPackedType()); 1140 Address address = VecAddress(locations, size, instruction->IsStringCharAt()); 1141 XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>(); 1142 bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16); 1143 switch (instruction->GetPackedType()) { 1144 case DataType::Type::kUint16: 1145 DCHECK_EQ(8u, instruction->GetVectorLength()); 1146 // Special handling of compressed/uncompressed string load. 1147 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { 1148 NearLabel done, not_compressed; 1149 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 1150 // Test compression bit. 1151 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, 1152 "Expecting 0=compressed, 1=uncompressed"); 1153 uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); 1154 __ testb(Address(locations->InAt(0).AsRegister<Register>(), count_offset), Immediate(1)); 1155 __ j(kNotZero, ¬_compressed); 1156 // Zero extend 8 compressed bytes into 8 chars. 1157 __ movsd(reg, VecAddress(locations, 1, instruction->IsStringCharAt())); 1158 __ pxor(tmp, tmp); 1159 __ punpcklbw(reg, tmp); 1160 __ jmp(&done); 1161 // Load 4 direct uncompressed chars. 1162 __ Bind(¬_compressed); 1163 is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address); 1164 __ Bind(&done); 1165 return; 1166 } 1167 FALLTHROUGH_INTENDED; 1168 case DataType::Type::kBool: 1169 case DataType::Type::kUint8: 1170 case DataType::Type::kInt8: 1171 case DataType::Type::kInt16: 1172 case DataType::Type::kInt32: 1173 case DataType::Type::kInt64: 1174 DCHECK_LE(2u, instruction->GetVectorLength()); 1175 DCHECK_LE(instruction->GetVectorLength(), 16u); 1176 is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address); 1177 break; 1178 case DataType::Type::kFloat32: 1179 DCHECK_EQ(4u, instruction->GetVectorLength()); 1180 is_aligned16 ? __ movaps(reg, address) : __ movups(reg, address); 1181 break; 1182 case DataType::Type::kFloat64: 1183 DCHECK_EQ(2u, instruction->GetVectorLength()); 1184 is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address); 1185 break; 1186 default: 1187 LOG(FATAL) << "Unsupported SIMD type"; 1188 UNREACHABLE(); 1189 } 1190 } 1191 1192 void LocationsBuilderX86::VisitVecStore(HVecStore* instruction) { 1193 CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false); 1194 } 1195 1196 void InstructionCodeGeneratorX86::VisitVecStore(HVecStore* instruction) { 1197 LocationSummary* locations = instruction->GetLocations(); 1198 size_t size = DataType::Size(instruction->GetPackedType()); 1199 Address address = VecAddress(locations, size, /*is_string_char_at*/ false); 1200 XmmRegister reg = locations->InAt(2).AsFpuRegister<XmmRegister>(); 1201 bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16); 1202 switch (instruction->GetPackedType()) { 1203 case DataType::Type::kBool: 1204 case DataType::Type::kUint8: 1205 case DataType::Type::kInt8: 1206 case DataType::Type::kUint16: 1207 case DataType::Type::kInt16: 1208 case DataType::Type::kInt32: 1209 case DataType::Type::kInt64: 1210 DCHECK_LE(2u, instruction->GetVectorLength()); 1211 DCHECK_LE(instruction->GetVectorLength(), 16u); 1212 is_aligned16 ? __ movdqa(address, reg) : __ movdqu(address, reg); 1213 break; 1214 case DataType::Type::kFloat32: 1215 DCHECK_EQ(4u, instruction->GetVectorLength()); 1216 is_aligned16 ? __ movaps(address, reg) : __ movups(address, reg); 1217 break; 1218 case DataType::Type::kFloat64: 1219 DCHECK_EQ(2u, instruction->GetVectorLength()); 1220 is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg); 1221 break; 1222 default: 1223 LOG(FATAL) << "Unsupported SIMD type"; 1224 UNREACHABLE(); 1225 } 1226 } 1227 1228 #undef __ 1229 1230 } // namespace x86 1231 } // namespace art 1232