//===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Implements the TargetLoweringARM32 class, which consists almost
/// entirely of the lowering sequence for each high-level instruction.
///
//===----------------------------------------------------------------------===//
#include "IceTargetLoweringARM32.h"

#include "IceCfg.h"
#include "IceCfgNode.h"
#include "IceClFlags.h"
#include "IceDefs.h"
#include "IceELFObjectWriter.h"
#include "IceGlobalInits.h"
#include "IceInstARM32.def"
#include "IceInstARM32.h"
#include "IceInstVarIter.h"
#include "IceLiveness.h"
#include "IceOperand.h"
#include "IcePhiLoweringImpl.h"
#include "IceRegistersARM32.h"
#include "IceTargetLoweringARM32.def"
#include "IceUtils.h"
#include "llvm/Support/MathExtras.h"

#include <algorithm>
#include <array>
#include <utility>

namespace ARM32 {
std::unique_ptr<::Ice::TargetLowering> createTargetLowering(::Ice::Cfg *Func) {
  return ::Ice::ARM32::TargetARM32::create(Func);
}

std::unique_ptr<::Ice::TargetDataLowering>
createTargetDataLowering(::Ice::GlobalContext *Ctx) {
  return ::Ice::ARM32::TargetDataARM32::create(Ctx);
}

std::unique_ptr<::Ice::TargetHeaderLowering>
createTargetHeaderLowering(::Ice::GlobalContext *Ctx) {
  return ::Ice::ARM32::TargetHeaderARM32::create(Ctx);
}

void staticInit(::Ice::GlobalContext *Ctx) {
  ::Ice::ARM32::TargetARM32::staticInit(Ctx);
  if (Ice::getFlags().getUseNonsfi()) {
    // In nonsfi, we need to reference the _GLOBAL_OFFSET_TABLE_ for accessing
    // globals. The GOT is an external symbol (i.e., it is not defined in the
    // pexe) so we need to register it as such so that ELF emission won't barf
    // on an "unknown" symbol. The GOT is added to the External symbols list
    // here because staticInit() is invoked in a single-thread context.
    Ctx->getConstantExternSym(Ctx->getGlobalString(::Ice::GlobalOffsetTable));
  }
}

bool shouldBePooled(const ::Ice::Constant *C) {
  return ::Ice::ARM32::TargetARM32::shouldBePooled(C);
}

::Ice::Type getPointerType() {
  return ::Ice::ARM32::TargetARM32::getPointerType();
}

} // end of namespace ARM32

namespace Ice {
namespace ARM32 {

namespace {

/// SizeOf is used to obtain the size of an initializer list as a constexpr
/// expression. This is only needed until our C++ library is updated to
/// C++14 -- which defines constexpr members to std::initializer_list.
class SizeOf {
  SizeOf(const SizeOf &) = delete;
  SizeOf &operator=(const SizeOf &) = delete;

public:
  constexpr SizeOf() : Size(0) {}
  template <typename... T>
  explicit constexpr SizeOf(T...) : Size(__length<T...>::value) {}
  constexpr SizeT size() const { return Size; }

private:
  template <typename T, typename... U> struct __length {
    static constexpr std::size_t value = 1 + __length<U...>::value;
  };

  template <typename T> struct __length<T> {
    static constexpr std::size_t value = 1;
  };

  const std::size_t Size;
};

} // end of anonymous namespace

// Defines the RegARM32::Table table with register information.
RegARM32::RegTableType RegARM32::RegTable[RegARM32::Reg_NUM] = {
#define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr,   \
          isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)       \
  {                                                                            \
      name, encode, cc_arg, scratch, preserved, stackptr, frameptr, isGPR,     \
      isInt, isI64Pair, isFP32, isFP64, isVec128,                              \
      (SizeOf alias_init).size(), alias_init                                   \
  }                                                                            \
  ,
    REGARM32_TABLE
#undef X
};

namespace {

// The following table summarizes the logic for lowering the icmp instruction
// for i32 and narrower types. Each icmp condition has a clear mapping to an
// ARM32 conditional move instruction.

const struct TableIcmp32_ {
  CondARM32::Cond Mapping;
} TableIcmp32[] = {
#define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V)    \
  { CondARM32::C_32 }                                                          \
  ,
    ICMPARM32_TABLE
#undef X
};

// The following table summarizes the logic for lowering the icmp instruction
// for the i64 type. Two conditional moves are needed for setting to 1 or 0.
// The operands may need to be swapped, and there is a slight difference for
// signed vs unsigned (comparing hi vs lo first, and using cmp vs sbc).
const struct TableIcmp64_ {
  bool IsSigned;
  bool Swapped;
  CondARM32::Cond C1, C2;
} TableIcmp64[] = {
#define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V)    \
  { is_signed, swapped64, CondARM32::C1_64, CondARM32::C2_64 }                 \
  ,
    ICMPARM32_TABLE
#undef X
};

CondARM32::Cond getIcmp32Mapping(InstIcmp::ICond Cond) {
  assert(Cond < llvm::array_lengthof(TableIcmp32));
  return TableIcmp32[Cond].Mapping;
}

// In some cases, there are x-macros tables for both high-level and low-level
// instructions/operands that use the same enum key value. The tables are kept
// separate to maintain a proper separation between abstraction layers. There
// is a risk that the tables could get out of sync if enum values are reordered
// or if entries are added or deleted. The following anonymous namespaces use
// static_asserts to ensure everything is kept in sync.

// Validate the enum values in ICMPARM32_TABLE.
namespace {
// Define a temporary set of enum values based on low-level table entries.
enum _icmp_ll_enum {
#define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V)    \
  _icmp_ll_##val,
  ICMPARM32_TABLE
#undef X
  _num
};
// Define a set of constants based on high-level table entries.
#define X(tag, reverse, str)                                                   \
  static constexpr int _icmp_hl_##tag = InstIcmp::tag;
ICEINSTICMP_TABLE
#undef X
// Define a set of constants based on low-level table entries, and ensure the
// table entry keys are consistent.
#define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V)    \
  static_assert(                                                               \
      _icmp_ll_##val == _icmp_hl_##val,                                        \
      "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE: " #val);
ICMPARM32_TABLE
#undef X
// Repeat the static asserts with respect to the high-level table entries in
// case the high-level table has extra entries.
#define X(tag, reverse, str)                                                   \
  static_assert(                                                               \
      _icmp_hl_##tag == _icmp_ll_##tag,                                        \
      "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE: " #tag);
ICEINSTICMP_TABLE
#undef X
} // end of anonymous namespace

// Stack alignment
const uint32_t ARM32_STACK_ALIGNMENT_BYTES = 16;

// Value is in bytes. Return Value adjusted to the next highest multiple of the
// stack alignment.
uint32_t applyStackAlignment(uint32_t Value) {
  return Utils::applyAlignment(Value, ARM32_STACK_ALIGNMENT_BYTES);
}

// Value is in bytes. Return Value adjusted to the next highest multiple of the
// stack alignment required for the given type.
uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) {
  // Use natural alignment, except that normally (non-NaCl) ARM only aligns
  // vectors to 8 bytes.
  // TODO(jvoung): Check this ...
  size_t typeAlignInBytes = typeWidthInBytes(Ty);
  if (isVectorType(Ty))
    typeAlignInBytes = 8;
  return Utils::applyAlignment(Value, typeAlignInBytes);
}

// Conservatively check if at compile time we know that the operand is
// definitely a non-zero integer.
bool isGuaranteedNonzeroInt(const Operand *Op) {
  if (auto *Const = llvm::dyn_cast_or_null<ConstantInteger32>(Op)) {
    return Const->getValue() != 0;
  }
  return false;
}

} // end of anonymous namespace

TargetARM32Features::TargetARM32Features(const ClFlags &Flags) {
  static_assert(
      (ARM32InstructionSet::End - ARM32InstructionSet::Begin) ==
          (TargetInstructionSet::ARM32InstructionSet_End -
           TargetInstructionSet::ARM32InstructionSet_Begin),
      "ARM32InstructionSet range different from TargetInstructionSet");
  if (Flags.getTargetInstructionSet() !=
      TargetInstructionSet::BaseInstructionSet) {
    InstructionSet = static_cast<ARM32InstructionSet>(
        (Flags.getTargetInstructionSet() -
         TargetInstructionSet::ARM32InstructionSet_Begin) +
        ARM32InstructionSet::Begin);
  }
}

namespace {
constexpr SizeT NumGPRArgs =
#define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr,   \
          isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)       \
  +(((cc_arg) > 0) ? 1 : 0)
    REGARM32_GPR_TABLE
#undef X
    ;
std::array<RegNumT, NumGPRArgs> GPRArgInitializer;

constexpr SizeT NumI64Args =
#define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr,   \
          isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)       \
  +(((cc_arg) > 0) ? 1 : 0)
    REGARM32_I64PAIR_TABLE
#undef X
    ;
std::array<RegNumT, NumI64Args> I64ArgInitializer;

constexpr SizeT NumFP32Args =
#define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr,   \
          isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)       \
  +(((cc_arg) > 0) ? 1 : 0)
    REGARM32_FP32_TABLE
#undef X
    ;
std::array<RegNumT, NumFP32Args> FP32ArgInitializer;

constexpr SizeT NumFP64Args =
#define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr,   \
          isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)       \
  +(((cc_arg) > 0) ? 1 : 0)
    REGARM32_FP64_TABLE
#undef X
    ;
std::array<RegNumT, NumFP64Args> FP64ArgInitializer;

constexpr SizeT NumVec128Args =
#define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr,   \
          isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)       \
  +(((cc_arg) > 0) ? 1 : 0)
    REGARM32_VEC128_TABLE
#undef X
    ;
std::array<RegNumT, NumVec128Args> Vec128ArgInitializer;

const char *getRegClassName(RegClass C) {
  auto ClassNum = static_cast<RegARM32::RegClassARM32>(C);
  assert(ClassNum < RegARM32::RCARM32_NUM);
  switch (ClassNum) {
  default:
    assert(C < RC_Target);
    return regClassString(C);
  // Add handling of new register classes below.
  case RegARM32::RCARM32_QtoS:
    return "QtoS";
  }
}

} // end of anonymous namespace

TargetARM32::TargetARM32(Cfg *Func)
    : TargetLowering(Func), NeedSandboxing(SandboxingType == ST_NaCl),
      CPUFeatures(getFlags()) {}

void TargetARM32::staticInit(GlobalContext *Ctx) {
  RegNumT::setLimit(RegARM32::Reg_NUM);
  // Limit this size (or do all bitsets need to be the same width)???
  SmallBitVector IntegerRegisters(RegARM32::Reg_NUM);
  SmallBitVector I64PairRegisters(RegARM32::Reg_NUM);
  SmallBitVector Float32Registers(RegARM32::Reg_NUM);
  SmallBitVector Float64Registers(RegARM32::Reg_NUM);
  SmallBitVector VectorRegisters(RegARM32::Reg_NUM);
  SmallBitVector QtoSRegisters(RegARM32::Reg_NUM);
  SmallBitVector InvalidRegisters(RegARM32::Reg_NUM);
  const unsigned EncodedReg_q8 = RegARM32::RegTable[RegARM32::Reg_q8].Encoding;
  for (int i = 0; i < RegARM32::Reg_NUM; ++i) {
    const auto &Entry = RegARM32::RegTable[i];
    IntegerRegisters[i] = Entry.IsInt;
    I64PairRegisters[i] = Entry.IsI64Pair;
    Float32Registers[i] = Entry.IsFP32;
    Float64Registers[i] = Entry.IsFP64;
    VectorRegisters[i] = Entry.IsVec128;
    RegisterAliases[i].resize(RegARM32::Reg_NUM);
    // TODO(eholk): It would be better to store a QtoS flag in the
    // IceRegistersARM32 table than to compare their encodings here.
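    // Only q0-q7 overlap the S register file (s0-s31 alias d0-d15, which in
    // turn alias q0-q7), so a vector register belongs to the QtoS class
    // exactly when its encoding is below q8's.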
    QtoSRegisters[i] = Entry.IsVec128 && Entry.Encoding < EncodedReg_q8;
    for (int j = 0; j < Entry.NumAliases; ++j) {
      assert(i == j || !RegisterAliases[i][Entry.Aliases[j]]);
      RegisterAliases[i].set(Entry.Aliases[j]);
    }
    assert(RegisterAliases[i][i]);
    if (Entry.CCArg <= 0) {
      continue;
    }
    const auto RegNum = RegNumT::fromInt(i);
    if (Entry.IsGPR) {
      GPRArgInitializer[Entry.CCArg - 1] = RegNum;
    } else if (Entry.IsI64Pair) {
      I64ArgInitializer[Entry.CCArg - 1] = RegNum;
    } else if (Entry.IsFP32) {
      FP32ArgInitializer[Entry.CCArg - 1] = RegNum;
    } else if (Entry.IsFP64) {
      FP64ArgInitializer[Entry.CCArg - 1] = RegNum;
    } else if (Entry.IsVec128) {
      Vec128ArgInitializer[Entry.CCArg - 1] = RegNum;
    }
  }
  TypeToRegisterSet[IceType_void] = InvalidRegisters;
  TypeToRegisterSet[IceType_i1] = IntegerRegisters;
  TypeToRegisterSet[IceType_i8] = IntegerRegisters;
  TypeToRegisterSet[IceType_i16] = IntegerRegisters;
  TypeToRegisterSet[IceType_i32] = IntegerRegisters;
  TypeToRegisterSet[IceType_i64] = I64PairRegisters;
  TypeToRegisterSet[IceType_f32] = Float32Registers;
  TypeToRegisterSet[IceType_f64] = Float64Registers;
  TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
  TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
  TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
  TypeToRegisterSet[RegARM32::RCARM32_QtoS] = QtoSRegisters;

  for (size_t i = 0; i < llvm::array_lengthof(TypeToRegisterSet); ++i)
    TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i];

  filterTypeToRegisterSet(Ctx, RegARM32::Reg_NUM, TypeToRegisterSet,
                          llvm::array_lengthof(TypeToRegisterSet),
                          [](RegNumT RegNum) -> std::string {
                            // This function simply removes ", " from the
                            // register name.
                            std::string Name = RegARM32::getRegName(RegNum);
                            constexpr const char RegSeparator[] = ", ";
                            constexpr size_t RegSeparatorWidth =
                                llvm::array_lengthof(RegSeparator) - 1;
                            for (size_t Pos = Name.find(RegSeparator);
                                 Pos != std::string::npos;
                                 Pos = Name.find(RegSeparator)) {
                              Name.replace(Pos, RegSeparatorWidth, "");
                            }
                            return Name;
                          },
                          getRegClassName);
}

namespace {
void copyRegAllocFromInfWeightVariable64On32(const VarList &Vars) {
  for (Variable *Var : Vars) {
    auto *Var64 = llvm::dyn_cast<Variable64On32>(Var);
    if (!Var64) {
      // This is not the variable we are looking for.
      continue;
    }
    // only allow infinite-weight i64 temporaries to be register allocated.
    assert(!Var64->hasReg() || Var64->mustHaveReg());
    if (!Var64->hasReg()) {
      continue;
    }
    const auto FirstReg =
        RegNumT::fixme(RegARM32::getI64PairFirstGPRNum(Var->getRegNum()));
    // This assumes little endian.
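    // For example, if the pair was allocated to r0:r1, the Lo half (the least
    // significant word) receives FirstReg (r0) and the Hi half receives
    // FirstReg + 1 (r1).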
    Variable *Lo = Var64->getLo();
    Variable *Hi = Var64->getHi();
    assert(Lo->hasReg() == Hi->hasReg());
    if (Lo->hasReg()) {
      continue;
    }
    Lo->setRegNum(FirstReg);
    Lo->setMustHaveReg();
    Hi->setRegNum(RegNumT::fixme(FirstReg + 1));
    Hi->setMustHaveReg();
  }
}
} // end of anonymous namespace

uint32_t TargetARM32::getCallStackArgumentsSizeBytes(const InstCall *Call) {
  TargetARM32::CallingConv CC;
  RegNumT DummyReg;
  size_t OutArgsSizeBytes = 0;
  for (SizeT i = 0, NumArgs = Call->getNumArgs(); i < NumArgs; ++i) {
    Operand *Arg = legalizeUndef(Call->getArg(i));
    const Type Ty = Arg->getType();
    if (isScalarIntegerType(Ty)) {
      if (CC.argInGPR(Ty, &DummyReg)) {
        continue;
      }
    } else {
      if (CC.argInVFP(Ty, &DummyReg)) {
        continue;
      }
    }

    OutArgsSizeBytes = applyStackAlignmentTy(OutArgsSizeBytes, Ty);
    OutArgsSizeBytes += typeWidthInBytesOnStack(Ty);
  }

  return applyStackAlignment(OutArgsSizeBytes);
}

void TargetARM32::genTargetHelperCallFor(Inst *Instr) {
  constexpr bool NoTailCall = false;
  constexpr bool IsTargetHelperCall = true;

  switch (Instr->getKind()) {
  default:
    return;
  case Inst::Arithmetic: {
    Variable *Dest = Instr->getDest();
    const Type DestTy = Dest->getType();
    const InstArithmetic::OpKind Op =
        llvm::cast<InstArithmetic>(Instr)->getOp();
    if (isVectorType(DestTy)) {
      switch (Op) {
      default:
        break;
      case InstArithmetic::Fdiv:
      case InstArithmetic::Frem:
      case InstArithmetic::Sdiv:
      case InstArithmetic::Srem:
      case InstArithmetic::Udiv:
      case InstArithmetic::Urem:
        scalarizeArithmetic(Op, Dest, Instr->getSrc(0), Instr->getSrc(1));
        Instr->setDeleted();
        return;
      }
    }
    switch (DestTy) {
    default:
      return;
    case IceType_i64: {
      // Technically, ARM has its own aeabi routines, but we can use the
      // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses
      // the more standard __moddi3 for rem.
      RuntimeHelper HelperID = RuntimeHelper::H_Num;
      switch (Op) {
      default:
        return;
      case InstArithmetic::Udiv:
        HelperID = RuntimeHelper::H_udiv_i64;
        break;
      case InstArithmetic::Sdiv:
        HelperID = RuntimeHelper::H_sdiv_i64;
        break;
      case InstArithmetic::Urem:
        HelperID = RuntimeHelper::H_urem_i64;
        break;
      case InstArithmetic::Srem:
        HelperID = RuntimeHelper::H_srem_i64;
        break;
      }
      Operand *TargetHelper = Ctx->getRuntimeHelperFunc(HelperID);
      ARM32HelpersPreamble[TargetHelper] = &TargetARM32::preambleDivRem;
      constexpr SizeT MaxArgs = 2;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Instr->getSrc(0));
      Call->addArg(Instr->getSrc(1));
      Instr->setDeleted();
      return;
    }
    case IceType_i32:
    case IceType_i16:
    case IceType_i8: {
      const bool HasHWDiv = hasCPUFeature(TargetARM32Features::HWDivArm);
      InstCast::OpKind CastKind;
      RuntimeHelper HelperID = RuntimeHelper::H_Num;
      switch (Op) {
      default:
        return;
      case InstArithmetic::Udiv:
        HelperID = HasHWDiv ? RuntimeHelper::H_Num : RuntimeHelper::H_udiv_i32;
        CastKind = InstCast::Zext;
        break;
      case InstArithmetic::Sdiv:
        HelperID = HasHWDiv ? RuntimeHelper::H_Num : RuntimeHelper::H_sdiv_i32;
        CastKind = InstCast::Sext;
        break;
      case InstArithmetic::Urem:
        HelperID = HasHWDiv ? RuntimeHelper::H_Num : RuntimeHelper::H_urem_i32;
        CastKind = InstCast::Zext;
        break;
      case InstArithmetic::Srem:
        HelperID = HasHWDiv ? RuntimeHelper::H_Num : RuntimeHelper::H_srem_i32;
        CastKind = InstCast::Sext;
        break;
      }
      if (HelperID == RuntimeHelper::H_Num) {
        // HelperID should only ever be undefined when the processor does not
        // have a hardware divider. If any other helpers are ever introduced,
        // the following assert will have to be modified.
        assert(HasHWDiv);
        return;
      }
      Operand *Src0 = Instr->getSrc(0);
      Operand *Src1 = Instr->getSrc(1);
      if (DestTy != IceType_i32) {
        // Src0 and Src1 have to be zero- or sign-extended to i32. For Src0,
        // we just insert an InstCast right before the call to the helper.
        Variable *Src0_32 = Func->makeVariable(IceType_i32);
        Context.insert<InstCast>(CastKind, Src0_32, Src0);
        Src0 = Src0_32;

        // For extending Src1, we will just insert an InstCast if Src1 is not a
        // Constant. If it is, then we extend it here, and not during program
        // runtime. This allows preambleDivRem to optimize out the div-by-0
        // check.
        if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
          const int32_t ShAmt = (DestTy == IceType_i16) ? 16 : 24;
          int32_t NewC = C->getValue();
          if (CastKind == InstCast::Zext) {
            NewC &= ~(0x80000000l >> ShAmt);
          } else {
            NewC = (NewC << ShAmt) >> ShAmt;
          }
          Src1 = Ctx->getConstantInt32(NewC);
        } else {
          Variable *Src1_32 = Func->makeVariable(IceType_i32);
          Context.insert<InstCast>(CastKind, Src1_32, Src1);
          Src1 = Src1_32;
        }
      }
      Operand *TargetHelper = Ctx->getRuntimeHelperFunc(HelperID);
      ARM32HelpersPreamble[TargetHelper] = &TargetARM32::preambleDivRem;
      constexpr SizeT MaxArgs = 2;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      assert(Src0->getType() == IceType_i32);
      Call->addArg(Src0);
      assert(Src1->getType() == IceType_i32);
      Call->addArg(Src1);
      Instr->setDeleted();
      return;
    }
    case IceType_f64:
    case IceType_f32: {
      if (Op != InstArithmetic::Frem) {
        return;
      }
      constexpr SizeT MaxArgs = 2;
      Operand *TargetHelper = Ctx->getRuntimeHelperFunc(
          DestTy == IceType_f32 ? RuntimeHelper::H_frem_f32
                                : RuntimeHelper::H_frem_f64);
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Instr->getSrc(0));
      Call->addArg(Instr->getSrc(1));
      Instr->setDeleted();
      return;
    }
    }
    llvm::report_fatal_error("Control flow should never have reached here.");
  }
  case Inst::Cast: {
    Variable *Dest = Instr->getDest();
    Operand *Src0 = Instr->getSrc(0);
    const Type DestTy = Dest->getType();
    const Type SrcTy = Src0->getType();
    auto *CastInstr = llvm::cast<InstCast>(Instr);
    const InstCast::OpKind CastKind = CastInstr->getCastKind();

    switch (CastKind) {
    default:
      return;
    case InstCast::Fptosi:
    case InstCast::Fptoui: {
      if (DestTy != IceType_i64) {
        return;
      }
      const bool DestIsSigned = CastKind == InstCast::Fptosi;
      const bool Src0IsF32 = isFloat32Asserting32Or64(SrcTy);
      Operand *TargetHelper = Ctx->getRuntimeHelperFunc(
          Src0IsF32 ? (DestIsSigned ? RuntimeHelper::H_fptosi_f32_i64
                                    : RuntimeHelper::H_fptoui_f32_i64)
                    : (DestIsSigned ? RuntimeHelper::H_fptosi_f64_i64
                                    : RuntimeHelper::H_fptoui_f64_i64));
      static constexpr SizeT MaxArgs = 1;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Src0);
      Instr->setDeleted();
      return;
    }
    case InstCast::Sitofp:
    case InstCast::Uitofp: {
      if (SrcTy != IceType_i64) {
        return;
      }
      const bool SourceIsSigned = CastKind == InstCast::Sitofp;
      const bool DestIsF32 = isFloat32Asserting32Or64(Dest->getType());
      Operand *TargetHelper = Ctx->getRuntimeHelperFunc(
          DestIsF32 ? (SourceIsSigned ? RuntimeHelper::H_sitofp_i64_f32
                                      : RuntimeHelper::H_uitofp_i64_f32)
                    : (SourceIsSigned ? RuntimeHelper::H_sitofp_i64_f64
                                      : RuntimeHelper::H_uitofp_i64_f64));
      static constexpr SizeT MaxArgs = 1;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Src0);
      Instr->setDeleted();
      return;
    }
    case InstCast::Bitcast: {
      if (DestTy == SrcTy) {
        return;
      }
      Variable *CallDest = Dest;
      RuntimeHelper HelperID = RuntimeHelper::H_Num;
      switch (DestTy) {
      default:
        return;
      case IceType_i8:
        assert(SrcTy == IceType_v8i1);
        HelperID = RuntimeHelper::H_bitcast_8xi1_i8;
        CallDest = Func->makeVariable(IceType_i32);
        break;
      case IceType_i16:
        assert(SrcTy == IceType_v16i1);
        HelperID = RuntimeHelper::H_bitcast_16xi1_i16;
        CallDest = Func->makeVariable(IceType_i32);
        break;
      case IceType_v8i1: {
        assert(SrcTy == IceType_i8);
        HelperID = RuntimeHelper::H_bitcast_i8_8xi1;
        Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
        // Arguments to functions are required to be at least 32 bits wide.
        Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
        Src0 = Src0AsI32;
      } break;
      case IceType_v16i1: {
        assert(SrcTy == IceType_i16);
        HelperID = RuntimeHelper::H_bitcast_i16_16xi1;
        Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
        // Arguments to functions are required to be at least 32 bits wide.
        Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
        Src0 = Src0AsI32;
      } break;
      }
      constexpr SizeT MaxSrcs = 1;
      InstCall *Call = makeHelperCall(HelperID, CallDest, MaxSrcs);
      Call->addArg(Src0);
      Context.insert(Call);
      // The PNaCl ABI disallows i8/i16 return types, so truncate the helper
      // call result to the appropriate type as necessary.
      if (CallDest->getType() != Dest->getType())
        Context.insert<InstCast>(InstCast::Trunc, Dest, CallDest);
      Instr->setDeleted();
      return;
    }
    case InstCast::Trunc: {
      if (DestTy == SrcTy) {
        return;
      }
      if (!isVectorType(SrcTy)) {
        return;
      }
      assert(typeNumElements(DestTy) == typeNumElements(SrcTy));
      assert(typeElementType(DestTy) == IceType_i1);
      assert(isVectorIntegerType(SrcTy));
      return;
    }
    case InstCast::Sext:
    case InstCast::Zext: {
      if (DestTy == SrcTy) {
        return;
      }
      if (!isVectorType(DestTy)) {
        return;
      }
      assert(typeNumElements(DestTy) == typeNumElements(SrcTy));
      assert(typeElementType(SrcTy) == IceType_i1);
      assert(isVectorIntegerType(DestTy));
      return;
    }
    }
    llvm::report_fatal_error("Control flow should never have reached here.");
  }
  case Inst::IntrinsicCall: {
    Variable *Dest = Instr->getDest();
    auto *IntrinsicCall = llvm::cast<InstIntrinsicCall>(Instr);
    Intrinsics::IntrinsicID ID = IntrinsicCall->getIntrinsicInfo().ID;
    switch (ID) {
    default:
      return;
    case Intrinsics::Ctpop: {
      Operand *Src0 = IntrinsicCall->getArg(0);
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(isInt32Asserting32Or64(Src0->getType())
                                        ? RuntimeHelper::H_call_ctpop_i32
                                        : RuntimeHelper::H_call_ctpop_i64);
      static constexpr SizeT MaxArgs = 1;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Src0);
      Instr->setDeleted();
      if (Src0->getType() == IceType_i64) {
        ARM32HelpersPostamble[TargetHelper] = &TargetARM32::postambleCtpop64;
      }
      return;
    }
    case Intrinsics::Longjmp: {
      static constexpr SizeT MaxArgs = 2;
      static constexpr Variable *NoDest = nullptr;
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_longjmp);
      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(IntrinsicCall->getArg(0));
      Call->addArg(IntrinsicCall->getArg(1));
      Instr->setDeleted();
      return;
    }
    case Intrinsics::Memcpy: {
      // In the future, we could potentially emit an inline memcpy/memset, etc.
      // for intrinsic calls w/ a known length.
      static constexpr SizeT MaxArgs = 3;
      static constexpr Variable *NoDest = nullptr;
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memcpy);
      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(IntrinsicCall->getArg(0));
      Call->addArg(IntrinsicCall->getArg(1));
      Call->addArg(IntrinsicCall->getArg(2));
      Instr->setDeleted();
      return;
    }
    case Intrinsics::Memmove: {
      static constexpr SizeT MaxArgs = 3;
      static constexpr Variable *NoDest = nullptr;
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memmove);
      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(IntrinsicCall->getArg(0));
      Call->addArg(IntrinsicCall->getArg(1));
      Call->addArg(IntrinsicCall->getArg(2));
      Instr->setDeleted();
      return;
    }
    case Intrinsics::Memset: {
      // The value operand needs to be extended to a stack slot size because
      // the PNaCl ABI requires arguments to be at least 32 bits wide.
      Operand *ValOp = IntrinsicCall->getArg(1);
      assert(ValOp->getType() == IceType_i8);
      Variable *ValExt = Func->makeVariable(stackSlotType());
      Context.insert<InstCast>(InstCast::Zext, ValExt, ValOp);

      // Technically, ARM has its own __aeabi_memset, but we can use plain
      // memset too. The value and size arguments need to be flipped if we
      // ever decide to use __aeabi_memset.
      static constexpr SizeT MaxArgs = 3;
      static constexpr Variable *NoDest = nullptr;
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memset);
      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(IntrinsicCall->getArg(0));
      Call->addArg(ValExt);
      Call->addArg(IntrinsicCall->getArg(2));
      Instr->setDeleted();
      return;
    }
    case Intrinsics::NaClReadTP: {
      if (SandboxingType == ST_NaCl) {
        return;
      }
      static constexpr SizeT MaxArgs = 0;
      Operand *TargetHelper =
          SandboxingType == ST_Nonsfi
              ? Ctx->getConstantExternSym(
                    Ctx->getGlobalString("__aeabi_read_tp"))
              : Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_read_tp);
      Context.insert<InstCall>(MaxArgs, Dest, TargetHelper, NoTailCall,
                               IsTargetHelperCall);
      Instr->setDeleted();
      return;
    }
    case Intrinsics::Setjmp: {
      static constexpr SizeT MaxArgs = 1;
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_setjmp);
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(IntrinsicCall->getArg(0));
      Instr->setDeleted();
      return;
    }
    }
    llvm::report_fatal_error("Control flow should never have reached here.");
  }
  }
}

void TargetARM32::findMaxStackOutArgsSize() {
  // MinNeededOutArgsBytes should be updated if the Target ever creates a
  // high-level InstCall that requires more stack bytes.
  constexpr size_t MinNeededOutArgsBytes = 0;
  MaxOutArgsSizeBytes = MinNeededOutArgsBytes;
  for (CfgNode *Node : Func->getNodes()) {
    Context.init(Node);
    while (!Context.atEnd()) {
      PostIncrLoweringContext PostIncrement(Context);
      Inst *CurInstr = iteratorToInst(Context.getCur());
      if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) {
        SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call);
        MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes);
      }
    }
  }
}

void TargetARM32::createGotPtr() {
  if (SandboxingType != ST_Nonsfi) {
    return;
  }
  GotPtr = Func->makeVariable(IceType_i32);
}

void TargetARM32::insertGotPtrInitPlaceholder() {
  if (SandboxingType != ST_Nonsfi) {
    return;
  }
  assert(GotPtr != nullptr);
  // We add the two placeholder instructions here. The first fakedefs T, an
  // infinite-weight temporary, while the second fakedefs the GotPtr "using" T.
  // This is needed because the GotPtr initialization, if needed, will require
  // a register:
  //
  //   movw reg, _GLOBAL_OFFSET_TABLE_ - 16 - .
  //   movt reg, _GLOBAL_OFFSET_TABLE_ - 12 - .
  //   add  reg, pc, reg
  //   mov  GotPtr, reg
  //
  // If GotPtr is not used, then both these pseudo-instructions are dce'd.
  Variable *T = makeReg(IceType_i32);
  Context.insert<InstFakeDef>(T);
  Context.insert<InstFakeDef>(GotPtr, T);
}

GlobalString
TargetARM32::createGotoffRelocation(const ConstantRelocatable *CR) {
  GlobalString CRName = CR->getName();
  GlobalString CRGotoffName =
      Ctx->getGlobalString("GOTOFF$" + Func->getFunctionName() + "$" + CRName);
  if (KnownGotoffs.count(CRGotoffName) == 0) {
    constexpr bool SuppressMangling = true;
    auto *Global =
        VariableDeclaration::create(Func->getGlobalPool(), SuppressMangling);
    Global->setIsConstant(true);
    Global->setName(CRName);
    Func->getGlobalPool()->willNotBeEmitted(Global);

    auto *Gotoff =
        VariableDeclaration::create(Func->getGlobalPool(), SuppressMangling);
    constexpr auto GotFixup = R_ARM_GOTOFF32;
    Gotoff->setIsConstant(true);
    Gotoff->addInitializer(VariableDeclaration::RelocInitializer::create(
        Func->getGlobalPool(), Global, {RelocOffset::create(Ctx, 0)},
        GotFixup));
    Gotoff->setName(CRGotoffName);
    Func->addGlobal(Gotoff);
    KnownGotoffs.emplace(CRGotoffName);
  }
  return CRGotoffName;
}

void TargetARM32::materializeGotAddr(CfgNode *Node) {
  if (SandboxingType != ST_Nonsfi) {
    return;
  }

  // First, we try to find the
  //   GotPtr = def T
  // pseudo-instruction that we placed for defining the got ptr. That
  // instruction is not just a place-holder for defining the GotPtr (thus
  // keeping liveness consistent), but it is also located at a point where it
  // is safe to materialize the got addr -- i.e., before loading parameters to
  // registers, but after moving register parameters from their home location.
  InstFakeDef *DefGotPtr = nullptr;
  for (auto &Inst : Node->getInsts()) {
    auto *FakeDef = llvm::dyn_cast<InstFakeDef>(&Inst);
    if (FakeDef != nullptr && FakeDef->getDest() == GotPtr) {
      DefGotPtr = FakeDef;
      break;
    }
  }

  if (DefGotPtr == nullptr || DefGotPtr->isDeleted()) {
    return;
  }

  // The got addr needs to be materialized at the same point where DefGotPtr
  // lives.
  Context.setInsertPoint(instToIterator(DefGotPtr));
  assert(DefGotPtr->getSrcSize() == 1);
  auto *T = llvm::cast<Variable>(DefGotPtr->getSrc(0));
  loadNamedConstantRelocatablePIC(Ctx->getGlobalString(GlobalOffsetTable), T,
                                  [this, T](Variable *PC) { _add(T, PC, T); });
  _mov(GotPtr, T);
  DefGotPtr->setDeleted();
}

void TargetARM32::loadNamedConstantRelocatablePIC(
    GlobalString Name, Variable *Register,
    std::function<void(Variable *PC)> Finish) {
  assert(SandboxingType == ST_Nonsfi);
  // We makeReg() here instead of getPhysicalRegister() because the latter ends
  // up creating multi-blocks temporaries that liveness fails to validate.
  auto *PC = makeReg(IceType_i32, RegARM32::Reg_pc);

  auto *AddPcReloc = RelocOffset::create(Ctx);
  AddPcReloc->setSubtract(true);
  auto *AddPcLabel = InstARM32Label::create(Func, this);
  AddPcLabel->setRelocOffset(AddPcReloc);

  auto *MovwReloc = RelocOffset::create(Ctx);
  auto *MovwLabel = InstARM32Label::create(Func, this);
  MovwLabel->setRelocOffset(MovwReloc);

  auto *MovtReloc = RelocOffset::create(Ctx);
  auto *MovtLabel = InstARM32Label::create(Func, this);
  MovtLabel->setRelocOffset(MovtReloc);

  // The EmitString for these constant relocatables has hardcoded offsets
  // attached to it. This could be dangerous if, e.g., we ever implemented
  // instruction scheduling, but llvm-mc currently does not support
  //
  //   movw reg, #:lower16:(Symbol - Label - Number)
  //   movt reg, #:upper16:(Symbol - Label - Number)
  //
  // relocations.
  static constexpr RelocOffsetT PcOffset = -8;
  auto *CRLower = Ctx->getConstantSymWithEmitString(
      PcOffset, {MovwReloc, AddPcReloc}, Name, Name + " -16");
  auto *CRUpper = Ctx->getConstantSymWithEmitString(
      PcOffset, {MovtReloc, AddPcReloc}, Name, Name + " -12");

  Context.insert(MovwLabel);
  _movw(Register, CRLower);
  Context.insert(MovtLabel);
  _movt(Register, CRUpper);
  // PC = fake-def to keep liveness consistent.
  Context.insert<InstFakeDef>(PC);
  Context.insert(AddPcLabel);
  Finish(PC);
}

void TargetARM32::translateO2() {
  TimerMarker T(TimerStack::TT_O2, Func);

  // TODO(stichnot): share passes with other targets?
  // https://code.google.com/p/nativeclient/issues/detail?id=4094
  if (SandboxingType == ST_Nonsfi) {
    createGotPtr();
  }
  genTargetHelperCalls();
  findMaxStackOutArgsSize();

  // Merge Alloca instructions, and lay out the stack.
  static constexpr bool SortAndCombineAllocas = true;
  Func->processAllocas(SortAndCombineAllocas);
  Func->dump("After Alloca processing");

  if (!getFlags().getEnablePhiEdgeSplit()) {
    // Lower Phi instructions.
    Func->placePhiLoads();
    if (Func->hasError())
      return;
    Func->placePhiStores();
    if (Func->hasError())
      return;
    Func->deletePhis();
    if (Func->hasError())
      return;
    Func->dump("After Phi lowering");
  }

  // Address mode optimization.
  Func->getVMetadata()->init(VMK_SingleDefs);
  Func->doAddressOpt();
  Func->materializeVectorShuffles();

  // Argument lowering
  Func->doArgLowering();

  // Target lowering. This requires liveness analysis for some parts of the
  // lowering decisions, such as compare/branch fusing. If non-lightweight
  // liveness analysis is used, the instructions need to be renumbered first.
  // TODO: This renumbering should only be necessary if we're actually
  // calculating live intervals, which we only do for register allocation.
  Func->renumberInstructions();
  if (Func->hasError())
    return;

  // TODO: It should be sufficient to use the fastest liveness calculation,
  // i.e. livenessLightweight(). However, for some reason that slows down the
  // rest of the translation. Investigate.
  Func->liveness(Liveness_Basic);
  if (Func->hasError())
    return;
  Func->dump("After ARM32 address mode opt");

  if (SandboxingType == ST_Nonsfi) {
    insertGotPtrInitPlaceholder();
  }
  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After ARM32 codegen");

  // Register allocation. This requires instruction renumbering and full
  // liveness analysis.
  Func->renumberInstructions();
  if (Func->hasError())
    return;
  Func->liveness(Liveness_Intervals);
  if (Func->hasError())
    return;
  // The post-codegen dump is done here, after liveness analysis and associated
  // cleanup, to make the dump cleaner and more useful.
  Func->dump("After initial ARM32 codegen");
  // Validate the live range computations. The expensive validation call is
  // deliberately only made when assertions are enabled.
  assert(Func->validateLiveness());
  Func->getVMetadata()->init(VMK_All);
  regAlloc(RAK_Global);
  if (Func->hasError())
    return;

  copyRegAllocFromInfWeightVariable64On32(Func->getVariables());
  Func->dump("After linear scan regalloc");

  if (getFlags().getEnablePhiEdgeSplit()) {
    Func->advancedPhiLowering();
    Func->dump("After advanced Phi lowering");
  }

  ForbidTemporaryWithoutReg _(this);

  // Stack frame mapping.
  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  postLowerLegalization();
  if (Func->hasError())
    return;
  Func->dump("After postLowerLegalization");

  Func->contractEmptyNodes();
  Func->reorderNodes();

  // Branch optimization. This needs to be done just before code emission. In
  // particular, no transformations that insert or reorder CfgNodes should be
  // done after branch optimization. We go ahead and do it before nop insertion
  // to reduce the amount of work needed for searching for opportunities.
  Func->doBranchOpt();
  Func->dump("After branch optimization");

  // Nop insertion
  if (getFlags().getShouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}

void TargetARM32::translateOm1() {
  TimerMarker T(TimerStack::TT_Om1, Func);

  // TODO(stichnot): share passes with other targets?
  if (SandboxingType == ST_Nonsfi) {
    createGotPtr();
  }

  genTargetHelperCalls();
  findMaxStackOutArgsSize();

  // Do not merge Alloca instructions, and lay out the stack.
  static constexpr bool DontSortAndCombineAllocas = false;
  Func->processAllocas(DontSortAndCombineAllocas);
  Func->dump("After Alloca processing");

  Func->placePhiLoads();
  if (Func->hasError())
    return;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  Func->deletePhis();
  if (Func->hasError())
    return;
  Func->dump("After Phi lowering");

  Func->doArgLowering();

  if (SandboxingType == ST_Nonsfi) {
    insertGotPtrInitPlaceholder();
  }
  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After initial ARM32 codegen");

  regAlloc(RAK_InfOnly);
  if (Func->hasError())
    return;

  copyRegAllocFromInfWeightVariable64On32(Func->getVariables());
  Func->dump("After regalloc of infinite-weight variables");

  ForbidTemporaryWithoutReg _(this);

  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  postLowerLegalization();
  if (Func->hasError())
    return;
  Func->dump("After postLowerLegalization");

  // Nop insertion
  if (getFlags().getShouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}

uint32_t TargetARM32::getStackAlignment() const {
  return ARM32_STACK_ALIGNMENT_BYTES;
}

bool TargetARM32::doBranchOpt(Inst *I, const CfgNode *NextNode) {
  if (auto *Br = llvm::dyn_cast<InstARM32Br>(I)) {
    return Br->optimizeBranch(NextNode);
  }
  return false;
}

const char *TargetARM32::getRegName(RegNumT RegNum, Type Ty) const {
  (void)Ty;
  return RegARM32::getRegName(RegNum);
}

Variable *TargetARM32::getPhysicalRegister(RegNumT RegNum, Type Ty) {
  static const Type DefaultType[] = {
#define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr,   \
          isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)       \
  (isFP32)                                                                     \
      ? IceType_f32                                                            \
      : ((isFP64) ? IceType_f64 : ((isVec128 ? IceType_v4i32 : IceType_i32))),
      REGARM32_TABLE
#undef X
  };

  if (Ty == IceType_void) {
    assert(unsigned(RegNum) < llvm::array_lengthof(DefaultType));
    Ty = DefaultType[RegNum];
  }
  if (PhysicalRegisters[Ty].empty())
    PhysicalRegisters[Ty].resize(RegARM32::Reg_NUM);
  assert(unsigned(RegNum) < PhysicalRegisters[Ty].size());
  Variable *Reg = PhysicalRegisters[Ty][RegNum];
  if (Reg == nullptr) {
    Reg = Func->makeVariable(Ty);
    Reg->setRegNum(RegNum);
    PhysicalRegisters[Ty][RegNum] = Reg;
    // Specially mark a named physical register as an "argument" so that it is
    // considered live upon function entry. Otherwise it's possible to get
    // liveness validation errors for saving callee-save registers.
    Func->addImplicitArg(Reg);
    // Don't bother tracking the live range of a named physical register.
    Reg->setIgnoreLiveness();
  }
  return Reg;
}

void TargetARM32::emitJumpTable(const Cfg *Func,
                                const InstJumpTable *JumpTable) const {
  (void)Func;
  (void)JumpTable;
  UnimplementedError(getFlags());
}

void TargetARM32::emitVariable(const Variable *Var) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  if (Var->hasReg()) {
    Str << getRegName(Var->getRegNum(), Var->getType());
    return;
  }
  if (Var->mustHaveReg()) {
    llvm::report_fatal_error("Infinite-weight Variable (" + Var->getName() +
                             ") has no register assigned - function " +
                             Func->getFunctionName());
  }
  assert(!Var->isRematerializable());
  int32_t Offset = Var->getStackOffset();
  auto BaseRegNum = Var->getBaseRegNum();
  if (BaseRegNum.hasNoValue()) {
    BaseRegNum = getFrameOrStackReg();
  }
  const Type VarTy = Var->getType();
  Str << "[" << getRegName(BaseRegNum, VarTy);
  if (Offset != 0) {
    Str << ", #" << Offset;
  }
  Str << "]";
}

TargetARM32::CallingConv::CallingConv()
    : GPRegsUsed(RegARM32::Reg_NUM),
      GPRArgs(GPRArgInitializer.rbegin(), GPRArgInitializer.rend()),
      I64Args(I64ArgInitializer.rbegin(), I64ArgInitializer.rend()),
      VFPRegsUsed(RegARM32::Reg_NUM),
      FP32Args(FP32ArgInitializer.rbegin(), FP32ArgInitializer.rend()),
      FP64Args(FP64ArgInitializer.rbegin(), FP64ArgInitializer.rend()),
      Vec128Args(Vec128ArgInitializer.rbegin(), Vec128ArgInitializer.rend()) {}

bool TargetARM32::CallingConv::argInGPR(Type Ty, RegNumT *Reg) {
  CfgVector<RegNumT> *Source;

  switch (Ty) {
  default: {
    assert(isScalarIntegerType(Ty));
    Source = &GPRArgs;
  } break;
  case IceType_i64: {
    Source = &I64Args;
  } break;
  }

  discardUnavailableGPRsAndTheirAliases(Source);

  if (Source->empty()) {
    GPRegsUsed.set();
    return false;
  }

  *Reg = Source->back();
  // Note that we don't Source->pop_back() here. This is intentional. Notice
  // how we mark all of Reg's aliases as Used. So, for the next argument,
  // Source->back() is marked as unavailable, and it is thus implicitly popped
  // from the stack.
  GPRegsUsed |= RegisterAliases[*Reg];
  return true;
}

// GPR are not packed when passing parameters. Thus, a function foo(i32, i64,
// i32) will have the first argument in r0, the second in r1-r2, and the third
// on the stack. To model this behavior, whenever we pop a register from Regs,
// we remove all of its aliases from the pool of available GPRs. This has the
// effect of computing the "closure" on the GPR registers.
void TargetARM32::CallingConv::discardUnavailableGPRsAndTheirAliases(
    CfgVector<RegNumT> *Regs) {
  while (!Regs->empty() && GPRegsUsed[Regs->back()]) {
    GPRegsUsed |= RegisterAliases[Regs->back()];
    Regs->pop_back();
  }
}

bool TargetARM32::CallingConv::argInVFP(Type Ty, RegNumT *Reg) {
  CfgVector<RegNumT> *Source;

  switch (Ty) {
  default: {
    assert(isVectorType(Ty));
    Source = &Vec128Args;
  } break;
  case IceType_f32: {
    Source = &FP32Args;
  } break;
  case IceType_f64: {
    Source = &FP64Args;
  } break;
  }

  discardUnavailableVFPRegs(Source);

  if (Source->empty()) {
    VFPRegsUsed.set();
    return false;
  }

  *Reg = Source->back();
  VFPRegsUsed |= RegisterAliases[*Reg];
  return true;
}

// Arguments in VFP registers are not packed, so we don't mark the popped
// registers' aliases as unavailable.
void TargetARM32::CallingConv::discardUnavailableVFPRegs(
    CfgVector<RegNumT> *Regs) {
  while (!Regs->empty() && VFPRegsUsed[Regs->back()]) {
    Regs->pop_back();
  }
}

void TargetARM32::lowerArguments() {
  VarList &Args = Func->getArgs();
  TargetARM32::CallingConv CC;

  // For each register argument, replace Arg in the argument list with the home
  // register. Then generate an instruction in the prolog to copy the home
  // register to the assigned location of Arg.
  Context.init(Func->getEntryNode());
  Context.setInsertPoint(Context.getCur());

  for (SizeT I = 0, E = Args.size(); I < E; ++I) {
    Variable *Arg = Args[I];
    Type Ty = Arg->getType();
    RegNumT RegNum;
    if (isScalarIntegerType(Ty)) {
      if (!CC.argInGPR(Ty, &RegNum)) {
        continue;
      }
    } else {
      if (!CC.argInVFP(Ty, &RegNum)) {
        continue;
      }
    }

    Variable *RegisterArg = Func->makeVariable(Ty);
    if (BuildDefs::dump()) {
      RegisterArg->setName(Func, "home_reg:" + Arg->getName());
    }
    RegisterArg->setIsArg();
    Arg->setIsArg(false);
    Args[I] = RegisterArg;
    switch (Ty) {
    default: { RegisterArg->setRegNum(RegNum); } break;
    case IceType_i64: {
      auto *RegisterArg64 = llvm::cast<Variable64On32>(RegisterArg);
      RegisterArg64->initHiLo(Func);
      RegisterArg64->getLo()->setRegNum(
          RegNumT::fixme(RegARM32::getI64PairFirstGPRNum(RegNum)));
      RegisterArg64->getHi()->setRegNum(
          RegNumT::fixme(RegARM32::getI64PairSecondGPRNum(RegNum)));
    } break;
    }
    Context.insert<InstAssign>(Arg, RegisterArg);
  }
}

// Helper function for addProlog().
//
// This assumes Arg is an argument passed on the stack. This sets the frame
// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an
// I64 arg that has been split into Lo and Hi components, it calls itself
// recursively on the components, taking care to handle Lo first because of the
// little-endian architecture. Lastly, this function generates an instruction
// to copy Arg into its assigned register if applicable.
void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
                                         size_t BasicFrameOffset,
                                         size_t *InArgsSizeBytes) {
  const Type Ty = Arg->getType();
  *InArgsSizeBytes = applyStackAlignmentTy(*InArgsSizeBytes, Ty);

  if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) {
    Variable *const Lo = Arg64On32->getLo();
    Variable *const Hi = Arg64On32->getHi();
    finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    return;
  }
  assert(Ty != IceType_i64);

  const int32_t ArgStackOffset = BasicFrameOffset + *InArgsSizeBytes;
  *InArgsSizeBytes += typeWidthInBytesOnStack(Ty);

  if (!Arg->hasReg()) {
    Arg->setStackOffset(ArgStackOffset);
    return;
  }

  // If the argument variable has been assigned a register, we need to copy the
  // value from the stack slot.
  Variable *Parameter = Func->makeVariable(Ty);
  Parameter->setMustNotHaveReg();
  Parameter->setStackOffset(ArgStackOffset);
  _mov(Arg, Parameter);
}

Type TargetARM32::stackSlotType() { return IceType_i32; }

void TargetARM32::addProlog(CfgNode *Node) {
  // Stack frame layout:
  //
  // +------------------------+
  // | 1. preserved registers |
  // +------------------------+
  // | 2. padding             |
  // +------------------------+ <--- FramePointer (if used)
  // | 3. global spill area   |
  // +------------------------+
  // | 4. padding             |
  // +------------------------+
  // | 5. local spill area    |
  // +------------------------+
  // | 6. padding             |
  // +------------------------+
  // | 7. allocas (variable)  |
  // +------------------------+
  // | 8. padding             |
  // +------------------------+
  // | 9. out args            |
  // +------------------------+ <--- StackPointer
  //
  // The following variables record the size in bytes of the given areas:
  //  * PreservedRegsSizeBytes: area 1
  //  * SpillAreaPaddingBytes: area 2
  //  * GlobalsSize: area 3
  //  * GlobalsAndSubsequentPaddingSize: areas 3 - 4
  //  * LocalsSpillAreaSize: area 5
  //  * SpillAreaSizeBytes: areas 2 - 6, and 9
  //  * MaxOutArgsSizeBytes: area 9
  //
  // Determine stack frame offsets for each Variable without a register
  // assignment. This can be done as one variable per stack slot. Or, do
  // coalescing by running the register allocator again with an infinite set of
  // registers (as a side effect, this gives variables a second chance at
  // physical register assignment).
  //
  // A middle ground approach is to leverage sparsity and allocate one block of
  // space on the frame for globals (variables with multi-block lifetime), and
  // one block to share for locals (single-block lifetime).

  Context.init(Node);
  Context.setInsertPoint(Context.getCur());

  SmallBitVector CalleeSaves = getRegisterSet(RegSet_CalleeSave, RegSet_None);
  RegsUsed = SmallBitVector(CalleeSaves.size());
  VarList SortedSpilledVariables;
  size_t GlobalsSize = 0;
  // If there is a separate locals area, this represents that area. Otherwise
  // it counts any variable not counted by GlobalsSize.
  SpillAreaSizeBytes = 0;
  // If there is a separate locals area, this specifies the alignment for it.
  uint32_t LocalsSlotsAlignmentBytes = 0;
  // The entire spill locations area gets aligned to largest natural alignment
  // of the variables that have a spill slot.
  uint32_t SpillAreaAlignmentBytes = 0;
  // For now, we don't have target-specific variables that need special
  // treatment (no stack-slot-linked SpillVariable type).
  std::function<bool(Variable *)> TargetVarHook = [](Variable *Var) {
    static constexpr bool AssignStackSlot = false;
    static constexpr bool DontAssignStackSlot = !AssignStackSlot;
    if (llvm::isa<Variable64On32>(Var)) {
      return DontAssignStackSlot;
    }
    return AssignStackSlot;
  };

  // Compute the list of spilled variables and bounds for GlobalsSize, etc.
  getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
                        &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
                        &LocalsSlotsAlignmentBytes, TargetVarHook);
  uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
  SpillAreaSizeBytes += GlobalsSize;

  // Add push instructions for preserved registers. On ARM, "push" can push a
  // whole list of GPRs via a bitmask (0-15). Unlike x86, ARM also has
  // callee-saved float/vector registers.
  //
  // The "vpush" instruction can handle a whole list of float/vector registers,
  // but it only handles contiguous sequences of registers by specifying the
  // start and the length.
  PreservedGPRs.reserve(CalleeSaves.size());
  PreservedSRegs.reserve(CalleeSaves.size());

  // Consider FP and LR as callee-save / used as needed.
  if (UsesFramePointer) {
    if (RegsUsed[RegARM32::Reg_fp]) {
      llvm::report_fatal_error("Frame pointer has been used.");
    }
    CalleeSaves[RegARM32::Reg_fp] = true;
    RegsUsed[RegARM32::Reg_fp] = true;
  }
  if (!MaybeLeafFunc) {
    CalleeSaves[RegARM32::Reg_lr] = true;
    RegsUsed[RegARM32::Reg_lr] = true;
  }

  // Make two passes over the used registers. The first pass records all the
  // used registers -- and their aliases. Then, we figure out which GPRs and
  // VFP S registers should be saved. We don't bother saving D/Q registers
  // because their uses are recorded as S regs uses.
  SmallBitVector ToPreserve(RegARM32::Reg_NUM);
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    if (NeedSandboxing && i == RegARM32::Reg_r9) {
      // r9 is never updated in sandboxed code.
      continue;
    }
    if (CalleeSaves[i] && RegsUsed[i]) {
      ToPreserve |= RegisterAliases[i];
    }
  }

  uint32_t NumCallee = 0;
  size_t PreservedRegsSizeBytes = 0;

  // RegClasses is a tuple of
  //
  // <First Register in Class, Last Register in Class, Vector of Save Registers>
  //
  // We use this tuple to figure out which register we should push/pop during
  // prolog/epilog.
  using RegClassType = std::tuple<uint32_t, uint32_t, VarList *>;
  const RegClassType RegClasses[] = {
      RegClassType(RegARM32::Reg_GPR_First, RegARM32::Reg_GPR_Last,
                   &PreservedGPRs),
      RegClassType(RegARM32::Reg_SREG_First, RegARM32::Reg_SREG_Last,
                   &PreservedSRegs)};
  for (const auto &RegClass : RegClasses) {
    const uint32_t FirstRegInClass = std::get<0>(RegClass);
    const uint32_t LastRegInClass = std::get<1>(RegClass);
    VarList *const PreservedRegsInClass = std::get<2>(RegClass);
    for (uint32_t Reg = FirstRegInClass; Reg <= LastRegInClass; ++Reg) {
      if (!ToPreserve[Reg]) {
        continue;
      }
      ++NumCallee;
      Variable *PhysicalRegister = getPhysicalRegister(RegNumT::fromInt(Reg));
      PreservedRegsSizeBytes +=
          typeWidthInBytesOnStack(PhysicalRegister->getType());
      PreservedRegsInClass->push_back(PhysicalRegister);
    }
  }

  Ctx->statsUpdateRegistersSaved(NumCallee);
  if (!PreservedSRegs.empty())
    _push(PreservedSRegs);
  if (!PreservedGPRs.empty())
    _push(PreservedGPRs);

  // Generate "mov FP, SP" if needed.
  if (UsesFramePointer) {
    Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
    Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
    _mov(FP, SP);
    // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).
    Context.insert<InstFakeUse>(FP);
  }

  // Align the variables area. SpillAreaPaddingBytes is the size of the region
  // after the preserved registers and before the spill areas.
  // LocalsSlotsPaddingBytes is the amount of padding between the globals and
  // locals area if they are separate.
  assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES);
  assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
  uint32_t SpillAreaPaddingBytes = 0;
  uint32_t LocalsSlotsPaddingBytes = 0;
  alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
                       GlobalsSize, LocalsSlotsAlignmentBytes,
                       &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
  SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
  uint32_t GlobalsAndSubsequentPaddingSize =
      GlobalsSize + LocalsSlotsPaddingBytes;

  // Add the out args space to the stack, and align SP if necessary.
  if (!NeedsStackAlignment) {
    SpillAreaSizeBytes += MaxOutArgsSizeBytes;
  } else {
    uint32_t StackOffset = PreservedRegsSizeBytes;
    uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
    StackSize = applyStackAlignment(StackSize + MaxOutArgsSizeBytes);
    SpillAreaSizeBytes = StackSize - StackOffset;
  }

  // Combine fixed alloca with SpillAreaSize.
  SpillAreaSizeBytes += FixedAllocaSizeBytes;

  // Generate "sub sp, SpillAreaSizeBytes"
  if (SpillAreaSizeBytes) {
    // Use the scratch register if needed to legalize the immediate.
    Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
                                  Legal_Reg | Legal_Flex, getReservedTmpReg());
    Sandboxer(this).sub_sp(SubAmount);
    if (FixedAllocaAlignBytes > ARM32_STACK_ALIGNMENT_BYTES) {
      Sandboxer(this).align_sp(FixedAllocaAlignBytes);
    }
  }

  Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);

  // Fill in stack offsets for stack args, and copy args into registers for
  // those that were register-allocated. Args are pushed right to left, so
  // Arg[0] is closest to the stack/frame pointer.
1637 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); 1638 size_t BasicFrameOffset = PreservedRegsSizeBytes; 1639 if (!UsesFramePointer) 1640 BasicFrameOffset += SpillAreaSizeBytes; 1641 1642 materializeGotAddr(Node); 1643 1644 const VarList &Args = Func->getArgs(); 1645 size_t InArgsSizeBytes = 0; 1646 TargetARM32::CallingConv CC; 1647 for (Variable *Arg : Args) { 1648 RegNumT DummyReg; 1649 const Type Ty = Arg->getType(); 1650 1651 // Skip arguments passed in registers. 1652 if (isScalarIntegerType(Ty)) { 1653 if (CC.argInGPR(Ty, &DummyReg)) { 1654 continue; 1655 } 1656 } else { 1657 if (CC.argInVFP(Ty, &DummyReg)) { 1658 continue; 1659 } 1660 } 1661 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, &InArgsSizeBytes); 1662 } 1663 1664 // Fill in stack offsets for locals. 1665 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes, 1666 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize, 1667 UsesFramePointer); 1668 this->HasComputedFrame = true; 1669 1670 if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) { 1671 OstreamLocker _(Func->getContext()); 1672 Ostream &Str = Func->getContext()->getStrDump(); 1673 1674 Str << "Stack layout:\n"; 1675 uint32_t SPAdjustmentPaddingSize = 1676 SpillAreaSizeBytes - LocalsSpillAreaSize - 1677 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes - 1678 MaxOutArgsSizeBytes; 1679 Str << " in-args = " << InArgsSizeBytes << " bytes\n" 1680 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n" 1681 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n" 1682 << " globals spill area = " << GlobalsSize << " bytes\n" 1683 << " globals-locals spill areas intermediate padding = " 1684 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n" 1685 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n" 1686 << " SP alignment padding = " << SPAdjustmentPaddingSize << " bytes\n"; 1687 1688 Str << "Stack details:\n" 1689 << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n" 1690 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n" 1691 << " outgoing args size = " << MaxOutArgsSizeBytes << " bytes\n" 1692 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes 1693 << " bytes\n" 1694 << " is FP based = " << UsesFramePointer << "\n"; 1695 } 1696 } 1697 1698 void TargetARM32::addEpilog(CfgNode *Node) { 1699 InstList &Insts = Node->getInsts(); 1700 InstList::reverse_iterator RI, E; 1701 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) { 1702 if (llvm::isa<InstARM32Ret>(*RI)) 1703 break; 1704 } 1705 if (RI == E) 1706 return; 1707 1708 // Convert the reverse_iterator position into its corresponding (forward) 1709 // iterator position. 1710 InstList::iterator InsertPoint = reverseToForwardIterator(RI); 1711 --InsertPoint; 1712 Context.init(Node); 1713 Context.setInsertPoint(InsertPoint); 1714 1715 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); 1716 if (UsesFramePointer) { 1717 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp); 1718 // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake 1719 // use of SP before the assignment of SP=FP keeps previous SP adjustments 1720 // from being dead-code eliminated. 1721 Context.insert<InstFakeUse>(SP); 1722 Sandboxer(this).reset_sp(FP); 1723 } else { 1724 // add SP, SpillAreaSizeBytes 1725 if (SpillAreaSizeBytes) { 1726 // Use the scratch register if needed to legalize the immediate. 
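      // E.g., a 1024-byte frame would emit roughly "add sp, sp, #1024"; an
      // immediate that is not flex-encodable is first materialized into the
      // reserved temp register (likely via movw/movt) before the add
      // (illustrative only).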
1727 Operand *AddAmount = 1728 legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), 1729 Legal_Reg | Legal_Flex, getReservedTmpReg()); 1730 Sandboxer(this).add_sp(AddAmount); 1731 } 1732 } 1733 1734 if (!PreservedGPRs.empty()) 1735 _pop(PreservedGPRs); 1736 if (!PreservedSRegs.empty()) 1737 _pop(PreservedSRegs); 1738 1739 if (!getFlags().getUseSandboxing()) 1740 return; 1741 1742 // Change the original ret instruction into a sandboxed return sequence. 1743 // 1744 // bundle_lock 1745 // bic lr, #0xc000000f 1746 // bx lr 1747 // bundle_unlock 1748 // 1749 // This isn't just aligning to the getBundleAlignLog2Bytes(). It needs to 1750 // restrict to the lower 1GB as well. 1751 Variable *LR = getPhysicalRegister(RegARM32::Reg_lr); 1752 Variable *RetValue = nullptr; 1753 if (RI->getSrcSize()) 1754 RetValue = llvm::cast<Variable>(RI->getSrc(0)); 1755 1756 Sandboxer(this).ret(LR, RetValue); 1757 1758 RI->setDeleted(); 1759 } 1760 1761 bool TargetARM32::isLegalMemOffset(Type Ty, int32_t Offset) const { 1762 constexpr bool ZeroExt = false; 1763 return OperandARM32Mem::canHoldOffset(Ty, ZeroExt, Offset); 1764 } 1765 1766 Variable *TargetARM32::PostLoweringLegalizer::newBaseRegister( 1767 Variable *Base, int32_t Offset, RegNumT ScratchRegNum) { 1768 // Legalize will likely need a movw/movt combination, but if the top bits are 1769 // all 0 from negating the offset and subtracting, we could use that instead. 1770 const bool ShouldSub = Offset != 0 && (-Offset & 0xFFFF0000) == 0; 1771 Variable *ScratchReg = Target->makeReg(IceType_i32, ScratchRegNum); 1772 if (ShouldSub) { 1773 Operand *OffsetVal = 1774 Target->legalize(Target->Ctx->getConstantInt32(-Offset), 1775 Legal_Reg | Legal_Flex, ScratchRegNum); 1776 Target->_sub(ScratchReg, Base, OffsetVal); 1777 } else { 1778 Operand *OffsetVal = 1779 Target->legalize(Target->Ctx->getConstantInt32(Offset), 1780 Legal_Reg | Legal_Flex, ScratchRegNum); 1781 Target->_add(ScratchReg, Base, OffsetVal); 1782 } 1783 1784 if (ScratchRegNum == Target->getReservedTmpReg()) { 1785 const bool BaseIsStackOrFramePtr = 1786 Base->getRegNum() == Target->getFrameOrStackReg(); 1787 // There is currently no code path that would trigger this assertion, so we 1788 // leave this assertion here in case it is ever violated. This is not a 1789 // fatal error (thus the use of assert() and not llvm::report_fatal_error) 1790 // as the program compiled by subzero will still work correctly. 1791 assert(BaseIsStackOrFramePtr); 1792 // Side-effect: updates TempBase to reflect the new Temporary. 
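    // E.g., legalizing an out-of-range offset of 4096 off SP yields roughly
    // "add ip, sp, #4096" and records TempBaseReg = ip, TempBaseOffset = 4096
    // so that nearby offsets can be addressed off ip (illustrative; assumes
    // ip is the reserved temp register).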
1793 if (BaseIsStackOrFramePtr) { 1794 TempBaseReg = ScratchReg; 1795 TempBaseOffset = Offset; 1796 } else { 1797 TempBaseReg = nullptr; 1798 TempBaseOffset = 0; 1799 } 1800 } 1801 1802 return ScratchReg; 1803 } 1804 1805 OperandARM32Mem *TargetARM32::PostLoweringLegalizer::createMemOperand( 1806 Type Ty, Variable *Base, int32_t Offset, bool AllowOffsets) { 1807 assert(!Base->isRematerializable()); 1808 if (Offset == 0 || (AllowOffsets && Target->isLegalMemOffset(Ty, Offset))) { 1809 return OperandARM32Mem::create( 1810 Target->Func, Ty, Base, 1811 llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset)), 1812 OperandARM32Mem::Offset); 1813 } 1814 1815 if (!AllowOffsets || TempBaseReg == nullptr) { 1816 newBaseRegister(Base, Offset, Target->getReservedTmpReg()); 1817 } 1818 1819 int32_t OffsetDiff = Offset - TempBaseOffset; 1820 assert(AllowOffsets || OffsetDiff == 0); 1821 1822 if (!Target->isLegalMemOffset(Ty, OffsetDiff)) { 1823 newBaseRegister(Base, Offset, Target->getReservedTmpReg()); 1824 OffsetDiff = 0; 1825 } 1826 1827 assert(!TempBaseReg->isRematerializable()); 1828 return OperandARM32Mem::create( 1829 Target->Func, Ty, TempBaseReg, 1830 llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(OffsetDiff)), 1831 OperandARM32Mem::Offset); 1832 } 1833 1834 void TargetARM32::PostLoweringLegalizer::resetTempBaseIfClobberedBy( 1835 const Inst *Instr) { 1836 bool ClobbersTempBase = false; 1837 if (TempBaseReg != nullptr) { 1838 Variable *Dest = Instr->getDest(); 1839 if (llvm::isa<InstARM32Call>(Instr)) { 1840 // The following assertion is an invariant, so we remove it from the if 1841 // test. If the invariant is ever broken/invalidated/changed, remember 1842 // to add it back to the if condition. 1843 assert(TempBaseReg->getRegNum() == Target->getReservedTmpReg()); 1844 // The linker may need to clobber IP if the call is too far from PC. Thus, 1845 // we assume IP will be overwritten. 1846 ClobbersTempBase = true; 1847 } else if (Dest != nullptr && 1848 Dest->getRegNum() == TempBaseReg->getRegNum()) { 1849 // Register redefinition. 1850 ClobbersTempBase = true; 1851 } 1852 } 1853 1854 if (ClobbersTempBase) { 1855 TempBaseReg = nullptr; 1856 TempBaseOffset = 0; 1857 } 1858 } 1859 1860 void TargetARM32::PostLoweringLegalizer::legalizeMov(InstARM32Mov *MovInstr) { 1861 Variable *Dest = MovInstr->getDest(); 1862 assert(Dest != nullptr); 1863 Type DestTy = Dest->getType(); 1864 assert(DestTy != IceType_i64); 1865 1866 Operand *Src = MovInstr->getSrc(0); 1867 Type SrcTy = Src->getType(); 1868 (void)SrcTy; 1869 assert(SrcTy != IceType_i64); 1870 1871 if (MovInstr->isMultiDest() || MovInstr->isMultiSource()) 1872 return; 1873 1874 bool Legalized = false; 1875 if (!Dest->hasReg()) { 1876 auto *SrcR = llvm::cast<Variable>(Src); 1877 assert(SrcR->hasReg()); 1878 assert(!SrcR->isRematerializable()); 1879 const int32_t Offset = Dest->getStackOffset(); 1880 // This is a _mov(Mem(), Variable), i.e., a store. 1881 TargetARM32::Sandboxer(Target) 1882 .str(SrcR, createMemOperand(DestTy, StackOrFrameReg, Offset), 1883 MovInstr->getPredicate()); 1884 // _str() does not have a Dest, so we add a fake-def(Dest). 1885 Target->Context.insert<InstFakeDef>(Dest); 1886 Legalized = true; 1887 } else if (auto *Var = llvm::dyn_cast<Variable>(Src)) { 1888 if (Var->isRematerializable()) { 1889 // This is equivalent to an x86 _lea(RematOffset(%esp/%ebp), Variable). 1890 1891 // ExtraOffset is only needed for frame-pointer based frames as we have 1892 // to account for spill storage. 
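      // E.g., a rematerializable variable pinned to fp with stack offset 16
      // lowers to roughly "add Dest, fp, #(16 + FrameFixedAllocaOffset)"
      // (illustrative only).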
1893 const int32_t ExtraOffset = (Var->getRegNum() == Target->getFrameReg()) 1894 ? Target->getFrameFixedAllocaOffset() 1895 : 0; 1896 1897 const int32_t Offset = Var->getStackOffset() + ExtraOffset; 1898 Variable *Base = Target->getPhysicalRegister(Var->getRegNum()); 1899 Variable *T = newBaseRegister(Base, Offset, Dest->getRegNum()); 1900 Target->_mov(Dest, T); 1901 Legalized = true; 1902 } else { 1903 if (!Var->hasReg()) { 1904 // This is a _mov(Variable, Mem()), i.e., a load. 1905 const int32_t Offset = Var->getStackOffset(); 1906 TargetARM32::Sandboxer(Target) 1907 .ldr(Dest, createMemOperand(DestTy, StackOrFrameReg, Offset), 1908 MovInstr->getPredicate()); 1909 Legalized = true; 1910 } 1911 } 1912 } 1913 1914 if (Legalized) { 1915 if (MovInstr->isDestRedefined()) { 1916 Target->_set_dest_redefined(); 1917 } 1918 MovInstr->setDeleted(); 1919 } 1920 } 1921 1922 // ARM32 address modes: 1923 // ld/st i[8|16|32]: [reg], [reg +/- imm12], [pc +/- imm12], 1924 // [reg +/- reg << shamt5] 1925 // ld/st f[32|64] : [reg], [reg +/- imm8] , [pc +/- imm8] 1926 // ld/st vectors : [reg] 1927 // 1928 // For now, we don't handle address modes with Relocatables. 1929 namespace { 1930 // MemTraits contains per-type valid address mode information. 1931 #define X(tag, elementty, int_width, fp_width, uvec_width, svec_width, sbits, \ 1932 ubits, rraddr, shaddr) \ 1933 static_assert(!(shaddr) || rraddr, "Check ICETYPEARM32_TABLE::" #tag); 1934 ICETYPEARM32_TABLE 1935 #undef X 1936 1937 static const struct { 1938 int32_t ValidImmMask; 1939 bool CanHaveImm; 1940 bool CanHaveIndex; 1941 bool CanHaveShiftedIndex; 1942 } MemTraits[] = { 1943 #define X(tag, elementty, int_width, fp_width, uvec_width, svec_width, sbits, \ 1944 ubits, rraddr, shaddr) \ 1945 { (1 << ubits) - 1, (ubits) > 0, rraddr, shaddr, } \ 1946 , 1947 ICETYPEARM32_TABLE 1948 #undef X 1949 }; 1950 static constexpr SizeT MemTraitsSize = llvm::array_lengthof(MemTraits); 1951 } // end of anonymous namespace 1952 1953 OperandARM32Mem * 1954 TargetARM32::PostLoweringLegalizer::legalizeMemOperand(OperandARM32Mem *Mem, 1955 bool AllowOffsets) { 1956 assert(!Mem->isRegReg() || !Mem->getIndex()->isRematerializable()); 1957 assert( 1958 Mem->isRegReg() || 1959 Target->isLegalMemOffset(Mem->getType(), Mem->getOffset()->getValue())); 1960 1961 bool Legalized = false; 1962 Variable *Base = Mem->getBase(); 1963 int32_t Offset = Mem->isRegReg() ? 0 : Mem->getOffset()->getValue(); 1964 if (Base->isRematerializable()) { 1965 const int32_t ExtraOffset = (Base->getRegNum() == Target->getFrameReg()) 1966 ? 
Target->getFrameFixedAllocaOffset() 1967 : 0; 1968 Offset += Base->getStackOffset() + ExtraOffset; 1969 Base = Target->getPhysicalRegister(Base->getRegNum()); 1970 assert(!Base->isRematerializable()); 1971 Legalized = true; 1972 } 1973 1974 if (!Legalized && !Target->NeedSandboxing) { 1975 return nullptr; 1976 } 1977 1978 if (!Mem->isRegReg()) { 1979 return createMemOperand(Mem->getType(), Base, Offset, AllowOffsets); 1980 } 1981 1982 if (Target->NeedSandboxing) { 1983 llvm::report_fatal_error("Reg-Reg address mode is not allowed."); 1984 } 1985 1986 assert(MemTraits[Mem->getType()].CanHaveIndex); 1987 1988 if (Offset != 0) { 1989 if (TempBaseReg == nullptr) { 1990 Base = newBaseRegister(Base, Offset, Target->getReservedTmpReg()); 1991 } else { 1992 uint32_t Imm8, Rotate; 1993 const int32_t OffsetDiff = Offset - TempBaseOffset; 1994 if (OffsetDiff == 0) { 1995 Base = TempBaseReg; 1996 } else if (OperandARM32FlexImm::canHoldImm(OffsetDiff, &Rotate, &Imm8)) { 1997 auto *OffsetDiffF = OperandARM32FlexImm::create( 1998 Target->Func, IceType_i32, Imm8, Rotate); 1999 Target->_add(TempBaseReg, TempBaseReg, OffsetDiffF); 2000 TempBaseOffset += OffsetDiff; 2001 Base = TempBaseReg; 2002 } else if (OperandARM32FlexImm::canHoldImm(-OffsetDiff, &Rotate, &Imm8)) { 2003 auto *OffsetDiffF = OperandARM32FlexImm::create( 2004 Target->Func, IceType_i32, Imm8, Rotate); 2005 Target->_sub(TempBaseReg, TempBaseReg, OffsetDiffF); 2006 TempBaseOffset += OffsetDiff; 2007 Base = TempBaseReg; 2008 } else { 2009 Base = newBaseRegister(Base, Offset, Target->getReservedTmpReg()); 2010 } 2011 } 2012 } 2013 2014 return OperandARM32Mem::create(Target->Func, Mem->getType(), Base, 2015 Mem->getIndex(), Mem->getShiftOp(), 2016 Mem->getShiftAmt(), Mem->getAddrMode()); 2017 } 2018 2019 void TargetARM32::postLowerLegalization() { 2020 // If a stack variable's frame offset doesn't fit, convert from: 2021 // ldr X, OFF[SP] 2022 // to: 2023 // movw/movt TMP, OFF_PART 2024 // add TMP, TMP, SP 2025 // ldr X, OFF_MORE[TMP] 2026 // 2027 // This is safe because we have reserved TMP, and add for ARM does not 2028 // clobber the flags register. 2029 Func->dump("Before postLowerLegalization"); 2030 assert(hasComputedFrame()); 2031 // Do a fairly naive greedy clustering for now. Pick the first stack slot 2032 // that's out of bounds and make a new base reg using the architecture's temp 2033 // register. If that works for the next slot, then great. Otherwise, create a 2034 // new base register, clobbering the previous base register. Never share a 2035 // base reg across different basic blocks. This isn't ideal if local and 2036 // multi-block variables are far apart and their references are interspersed. 2037 // It may help to be more coordinated about assign stack slot numbers and may 2038 // help to assign smaller offsets to higher-weight variables so that they 2039 // don't depend on this legalization. 2040 for (CfgNode *Node : Func->getNodes()) { 2041 Context.init(Node); 2042 // One legalizer per basic block, otherwise we would share the Temporary 2043 // Base Register between basic blocks. 2044 PostLoweringLegalizer Legalizer(this); 2045 while (!Context.atEnd()) { 2046 PostIncrLoweringContext PostIncrement(Context); 2047 Inst *CurInstr = iteratorToInst(Context.getCur()); 2048 2049 // Check if the previous TempBaseReg is clobbered, and reset if needed. 
2050 Legalizer.resetTempBaseIfClobberedBy(CurInstr); 2051 2052 if (auto *MovInstr = llvm::dyn_cast<InstARM32Mov>(CurInstr)) { 2053 Legalizer.legalizeMov(MovInstr); 2054 } else if (auto *LdrInstr = llvm::dyn_cast<InstARM32Ldr>(CurInstr)) { 2055 if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand( 2056 llvm::cast<OperandARM32Mem>(LdrInstr->getSrc(0)))) { 2057 Sandboxer(this) 2058 .ldr(CurInstr->getDest(), LegalMem, LdrInstr->getPredicate()); 2059 CurInstr->setDeleted(); 2060 } 2061 } else if (auto *LdrexInstr = llvm::dyn_cast<InstARM32Ldrex>(CurInstr)) { 2062 constexpr bool DisallowOffsetsBecauseLdrex = false; 2063 if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand( 2064 llvm::cast<OperandARM32Mem>(LdrexInstr->getSrc(0)), 2065 DisallowOffsetsBecauseLdrex)) { 2066 Sandboxer(this) 2067 .ldrex(CurInstr->getDest(), LegalMem, LdrexInstr->getPredicate()); 2068 CurInstr->setDeleted(); 2069 } 2070 } else if (auto *StrInstr = llvm::dyn_cast<InstARM32Str>(CurInstr)) { 2071 if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand( 2072 llvm::cast<OperandARM32Mem>(StrInstr->getSrc(1)))) { 2073 Sandboxer(this).str(llvm::cast<Variable>(CurInstr->getSrc(0)), 2074 LegalMem, StrInstr->getPredicate()); 2075 CurInstr->setDeleted(); 2076 } 2077 } else if (auto *StrexInstr = llvm::dyn_cast<InstARM32Strex>(CurInstr)) { 2078 constexpr bool DisallowOffsetsBecauseStrex = false; 2079 if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand( 2080 llvm::cast<OperandARM32Mem>(StrexInstr->getSrc(1)), 2081 DisallowOffsetsBecauseStrex)) { 2082 Sandboxer(this).strex(CurInstr->getDest(), 2083 llvm::cast<Variable>(CurInstr->getSrc(0)), 2084 LegalMem, StrexInstr->getPredicate()); 2085 CurInstr->setDeleted(); 2086 } 2087 } 2088 2089 // Sanity-check: the Legalizer will either have no Temp, or it will be 2090 // bound to IP. 2091 Legalizer.assertNoTempOrAssignedToIP(); 2092 } 2093 } 2094 } 2095 2096 Operand *TargetARM32::loOperand(Operand *Operand) { 2097 assert(Operand->getType() == IceType_i64); 2098 if (Operand->getType() != IceType_i64) 2099 return Operand; 2100 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand)) 2101 return Var64On32->getLo(); 2102 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) 2103 return Ctx->getConstantInt32(static_cast<uint32_t>(Const->getValue())); 2104 if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) { 2105 // Conservatively disallow memory operands with side-effects (pre/post 2106 // increment) in case of duplication. 
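    // E.g., splitting an i64 load from [r0, #8] produces [r0, #8] here for
    // the low word, while hiOperand() below produces [r0, #12] for the high
    // word (illustrative only).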
2107 assert(Mem->getAddrMode() == OperandARM32Mem::Offset || 2108 Mem->getAddrMode() == OperandARM32Mem::NegOffset); 2109 if (Mem->isRegReg()) { 2110 Variable *IndexR = legalizeToReg(Mem->getIndex()); 2111 return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(), IndexR, 2112 Mem->getShiftOp(), Mem->getShiftAmt(), 2113 Mem->getAddrMode()); 2114 } else { 2115 return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(), 2116 Mem->getOffset(), Mem->getAddrMode()); 2117 } 2118 } 2119 llvm::report_fatal_error("Unsupported operand type"); 2120 return nullptr; 2121 } 2122 2123 Operand *TargetARM32::hiOperand(Operand *Operand) { 2124 assert(Operand->getType() == IceType_i64); 2125 if (Operand->getType() != IceType_i64) 2126 return Operand; 2127 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand)) 2128 return Var64On32->getHi(); 2129 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { 2130 return Ctx->getConstantInt32( 2131 static_cast<uint32_t>(Const->getValue() >> 32)); 2132 } 2133 if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) { 2134 // Conservatively disallow memory operands with side-effects in case of 2135 // duplication. 2136 assert(Mem->getAddrMode() == OperandARM32Mem::Offset || 2137 Mem->getAddrMode() == OperandARM32Mem::NegOffset); 2138 const Type SplitType = IceType_i32; 2139 if (Mem->isRegReg()) { 2140 // We have to make a temp variable T, and add 4 to either Base or Index. 2141 // The Index may be shifted, so adding 4 can mean something else. Thus, 2142 // prefer T := Base + 4, and use T as the new Base. 2143 Variable *Base = Mem->getBase(); 2144 Constant *Four = Ctx->getConstantInt32(4); 2145 Variable *NewBase = Func->makeVariable(Base->getType()); 2146 lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase, 2147 Base, Four)); 2148 Variable *BaseR = legalizeToReg(NewBase); 2149 Variable *IndexR = legalizeToReg(Mem->getIndex()); 2150 return OperandARM32Mem::create(Func, SplitType, BaseR, IndexR, 2151 Mem->getShiftOp(), Mem->getShiftAmt(), 2152 Mem->getAddrMode()); 2153 } else { 2154 Variable *Base = Mem->getBase(); 2155 ConstantInteger32 *Offset = Mem->getOffset(); 2156 assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4)); 2157 int32_t NextOffsetVal = Offset->getValue() + 4; 2158 constexpr bool ZeroExt = false; 2159 if (!OperandARM32Mem::canHoldOffset(SplitType, ZeroExt, NextOffsetVal)) { 2160 // We have to make a temp variable and add 4 to either Base or Offset. 2161 // If we add 4 to Offset, this will convert a non-RegReg addressing 2162 // mode into a RegReg addressing mode. Since NaCl sandboxing disallows 2163 // RegReg addressing modes, prefer adding to base and replacing 2164 // instead. Thus we leave the old offset alone. 
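        // E.g., if the low-word offset is 4092 and 4096 cannot be encoded
        // for the split type, we compute NewBase = Base + 4 and keep the
        // original 4092 offset for the high word (illustrative only).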
2165 Constant *_4 = Ctx->getConstantInt32(4); 2166 Variable *NewBase = Func->makeVariable(Base->getType()); 2167 lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, 2168 NewBase, Base, _4)); 2169 Base = NewBase; 2170 } else { 2171 Offset = 2172 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal)); 2173 } 2174 Variable *BaseR = legalizeToReg(Base); 2175 return OperandARM32Mem::create(Func, SplitType, BaseR, Offset, 2176 Mem->getAddrMode()); 2177 } 2178 } 2179 llvm::report_fatal_error("Unsupported operand type"); 2180 return nullptr; 2181 } 2182 2183 SmallBitVector TargetARM32::getRegisterSet(RegSetMask Include, 2184 RegSetMask Exclude) const { 2185 SmallBitVector Registers(RegARM32::Reg_NUM); 2186 2187 for (uint32_t i = 0; i < RegARM32::Reg_NUM; ++i) { 2188 const auto &Entry = RegARM32::RegTable[i]; 2189 if (Entry.Scratch && (Include & RegSet_CallerSave)) 2190 Registers[i] = true; 2191 if (Entry.Preserved && (Include & RegSet_CalleeSave)) 2192 Registers[i] = true; 2193 if (Entry.StackPtr && (Include & RegSet_StackPointer)) 2194 Registers[i] = true; 2195 if (Entry.FramePtr && (Include & RegSet_FramePointer)) 2196 Registers[i] = true; 2197 if (Entry.Scratch && (Exclude & RegSet_CallerSave)) 2198 Registers[i] = false; 2199 if (Entry.Preserved && (Exclude & RegSet_CalleeSave)) 2200 Registers[i] = false; 2201 if (Entry.StackPtr && (Exclude & RegSet_StackPointer)) 2202 Registers[i] = false; 2203 if (Entry.FramePtr && (Exclude & RegSet_FramePointer)) 2204 Registers[i] = false; 2205 } 2206 2207 return Registers; 2208 } 2209 2210 void TargetARM32::lowerAlloca(const InstAlloca *Instr) { 2211 // Conservatively require the stack to be aligned. Some stack adjustment 2212 // operations implemented below assume that the stack is aligned before the 2213 // alloca. All the alloca code ensures that the stack alignment is preserved 2214 // after the alloca. The stack alignment restriction can be relaxed in some 2215 // cases. 2216 NeedsStackAlignment = true; 2217 2218 // For default align=0, set it to the real value 1, to avoid any 2219 // bit-manipulation problems below. 2220 const uint32_t AlignmentParam = std::max(1u, Instr->getAlignInBytes()); 2221 2222 // LLVM enforces power of 2 alignment. 2223 assert(llvm::isPowerOf2_32(AlignmentParam)); 2224 assert(llvm::isPowerOf2_32(ARM32_STACK_ALIGNMENT_BYTES)); 2225 2226 const uint32_t Alignment = 2227 std::max(AlignmentParam, ARM32_STACK_ALIGNMENT_BYTES); 2228 const bool OverAligned = Alignment > ARM32_STACK_ALIGNMENT_BYTES; 2229 const bool OptM1 = Func->getOptLevel() == Opt_m1; 2230 const bool AllocaWithKnownOffset = Instr->getKnownFrameOffset(); 2231 const bool UseFramePointer = 2232 hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1; 2233 2234 if (UseFramePointer) 2235 setHasFramePointer(); 2236 2237 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); 2238 if (OverAligned) { 2239 Sandboxer(this).align_sp(Alignment); 2240 } 2241 2242 Variable *Dest = Instr->getDest(); 2243 Operand *TotalSize = Instr->getSizeInBytes(); 2244 2245 if (const auto *ConstantTotalSize = 2246 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { 2247 const uint32_t Value = 2248 Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment); 2249 // Constant size alloca. 2250 if (!UseFramePointer) { 2251 // If we don't need a Frame Pointer, this alloca has a known offset to the 2252 // stack pointer. We don't need adjust the stack pointer, nor assign any 2253 // value to Dest, as Dest is rematerializable. 
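      // E.g., "a = alloca 16, align 4" in such a function simply grows
      // FixedAllocaSizeBytes by 16; later references to "a" rematerialize as
      // SP-relative offsets instead of consuming a register (illustrative
      // only).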
2254 assert(Dest->isRematerializable()); 2255 FixedAllocaSizeBytes += Value; 2256 Context.insert<InstFakeDef>(Dest); 2257 return; 2258 } 2259 2260 // If a frame pointer is required, then we need to store the alloca'd result 2261 // in Dest. 2262 Operand *SubAmountRF = 2263 legalize(Ctx->getConstantInt32(Value), Legal_Reg | Legal_Flex); 2264 Sandboxer(this).sub_sp(SubAmountRF); 2265 } else { 2266 // Non-constant sizes need to be adjusted to the next highest multiple of 2267 // the required alignment at runtime. 2268 TotalSize = legalize(TotalSize, Legal_Reg | Legal_Flex); 2269 Variable *T = makeReg(IceType_i32); 2270 _mov(T, TotalSize); 2271 Operand *AddAmount = legalize(Ctx->getConstantInt32(Alignment - 1)); 2272 _add(T, T, AddAmount); 2273 alignRegisterPow2(T, Alignment); 2274 Sandboxer(this).sub_sp(T); 2275 } 2276 2277 // Adds back a few bytes to SP to account for the out args area. 2278 Variable *T = SP; 2279 if (MaxOutArgsSizeBytes != 0) { 2280 T = makeReg(getPointerType()); 2281 Operand *OutArgsSizeRF = legalize( 2282 Ctx->getConstantInt32(MaxOutArgsSizeBytes), Legal_Reg | Legal_Flex); 2283 _add(T, SP, OutArgsSizeRF); 2284 } 2285 2286 _mov(Dest, T); 2287 } 2288 2289 void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) { 2290 if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi)) 2291 return; 2292 Variable *SrcLoReg = legalizeToReg(SrcLo); 2293 switch (Ty) { 2294 default: 2295 llvm_unreachable( 2296 ("Unexpected type in div0Check: " + typeStdString(Ty)).c_str()); 2297 case IceType_i8: 2298 case IceType_i16: { 2299 Operand *ShAmtImm = shAmtImm(32 - getScalarIntBitWidth(Ty)); 2300 Variable *T = makeReg(IceType_i32); 2301 _lsls(T, SrcLoReg, ShAmtImm); 2302 Context.insert<InstFakeUse>(T); 2303 } break; 2304 case IceType_i32: { 2305 _tst(SrcLoReg, SrcLoReg); 2306 break; 2307 } 2308 case IceType_i64: { 2309 Variable *T = makeReg(IceType_i32); 2310 _orrs(T, SrcLoReg, legalize(SrcHi, Legal_Reg | Legal_Flex)); 2311 // T isn't going to be used, but we need the side-effect of setting flags 2312 // from this operation. 2313 Context.insert<InstFakeUse>(T); 2314 } 2315 } 2316 auto *Label = InstARM32Label::create(Func, this); 2317 _br(Label, CondARM32::NE); 2318 _trap(); 2319 Context.insert(Label); 2320 } 2321 2322 void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, 2323 Operand *Src1, ExtInstr ExtFunc, 2324 DivInstr DivFunc, bool IsRemainder) { 2325 div0Check(Dest->getType(), Src1, nullptr); 2326 Variable *Src1R = legalizeToReg(Src1); 2327 Variable *T0R = Src0R; 2328 Variable *T1R = Src1R; 2329 if (Dest->getType() != IceType_i32) { 2330 T0R = makeReg(IceType_i32); 2331 (this->*ExtFunc)(T0R, Src0R, CondARM32::AL); 2332 T1R = makeReg(IceType_i32); 2333 (this->*ExtFunc)(T1R, Src1R, CondARM32::AL); 2334 } 2335 if (hasCPUFeature(TargetARM32Features::HWDivArm)) { 2336 (this->*DivFunc)(T, T0R, T1R, CondARM32::AL); 2337 if (IsRemainder) { 2338 Variable *T2 = makeReg(IceType_i32); 2339 _mls(T2, T, T1R, T0R); 2340 T = T2; 2341 } 2342 _mov(Dest, T); 2343 } else { 2344 llvm::report_fatal_error("div should have already been turned into a call"); 2345 } 2346 } 2347 2348 TargetARM32::SafeBoolChain 2349 TargetARM32::lowerInt1Arithmetic(const InstArithmetic *Instr) { 2350 Variable *Dest = Instr->getDest(); 2351 assert(Dest->getType() == IceType_i1); 2352 2353 // So folding didn't work for Instr. Not a problem: We just need to 2354 // materialize the Sources, and perform the operation. 
We create regular 2355 // Variables (and not infinite-weight ones) because this call might recurse a 2356 // lot, and we might end up with tons of infinite weight temporaries. 2357 assert(Instr->getSrcSize() == 2); 2358 Variable *Src0 = Func->makeVariable(IceType_i1); 2359 SafeBoolChain Src0Safe = lowerInt1(Src0, Instr->getSrc(0)); 2360 2361 Operand *Src1 = Instr->getSrc(1); 2362 SafeBoolChain Src1Safe = SBC_Yes; 2363 2364 if (!llvm::isa<Constant>(Src1)) { 2365 Variable *Src1V = Func->makeVariable(IceType_i1); 2366 Src1Safe = lowerInt1(Src1V, Src1); 2367 Src1 = Src1V; 2368 } 2369 2370 Variable *T = makeReg(IceType_i1); 2371 Src0 = legalizeToReg(Src0); 2372 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex); 2373 switch (Instr->getOp()) { 2374 default: 2375 // If this Unreachable is ever executed, add the offending operation to 2376 // the list of valid consumers. 2377 llvm::report_fatal_error("Unhandled i1 Op"); 2378 case InstArithmetic::And: 2379 _and(T, Src0, Src1RF); 2380 break; 2381 case InstArithmetic::Or: 2382 _orr(T, Src0, Src1RF); 2383 break; 2384 case InstArithmetic::Xor: 2385 _eor(T, Src0, Src1RF); 2386 break; 2387 } 2388 _mov(Dest, T); 2389 return Src0Safe == SBC_Yes && Src1Safe == SBC_Yes ? SBC_Yes : SBC_No; 2390 } 2391 2392 namespace { 2393 // NumericOperands is used during arithmetic/icmp lowering for constant folding. 2394 // It holds the two sources operands, and maintains some state as to whether one 2395 // of them is a constant. If one of the operands is a constant, then it will be 2396 // be stored as the operation's second source, with a bit indicating whether the 2397 // operands were swapped. 2398 // 2399 // The class is split into a base class with operand type-independent methods, 2400 // and a derived, templated class, for each type of operand we want to fold 2401 // constants for: 2402 // 2403 // NumericOperandsBase --> NumericOperands<ConstantFloat> 2404 // --> NumericOperands<ConstantDouble> 2405 // --> NumericOperands<ConstantInt32> 2406 // 2407 // NumericOperands<ConstantInt32> also exposes helper methods for emitting 2408 // inverted/negated immediates. 2409 class NumericOperandsBase { 2410 NumericOperandsBase() = delete; 2411 NumericOperandsBase(const NumericOperandsBase &) = delete; 2412 NumericOperandsBase &operator=(const NumericOperandsBase &) = delete; 2413 2414 public: 2415 NumericOperandsBase(Operand *S0, Operand *S1) 2416 : Src0(NonConstOperand(S0, S1)), Src1(ConstOperand(S0, S1)), 2417 Swapped(Src0 == S1 && S0 != S1) { 2418 assert(Src0 != nullptr); 2419 assert(Src1 != nullptr); 2420 assert(Src0 != Src1 || S0 == S1); 2421 } 2422 2423 bool hasConstOperand() const { 2424 return llvm::isa<Constant>(Src1) && !llvm::isa<ConstantRelocatable>(Src1); 2425 } 2426 2427 bool swappedOperands() const { return Swapped; } 2428 2429 Variable *src0R(TargetARM32 *Target) const { 2430 return legalizeToReg(Target, Src0); 2431 } 2432 2433 Variable *unswappedSrc0R(TargetARM32 *Target) const { 2434 return legalizeToReg(Target, Swapped ? Src1 : Src0); 2435 } 2436 2437 Operand *src1RF(TargetARM32 *Target) const { 2438 return legalizeToRegOrFlex(Target, Src1); 2439 } 2440 2441 Variable *unswappedSrc1R(TargetARM32 *Target) const { 2442 return legalizeToReg(Target, Swapped ? 
Src0 : Src1); 2443 } 2444 2445 Operand *src1() const { return Src1; } 2446 2447 protected: 2448 Operand *const Src0; 2449 Operand *const Src1; 2450 const bool Swapped; 2451 2452 static Variable *legalizeToReg(TargetARM32 *Target, Operand *Src) { 2453 return Target->legalizeToReg(Src); 2454 } 2455 2456 static Operand *legalizeToRegOrFlex(TargetARM32 *Target, Operand *Src) { 2457 return Target->legalize(Src, 2458 TargetARM32::Legal_Reg | TargetARM32::Legal_Flex); 2459 } 2460 2461 private: 2462 static Operand *NonConstOperand(Operand *S0, Operand *S1) { 2463 if (!llvm::isa<Constant>(S0)) 2464 return S0; 2465 if (!llvm::isa<Constant>(S1)) 2466 return S1; 2467 if (llvm::isa<ConstantRelocatable>(S1) && 2468 !llvm::isa<ConstantRelocatable>(S0)) 2469 return S1; 2470 return S0; 2471 } 2472 2473 static Operand *ConstOperand(Operand *S0, Operand *S1) { 2474 if (!llvm::isa<Constant>(S0)) 2475 return S1; 2476 if (!llvm::isa<Constant>(S1)) 2477 return S0; 2478 if (llvm::isa<ConstantRelocatable>(S1) && 2479 !llvm::isa<ConstantRelocatable>(S0)) 2480 return S0; 2481 return S1; 2482 } 2483 }; 2484 2485 template <typename C> class NumericOperands : public NumericOperandsBase { 2486 NumericOperands() = delete; 2487 NumericOperands(const NumericOperands &) = delete; 2488 NumericOperands &operator=(const NumericOperands &) = delete; 2489 2490 public: 2491 NumericOperands(Operand *S0, Operand *S1) : NumericOperandsBase(S0, S1) { 2492 assert(!hasConstOperand() || llvm::isa<C>(this->Src1)); 2493 } 2494 2495 typename C::PrimType getConstantValue() const { 2496 return llvm::cast<C>(Src1)->getValue(); 2497 } 2498 }; 2499 2500 using FloatOperands = NumericOperands<ConstantFloat>; 2501 using DoubleOperands = NumericOperands<ConstantDouble>; 2502 2503 class Int32Operands : public NumericOperands<ConstantInteger32> { 2504 Int32Operands() = delete; 2505 Int32Operands(const Int32Operands &) = delete; 2506 Int32Operands &operator=(const Int32Operands &) = delete; 2507 2508 public: 2509 Int32Operands(Operand *S0, Operand *S1) : NumericOperands(S0, S1) {} 2510 2511 Operand *unswappedSrc1RShAmtImm(TargetARM32 *Target) const { 2512 if (!swappedOperands() && hasConstOperand()) { 2513 return Target->shAmtImm(getConstantValue() & 0x1F); 2514 } 2515 return legalizeToReg(Target, Swapped ? 
Src0 : Src1); 2516 } 2517 2518 bool isSrc1ImmediateZero() const { 2519 if (!swappedOperands() && hasConstOperand()) { 2520 return getConstantValue() == 0; 2521 } 2522 return false; 2523 } 2524 2525 bool immediateIsFlexEncodable() const { 2526 uint32_t Rotate, Imm8; 2527 return OperandARM32FlexImm::canHoldImm(getConstantValue(), &Rotate, &Imm8); 2528 } 2529 2530 bool negatedImmediateIsFlexEncodable() const { 2531 uint32_t Rotate, Imm8; 2532 return OperandARM32FlexImm::canHoldImm( 2533 -static_cast<int32_t>(getConstantValue()), &Rotate, &Imm8); 2534 } 2535 2536 Operand *negatedSrc1F(TargetARM32 *Target) const { 2537 return legalizeToRegOrFlex(Target, 2538 Target->getCtx()->getConstantInt32( 2539 -static_cast<int32_t>(getConstantValue()))); 2540 } 2541 2542 bool invertedImmediateIsFlexEncodable() const { 2543 uint32_t Rotate, Imm8; 2544 return OperandARM32FlexImm::canHoldImm( 2545 ~static_cast<uint32_t>(getConstantValue()), &Rotate, &Imm8); 2546 } 2547 2548 Operand *invertedSrc1F(TargetARM32 *Target) const { 2549 return legalizeToRegOrFlex(Target, 2550 Target->getCtx()->getConstantInt32( 2551 ~static_cast<uint32_t>(getConstantValue()))); 2552 } 2553 }; 2554 } // end of anonymous namespace 2555 2556 void TargetARM32::preambleDivRem(const InstCall *Instr) { 2557 Operand *Src1 = Instr->getArg(1); 2558 2559 switch (Src1->getType()) { 2560 default: 2561 llvm::report_fatal_error("Invalid type for idiv."); 2562 case IceType_i64: { 2563 if (auto *C = llvm::dyn_cast<ConstantInteger64>(Src1)) { 2564 if (C->getValue() == 0) { 2565 _trap(); 2566 return; 2567 } 2568 } 2569 div0Check(IceType_i64, loOperand(Src1), hiOperand(Src1)); 2570 return; 2571 } 2572 case IceType_i32: { 2573 // Src0 and Src1 have already been appropriately extended to an i32, so we 2574 // don't check for i8 and i16. 
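    // When Src1 is not a known non-zero constant, div0Check below emits
    // roughly "tst rSrc1, rSrc1; bne <skip>; <trap>; <skip>:" for the i32
    // case (illustrative only).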
2575 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { 2576 if (C->getValue() == 0) { 2577 _trap(); 2578 return; 2579 } 2580 } 2581 div0Check(IceType_i32, Src1, nullptr); 2582 return; 2583 } 2584 } 2585 } 2586 2587 void TargetARM32::lowerInt64Arithmetic(InstArithmetic::OpKind Op, 2588 Variable *Dest, Operand *Src0, 2589 Operand *Src1) { 2590 Int32Operands SrcsLo(loOperand(Src0), loOperand(Src1)); 2591 Int32Operands SrcsHi(hiOperand(Src0), hiOperand(Src1)); 2592 assert(SrcsLo.swappedOperands() == SrcsHi.swappedOperands()); 2593 assert(SrcsLo.hasConstOperand() == SrcsHi.hasConstOperand()); 2594 2595 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); 2596 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 2597 Variable *T_Lo = makeReg(DestLo->getType()); 2598 Variable *T_Hi = makeReg(DestHi->getType()); 2599 2600 switch (Op) { 2601 case InstArithmetic::_num: 2602 llvm::report_fatal_error("Unknown arithmetic operator"); 2603 return; 2604 case InstArithmetic::Add: { 2605 Variable *Src0LoR = SrcsLo.src0R(this); 2606 Operand *Src1LoRF = SrcsLo.src1RF(this); 2607 Variable *Src0HiR = SrcsHi.src0R(this); 2608 Operand *Src1HiRF = SrcsHi.src1RF(this); 2609 _adds(T_Lo, Src0LoR, Src1LoRF); 2610 _mov(DestLo, T_Lo); 2611 _adc(T_Hi, Src0HiR, Src1HiRF); 2612 _mov(DestHi, T_Hi); 2613 return; 2614 } 2615 case InstArithmetic::And: { 2616 Variable *Src0LoR = SrcsLo.src0R(this); 2617 Operand *Src1LoRF = SrcsLo.src1RF(this); 2618 Variable *Src0HiR = SrcsHi.src0R(this); 2619 Operand *Src1HiRF = SrcsHi.src1RF(this); 2620 _and(T_Lo, Src0LoR, Src1LoRF); 2621 _mov(DestLo, T_Lo); 2622 _and(T_Hi, Src0HiR, Src1HiRF); 2623 _mov(DestHi, T_Hi); 2624 return; 2625 } 2626 case InstArithmetic::Or: { 2627 Variable *Src0LoR = SrcsLo.src0R(this); 2628 Operand *Src1LoRF = SrcsLo.src1RF(this); 2629 Variable *Src0HiR = SrcsHi.src0R(this); 2630 Operand *Src1HiRF = SrcsHi.src1RF(this); 2631 _orr(T_Lo, Src0LoR, Src1LoRF); 2632 _mov(DestLo, T_Lo); 2633 _orr(T_Hi, Src0HiR, Src1HiRF); 2634 _mov(DestHi, T_Hi); 2635 return; 2636 } 2637 case InstArithmetic::Xor: { 2638 Variable *Src0LoR = SrcsLo.src0R(this); 2639 Operand *Src1LoRF = SrcsLo.src1RF(this); 2640 Variable *Src0HiR = SrcsHi.src0R(this); 2641 Operand *Src1HiRF = SrcsHi.src1RF(this); 2642 _eor(T_Lo, Src0LoR, Src1LoRF); 2643 _mov(DestLo, T_Lo); 2644 _eor(T_Hi, Src0HiR, Src1HiRF); 2645 _mov(DestHi, T_Hi); 2646 return; 2647 } 2648 case InstArithmetic::Sub: { 2649 Variable *Src0LoR = SrcsLo.src0R(this); 2650 Operand *Src1LoRF = SrcsLo.src1RF(this); 2651 Variable *Src0HiR = SrcsHi.src0R(this); 2652 Operand *Src1HiRF = SrcsHi.src1RF(this); 2653 if (SrcsLo.swappedOperands()) { 2654 _rsbs(T_Lo, Src0LoR, Src1LoRF); 2655 _mov(DestLo, T_Lo); 2656 _rsc(T_Hi, Src0HiR, Src1HiRF); 2657 _mov(DestHi, T_Hi); 2658 } else { 2659 _subs(T_Lo, Src0LoR, Src1LoRF); 2660 _mov(DestLo, T_Lo); 2661 _sbc(T_Hi, Src0HiR, Src1HiRF); 2662 _mov(DestHi, T_Hi); 2663 } 2664 return; 2665 } 2666 case InstArithmetic::Mul: { 2667 // GCC 4.8 does: 2668 // a=b*c ==> 2669 // t_acc =(mul) (b.lo * c.hi) 2670 // t_acc =(mla) (c.lo * b.hi) + t_acc 2671 // t.hi,t.lo =(umull) b.lo * c.lo 2672 // t.hi += t_acc 2673 // a.lo = t.lo 2674 // a.hi = t.hi 2675 // 2676 // LLVM does: 2677 // t.hi,t.lo =(umull) b.lo * c.lo 2678 // t.hi =(mla) (b.lo * c.hi) + t.hi 2679 // t.hi =(mla) (b.hi * c.lo) + t.hi 2680 // a.lo = t.lo 2681 // a.hi = t.hi 2682 // 2683 // LLVM's lowering has fewer instructions, but more register pressure: 2684 // t.lo is live from beginning to end, while GCC delays the two-dest 2685 // instruction till the end, 
and kills c.hi immediately. 2686 Variable *T_Acc = makeReg(IceType_i32); 2687 Variable *T_Acc1 = makeReg(IceType_i32); 2688 Variable *T_Hi1 = makeReg(IceType_i32); 2689 Variable *Src0RLo = SrcsLo.unswappedSrc0R(this); 2690 Variable *Src0RHi = SrcsHi.unswappedSrc0R(this); 2691 Variable *Src1RLo = SrcsLo.unswappedSrc1R(this); 2692 Variable *Src1RHi = SrcsHi.unswappedSrc1R(this); 2693 _mul(T_Acc, Src0RLo, Src1RHi); 2694 _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc); 2695 _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo); 2696 _add(T_Hi, T_Hi1, T_Acc1); 2697 _mov(DestLo, T_Lo); 2698 _mov(DestHi, T_Hi); 2699 return; 2700 } 2701 case InstArithmetic::Shl: { 2702 if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) { 2703 Variable *Src0RLo = SrcsLo.src0R(this); 2704 // Truncating the ShAmt to [0, 63] because that's what ARM does anyway. 2705 const int32_t ShAmtImm = SrcsLo.getConstantValue() & 0x3F; 2706 if (ShAmtImm == 0) { 2707 _mov(DestLo, Src0RLo); 2708 _mov(DestHi, SrcsHi.src0R(this)); 2709 return; 2710 } 2711 2712 if (ShAmtImm >= 32) { 2713 if (ShAmtImm == 32) { 2714 _mov(DestHi, Src0RLo); 2715 } else { 2716 Operand *ShAmtOp = shAmtImm(ShAmtImm - 32); 2717 _lsl(T_Hi, Src0RLo, ShAmtOp); 2718 _mov(DestHi, T_Hi); 2719 } 2720 2721 Operand *_0 = 2722 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex); 2723 _mov(T_Lo, _0); 2724 _mov(DestLo, T_Lo); 2725 return; 2726 } 2727 2728 Variable *Src0RHi = SrcsHi.src0R(this); 2729 Operand *ShAmtOp = shAmtImm(ShAmtImm); 2730 Operand *ComplShAmtOp = shAmtImm(32 - ShAmtImm); 2731 _lsl(T_Hi, Src0RHi, ShAmtOp); 2732 _orr(T_Hi, T_Hi, 2733 OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, 2734 OperandARM32::LSR, ComplShAmtOp)); 2735 _mov(DestHi, T_Hi); 2736 2737 _lsl(T_Lo, Src0RLo, ShAmtOp); 2738 _mov(DestLo, T_Lo); 2739 return; 2740 } 2741 2742 // a=b<<c ==> 2743 // pnacl-llc does: 2744 // mov t_b.lo, b.lo 2745 // mov t_b.hi, b.hi 2746 // mov t_c.lo, c.lo 2747 // rsb T0, t_c.lo, #32 2748 // lsr T1, t_b.lo, T0 2749 // orr t_a.hi, T1, t_b.hi, lsl t_c.lo 2750 // sub T2, t_c.lo, #32 2751 // cmp T2, #0 2752 // lslge t_a.hi, t_b.lo, T2 2753 // lsl t_a.lo, t_b.lo, t_c.lo 2754 // mov a.lo, t_a.lo 2755 // mov a.hi, t_a.hi 2756 // 2757 // GCC 4.8 does: 2758 // sub t_c1, c.lo, #32 2759 // lsl t_hi, b.hi, c.lo 2760 // orr t_hi, t_hi, b.lo, lsl t_c1 2761 // rsb t_c2, c.lo, #32 2762 // orr t_hi, t_hi, b.lo, lsr t_c2 2763 // lsl t_lo, b.lo, c.lo 2764 // a.lo = t_lo 2765 // a.hi = t_hi 2766 // 2767 // These are incompatible, therefore we mimic pnacl-llc. 2768 // Can be strength-reduced for constant-shifts, but we don't do that for 2769 // now. 2770 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On 2771 // ARM, shifts only take the lower 8 bits of the shift register, and 2772 // saturate to the range 0-32, so the negative value will saturate to 32. 
2773 Operand *_32 = legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex); 2774 Operand *_0 = 2775 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex); 2776 Variable *T0 = makeReg(IceType_i32); 2777 Variable *T1 = makeReg(IceType_i32); 2778 Variable *T2 = makeReg(IceType_i32); 2779 Variable *TA_Hi = makeReg(IceType_i32); 2780 Variable *TA_Lo = makeReg(IceType_i32); 2781 Variable *Src0RLo = SrcsLo.unswappedSrc0R(this); 2782 Variable *Src0RHi = SrcsHi.unswappedSrc0R(this); 2783 Variable *Src1RLo = SrcsLo.unswappedSrc1R(this); 2784 _rsb(T0, Src1RLo, _32); 2785 _lsr(T1, Src0RLo, T0); 2786 _orr(TA_Hi, T1, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, 2787 OperandARM32::LSL, Src1RLo)); 2788 _sub(T2, Src1RLo, _32); 2789 _cmp(T2, _0); 2790 _lsl(TA_Hi, Src0RLo, T2, CondARM32::GE); 2791 _set_dest_redefined(); 2792 _lsl(TA_Lo, Src0RLo, Src1RLo); 2793 _mov(DestLo, TA_Lo); 2794 _mov(DestHi, TA_Hi); 2795 return; 2796 } 2797 case InstArithmetic::Lshr: 2798 case InstArithmetic::Ashr: { 2799 const bool ASR = Op == InstArithmetic::Ashr; 2800 if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) { 2801 Variable *Src0RHi = SrcsHi.src0R(this); 2802 // Truncating the ShAmt to [0, 63] because that's what ARM does anyway. 2803 const int32_t ShAmt = SrcsLo.getConstantValue() & 0x3F; 2804 if (ShAmt == 0) { 2805 _mov(DestHi, Src0RHi); 2806 _mov(DestLo, SrcsLo.src0R(this)); 2807 return; 2808 } 2809 2810 if (ShAmt >= 32) { 2811 if (ShAmt == 32) { 2812 _mov(DestLo, Src0RHi); 2813 } else { 2814 Operand *ShAmtImm = shAmtImm(ShAmt - 32); 2815 if (ASR) { 2816 _asr(T_Lo, Src0RHi, ShAmtImm); 2817 } else { 2818 _lsr(T_Lo, Src0RHi, ShAmtImm); 2819 } 2820 _mov(DestLo, T_Lo); 2821 } 2822 2823 if (ASR) { 2824 Operand *_31 = shAmtImm(31); 2825 _asr(T_Hi, Src0RHi, _31); 2826 } else { 2827 Operand *_0 = legalize(Ctx->getConstantZero(IceType_i32), 2828 Legal_Reg | Legal_Flex); 2829 _mov(T_Hi, _0); 2830 } 2831 _mov(DestHi, T_Hi); 2832 return; 2833 } 2834 2835 Variable *Src0RLo = SrcsLo.src0R(this); 2836 Operand *ShAmtImm = shAmtImm(ShAmt); 2837 Operand *ComplShAmtImm = shAmtImm(32 - ShAmt); 2838 _lsr(T_Lo, Src0RLo, ShAmtImm); 2839 _orr(T_Lo, T_Lo, 2840 OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, 2841 OperandARM32::LSL, ComplShAmtImm)); 2842 _mov(DestLo, T_Lo); 2843 2844 if (ASR) { 2845 _asr(T_Hi, Src0RHi, ShAmtImm); 2846 } else { 2847 _lsr(T_Hi, Src0RHi, ShAmtImm); 2848 } 2849 _mov(DestHi, T_Hi); 2850 return; 2851 } 2852 2853 // a=b>>c 2854 // pnacl-llc does: 2855 // mov t_b.lo, b.lo 2856 // mov t_b.hi, b.hi 2857 // mov t_c.lo, c.lo 2858 // lsr T0, t_b.lo, t_c.lo 2859 // rsb T1, t_c.lo, #32 2860 // orr t_a.lo, T0, t_b.hi, lsl T1 2861 // sub T2, t_c.lo, #32 2862 // cmp T2, #0 2863 // [al]srge t_a.lo, t_b.hi, T2 2864 // [al]sr t_a.hi, t_b.hi, t_c.lo 2865 // mov a.lo, t_a.lo 2866 // mov a.hi, t_a.hi 2867 // 2868 // GCC 4.8 does (lsr): 2869 // rsb t_c1, c.lo, #32 2870 // lsr t_lo, b.lo, c.lo 2871 // orr t_lo, t_lo, b.hi, lsl t_c1 2872 // sub t_c2, c.lo, #32 2873 // orr t_lo, t_lo, b.hi, lsr t_c2 2874 // lsr t_hi, b.hi, c.lo 2875 // mov a.lo, t_lo 2876 // mov a.hi, t_hi 2877 // 2878 // These are incompatible, therefore we mimic pnacl-llc. 
2879 Operand *_32 = legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex); 2880 Operand *_0 = 2881 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex); 2882 Variable *T0 = makeReg(IceType_i32); 2883 Variable *T1 = makeReg(IceType_i32); 2884 Variable *T2 = makeReg(IceType_i32); 2885 Variable *TA_Lo = makeReg(IceType_i32); 2886 Variable *TA_Hi = makeReg(IceType_i32); 2887 Variable *Src0RLo = SrcsLo.unswappedSrc0R(this); 2888 Variable *Src0RHi = SrcsHi.unswappedSrc0R(this); 2889 Variable *Src1RLo = SrcsLo.unswappedSrc1R(this); 2890 _lsr(T0, Src0RLo, Src1RLo); 2891 _rsb(T1, Src1RLo, _32); 2892 _orr(TA_Lo, T0, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, 2893 OperandARM32::LSL, T1)); 2894 _sub(T2, Src1RLo, _32); 2895 _cmp(T2, _0); 2896 if (ASR) { 2897 _asr(TA_Lo, Src0RHi, T2, CondARM32::GE); 2898 _set_dest_redefined(); 2899 _asr(TA_Hi, Src0RHi, Src1RLo); 2900 } else { 2901 _lsr(TA_Lo, Src0RHi, T2, CondARM32::GE); 2902 _set_dest_redefined(); 2903 _lsr(TA_Hi, Src0RHi, Src1RLo); 2904 } 2905 _mov(DestLo, TA_Lo); 2906 _mov(DestHi, TA_Hi); 2907 return; 2908 } 2909 case InstArithmetic::Fadd: 2910 case InstArithmetic::Fsub: 2911 case InstArithmetic::Fmul: 2912 case InstArithmetic::Fdiv: 2913 case InstArithmetic::Frem: 2914 llvm::report_fatal_error("FP instruction with i64 type"); 2915 return; 2916 case InstArithmetic::Udiv: 2917 case InstArithmetic::Sdiv: 2918 case InstArithmetic::Urem: 2919 case InstArithmetic::Srem: 2920 llvm::report_fatal_error("Call-helper-involved instruction for i64 type " 2921 "should have already been handled before"); 2922 return; 2923 } 2924 } 2925 2926 namespace { 2927 // StrengthReduction is a namespace with the strength reduction machinery. The 2928 // entry point is the StrengthReduction::tryToOptimize method. It returns true 2929 // if the optimization can be performed, and false otherwise. 2930 // 2931 // If the optimization can be performed, tryToOptimize sets its NumOperations 2932 // parameter to the number of shifts that are needed to perform the 2933 // multiplication; and it sets the Operations parameter with <ShAmt, AddOrSub> 2934 // tuples that describe how to materialize the multiplication. 2935 // 2936 // The algorithm finds contiguous 1s in the Multiplication source, and uses one 2937 // or two shifts to materialize it. A sequence of 1s, e.g., 2938 // 2939 // M N 2940 // ...00000000000011111...111110000000... 2941 // 2942 // is materializable with (1 << (M + 1)) - (1 << N): 2943 // 2944 // ...00000000000100000...000000000000... [1 << (M + 1)] 2945 // ...00000000000000000...000010000000... (-) [1 << N] 2946 // -------------------------------------- 2947 // ...00000000000011111...111110000000... 2948 // 2949 // And a single bit set, which is just a left shift. 2950 namespace StrengthReduction { 2951 enum AggregationOperation { 2952 AO_Invalid, 2953 AO_Add, 2954 AO_Sub, 2955 }; 2956 2957 // AggregateElement is a glorified <ShAmt, AddOrSub> tuple. 
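// For example (illustrative only), multiplying by 7 (0b111) yields the tuples
// <3, Add> and <0, Sub>, i.e., (x << 3) - x, while multiplying by 10 (0b1010)
// yields <3, Add> and <1, Add>, i.e., (x << 3) + (x << 1).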
2958 class AggregationElement { 2959 AggregationElement(const AggregationElement &) = delete; 2960 2961 public: 2962 AggregationElement() = default; 2963 AggregationElement &operator=(const AggregationElement &) = default; 2964 AggregationElement(AggregationOperation Op, uint32_t ShAmt) 2965 : Op(Op), ShAmt(ShAmt) {} 2966 2967 Operand *createShiftedOperand(Cfg *Func, Variable *OpR) const { 2968 assert(OpR->mustHaveReg()); 2969 if (ShAmt == 0) { 2970 return OpR; 2971 } 2972 return OperandARM32FlexReg::create( 2973 Func, IceType_i32, OpR, OperandARM32::LSL, 2974 OperandARM32ShAmtImm::create( 2975 Func, llvm::cast<ConstantInteger32>( 2976 Func->getContext()->getConstantInt32(ShAmt)))); 2977 } 2978 2979 bool aggregateWithAdd() const { 2980 switch (Op) { 2981 case AO_Invalid: 2982 llvm::report_fatal_error("Invalid Strength Reduction Operations."); 2983 case AO_Add: 2984 return true; 2985 case AO_Sub: 2986 return false; 2987 } 2988 llvm_unreachable("(silence g++ warning)"); 2989 } 2990 2991 uint32_t shAmt() const { return ShAmt; } 2992 2993 private: 2994 AggregationOperation Op = AO_Invalid; 2995 uint32_t ShAmt; 2996 }; 2997 2998 // [RangeStart, RangeEnd] is a range of 1s in Src. 2999 template <std::size_t N> 3000 bool addOperations(uint32_t RangeStart, uint32_t RangeEnd, SizeT *NumOperations, 3001 std::array<AggregationElement, N> *Operations) { 3002 assert(*NumOperations < N); 3003 if (RangeStart == RangeEnd) { 3004 // Single bit set: 3005 // Src : 0...00010... 3006 // RangeStart : ^ 3007 // RangeEnd : ^ 3008 // NegSrc : 0...00001... 3009 (*Operations)[*NumOperations] = AggregationElement(AO_Add, RangeStart); 3010 ++(*NumOperations); 3011 return true; 3012 } 3013 3014 // Sequence of 1s: (two operations required.) 3015 // Src : 0...00011...110... 3016 // RangeStart : ^ 3017 // RangeEnd : ^ 3018 // NegSrc : 0...00000...001... 3019 if (*NumOperations + 1 >= N) { 3020 return false; 3021 } 3022 (*Operations)[*NumOperations] = AggregationElement(AO_Add, RangeStart + 1); 3023 ++(*NumOperations); 3024 (*Operations)[*NumOperations] = AggregationElement(AO_Sub, RangeEnd); 3025 ++(*NumOperations); 3026 return true; 3027 } 3028 3029 // tryToOptmize scans Src looking for sequences of 1s (including the unitary bit 3030 // 1 surrounded by zeroes. 3031 template <std::size_t N> 3032 bool tryToOptimize(uint32_t Src, SizeT *NumOperations, 3033 std::array<AggregationElement, N> *Operations) { 3034 constexpr uint32_t SrcSizeBits = sizeof(Src) * CHAR_BIT; 3035 uint32_t NegSrc = ~Src; 3036 3037 *NumOperations = 0; 3038 while (Src != 0 && *NumOperations < N) { 3039 // Each step of the algorithm: 3040 // * finds L, the last bit set in Src; 3041 // * clears all the upper bits in NegSrc up to bit L; 3042 // * finds nL, the last bit set in NegSrc; 3043 // * clears all the upper bits in Src up to bit nL; 3044 // 3045 // if L == nL + 1, then a unitary 1 was found in Src. Otherwise, a sequence 3046 // of 1s starting at L, and ending at nL + 1, was found. 3047 const uint32_t SrcLastBitSet = llvm::findLastSet(Src); 3048 const uint32_t NegSrcClearMask = 3049 (SrcLastBitSet == 0) ? 0 3050 : (0xFFFFFFFFu) >> (SrcSizeBits - SrcLastBitSet); 3051 NegSrc &= NegSrcClearMask; 3052 if (NegSrc == 0) { 3053 if (addOperations(SrcLastBitSet, 0, NumOperations, Operations)) { 3054 return true; 3055 } 3056 return false; 3057 } 3058 const uint32_t NegSrcLastBitSet = llvm::findLastSet(NegSrc); 3059 assert(NegSrcLastBitSet < SrcLastBitSet); 3060 const uint32_t SrcClearMask = 3061 (NegSrcLastBitSet == 0) ? 
0 : (0xFFFFFFFFu) >> 3062 (SrcSizeBits - NegSrcLastBitSet); 3063 Src &= SrcClearMask; 3064 if (!addOperations(SrcLastBitSet, NegSrcLastBitSet + 1, NumOperations, 3065 Operations)) { 3066 return false; 3067 } 3068 } 3069 3070 return Src == 0; 3071 } 3072 } // end of namespace StrengthReduction 3073 } // end of anonymous namespace 3074 3075 void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) { 3076 Variable *Dest = Instr->getDest(); 3077 3078 if (Dest->isRematerializable()) { 3079 Context.insert<InstFakeDef>(Dest); 3080 return; 3081 } 3082 3083 Type DestTy = Dest->getType(); 3084 if (DestTy == IceType_i1) { 3085 lowerInt1Arithmetic(Instr); 3086 return; 3087 } 3088 3089 Operand *Src0 = legalizeUndef(Instr->getSrc(0)); 3090 Operand *Src1 = legalizeUndef(Instr->getSrc(1)); 3091 if (DestTy == IceType_i64) { 3092 lowerInt64Arithmetic(Instr->getOp(), Instr->getDest(), Src0, Src1); 3093 return; 3094 } 3095 3096 if (isVectorType(DestTy)) { 3097 switch (Instr->getOp()) { 3098 default: 3099 UnimplementedLoweringError(this, Instr); 3100 return; 3101 // Explicitly whitelist vector instructions we have implemented/enabled. 3102 case InstArithmetic::Add: 3103 case InstArithmetic::And: 3104 case InstArithmetic::Ashr: 3105 case InstArithmetic::Fadd: 3106 case InstArithmetic::Fmul: 3107 case InstArithmetic::Fsub: 3108 case InstArithmetic::Lshr: 3109 case InstArithmetic::Mul: 3110 case InstArithmetic::Or: 3111 case InstArithmetic::Shl: 3112 case InstArithmetic::Sub: 3113 case InstArithmetic::Xor: 3114 break; 3115 } 3116 } 3117 3118 Variable *T = makeReg(DestTy); 3119 3120 // * Handle div/rem separately. They require a non-legalized Src1 to inspect 3121 // whether or not Src1 is a non-zero constant. Once legalized it is more 3122 // difficult to determine (constant may be moved to a register). 3123 // * Handle floating point arithmetic separately: they require Src1 to be 3124 // legalized to a register. 
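  // E.g., for "a = udiv b, 5" the constant 5 stays visible to div0Check, so
  // the runtime zero check can be skipped entirely; legalizing 5 into a
  // register first would hide that fact (illustrative only).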
3125 switch (Instr->getOp()) { 3126 default: 3127 break; 3128 case InstArithmetic::Udiv: { 3129 constexpr bool NotRemainder = false; 3130 Variable *Src0R = legalizeToReg(Src0); 3131 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv, 3132 NotRemainder); 3133 return; 3134 } 3135 case InstArithmetic::Sdiv: { 3136 constexpr bool NotRemainder = false; 3137 Variable *Src0R = legalizeToReg(Src0); 3138 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv, 3139 NotRemainder); 3140 return; 3141 } 3142 case InstArithmetic::Urem: { 3143 constexpr bool IsRemainder = true; 3144 Variable *Src0R = legalizeToReg(Src0); 3145 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv, 3146 IsRemainder); 3147 return; 3148 } 3149 case InstArithmetic::Srem: { 3150 constexpr bool IsRemainder = true; 3151 Variable *Src0R = legalizeToReg(Src0); 3152 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv, 3153 IsRemainder); 3154 return; 3155 } 3156 case InstArithmetic::Frem: { 3157 if (!isScalarFloatingType(DestTy)) { 3158 llvm::report_fatal_error("Unexpected type when lowering frem."); 3159 } 3160 llvm::report_fatal_error("Frem should have already been lowered."); 3161 } 3162 case InstArithmetic::Fadd: { 3163 Variable *Src0R = legalizeToReg(Src0); 3164 if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) { 3165 Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0)); 3166 Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1)); 3167 _vmla(Src0R, Src1R, Src2R); 3168 _mov(Dest, Src0R); 3169 return; 3170 } 3171 3172 Variable *Src1R = legalizeToReg(Src1); 3173 _vadd(T, Src0R, Src1R); 3174 _mov(Dest, T); 3175 return; 3176 } 3177 case InstArithmetic::Fsub: { 3178 Variable *Src0R = legalizeToReg(Src0); 3179 if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) { 3180 Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0)); 3181 Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1)); 3182 _vmls(Src0R, Src1R, Src2R); 3183 _mov(Dest, Src0R); 3184 return; 3185 } 3186 Variable *Src1R = legalizeToReg(Src1); 3187 _vsub(T, Src0R, Src1R); 3188 _mov(Dest, T); 3189 return; 3190 } 3191 case InstArithmetic::Fmul: { 3192 Variable *Src0R = legalizeToReg(Src0); 3193 Variable *Src1R = legalizeToReg(Src1); 3194 _vmul(T, Src0R, Src1R); 3195 _mov(Dest, T); 3196 return; 3197 } 3198 case InstArithmetic::Fdiv: { 3199 Variable *Src0R = legalizeToReg(Src0); 3200 Variable *Src1R = legalizeToReg(Src1); 3201 _vdiv(T, Src0R, Src1R); 3202 _mov(Dest, T); 3203 return; 3204 } 3205 } 3206 3207 // Handle everything else here. 
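  // Int32Operands canonicalizes a constant operand into Src1 and records
  // whether the operands were swapped, so e.g. "a = add 5, b" is handled the
  // same way as "a = add b, 5" with Swapped set (illustrative only).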
3208 Int32Operands Srcs(Src0, Src1); 3209 switch (Instr->getOp()) { 3210 case InstArithmetic::_num: 3211 llvm::report_fatal_error("Unknown arithmetic operator"); 3212 return; 3213 case InstArithmetic::Add: { 3214 if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) { 3215 assert(!isVectorType(DestTy)); 3216 Variable *Src0R = legalizeToReg(Src0); 3217 Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0)); 3218 Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1)); 3219 _mla(T, Src1R, Src2R, Src0R); 3220 _mov(Dest, T); 3221 return; 3222 } 3223 3224 if (Srcs.hasConstOperand()) { 3225 if (!Srcs.immediateIsFlexEncodable() && 3226 Srcs.negatedImmediateIsFlexEncodable()) { 3227 assert(!isVectorType(DestTy)); 3228 Variable *Src0R = Srcs.src0R(this); 3229 Operand *Src1F = Srcs.negatedSrc1F(this); 3230 if (!Srcs.swappedOperands()) { 3231 _sub(T, Src0R, Src1F); 3232 } else { 3233 _rsb(T, Src0R, Src1F); 3234 } 3235 _mov(Dest, T); 3236 return; 3237 } 3238 } 3239 Variable *Src0R = Srcs.src0R(this); 3240 if (isVectorType(DestTy)) { 3241 Variable *Src1R = legalizeToReg(Src1); 3242 _vadd(T, Src0R, Src1R); 3243 } else { 3244 Operand *Src1RF = Srcs.src1RF(this); 3245 _add(T, Src0R, Src1RF); 3246 } 3247 _mov(Dest, T); 3248 return; 3249 } 3250 case InstArithmetic::And: { 3251 if (Srcs.hasConstOperand()) { 3252 if (!Srcs.immediateIsFlexEncodable() && 3253 Srcs.invertedImmediateIsFlexEncodable()) { 3254 Variable *Src0R = Srcs.src0R(this); 3255 Operand *Src1F = Srcs.invertedSrc1F(this); 3256 _bic(T, Src0R, Src1F); 3257 _mov(Dest, T); 3258 return; 3259 } 3260 } 3261 assert(isIntegerType(DestTy)); 3262 Variable *Src0R = Srcs.src0R(this); 3263 if (isVectorType(DestTy)) { 3264 Variable *Src1R = legalizeToReg(Src1); 3265 _vand(T, Src0R, Src1R); 3266 } else { 3267 Operand *Src1RF = Srcs.src1RF(this); 3268 _and(T, Src0R, Src1RF); 3269 } 3270 _mov(Dest, T); 3271 return; 3272 } 3273 case InstArithmetic::Or: { 3274 Variable *Src0R = Srcs.src0R(this); 3275 assert(isIntegerType(DestTy)); 3276 if (isVectorType(DestTy)) { 3277 Variable *Src1R = legalizeToReg(Src1); 3278 _vorr(T, Src0R, Src1R); 3279 } else { 3280 Operand *Src1RF = Srcs.src1RF(this); 3281 _orr(T, Src0R, Src1RF); 3282 } 3283 _mov(Dest, T); 3284 return; 3285 } 3286 case InstArithmetic::Xor: { 3287 Variable *Src0R = Srcs.src0R(this); 3288 assert(isIntegerType(DestTy)); 3289 if (isVectorType(DestTy)) { 3290 Variable *Src1R = legalizeToReg(Src1); 3291 _veor(T, Src0R, Src1R); 3292 } else { 3293 Operand *Src1RF = Srcs.src1RF(this); 3294 _eor(T, Src0R, Src1RF); 3295 } 3296 _mov(Dest, T); 3297 return; 3298 } 3299 case InstArithmetic::Sub: { 3300 if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) { 3301 assert(!isVectorType(DestTy)); 3302 Variable *Src0R = legalizeToReg(Src0); 3303 Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0)); 3304 Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1)); 3305 _mls(T, Src1R, Src2R, Src0R); 3306 _mov(Dest, T); 3307 return; 3308 } 3309 3310 if (Srcs.hasConstOperand()) { 3311 assert(!isVectorType(DestTy)); 3312 if (Srcs.immediateIsFlexEncodable()) { 3313 Variable *Src0R = Srcs.src0R(this); 3314 Operand *Src1RF = Srcs.src1RF(this); 3315 if (Srcs.swappedOperands()) { 3316 _rsb(T, Src0R, Src1RF); 3317 } else { 3318 _sub(T, Src0R, Src1RF); 3319 } 3320 _mov(Dest, T); 3321 return; 3322 } 3323 if (!Srcs.swappedOperands() && Srcs.negatedImmediateIsFlexEncodable()) { 3324 Variable *Src0R = Srcs.src0R(this); 3325 Operand *Src1F = Srcs.negatedSrc1F(this); 3326 _add(T, Src0R, Src1F); 3327 _mov(Dest, T); 3328 
return; 3329 } 3330 } 3331 Variable *Src0R = Srcs.unswappedSrc0R(this); 3332 Variable *Src1R = Srcs.unswappedSrc1R(this); 3333 if (isVectorType(DestTy)) { 3334 _vsub(T, Src0R, Src1R); 3335 } else { 3336 _sub(T, Src0R, Src1R); 3337 } 3338 _mov(Dest, T); 3339 return; 3340 } 3341 case InstArithmetic::Mul: { 3342 const bool OptM1 = Func->getOptLevel() == Opt_m1; 3343 if (!OptM1 && Srcs.hasConstOperand()) { 3344 constexpr std::size_t MaxShifts = 4; 3345 std::array<StrengthReduction::AggregationElement, MaxShifts> Shifts; 3346 SizeT NumOperations; 3347 int32_t Const = Srcs.getConstantValue(); 3348 const bool Invert = Const < 0; 3349 const bool MultiplyByZero = Const == 0; 3350 Operand *_0 = 3351 legalize(Ctx->getConstantZero(DestTy), Legal_Reg | Legal_Flex); 3352 3353 if (MultiplyByZero) { 3354 _mov(T, _0); 3355 _mov(Dest, T); 3356 return; 3357 } 3358 3359 if (Invert) { 3360 Const = -Const; 3361 } 3362 3363 if (StrengthReduction::tryToOptimize(Const, &NumOperations, &Shifts)) { 3364 assert(NumOperations >= 1); 3365 Variable *Src0R = Srcs.src0R(this); 3366 int32_t Start; 3367 int32_t End; 3368 if (NumOperations == 1 || Shifts[NumOperations - 1].shAmt() != 0) { 3369 // Multiplication by a power of 2 (NumOperations == 1); or 3370 // Multiplication by a even number not a power of 2. 3371 Start = 1; 3372 End = NumOperations; 3373 assert(Shifts[0].aggregateWithAdd()); 3374 _lsl(T, Src0R, shAmtImm(Shifts[0].shAmt())); 3375 } else { 3376 // Multiplication by an odd number. Put the free barrel shifter to a 3377 // good use. 3378 Start = 0; 3379 End = NumOperations - 2; 3380 const StrengthReduction::AggregationElement &Last = 3381 Shifts[NumOperations - 1]; 3382 const StrengthReduction::AggregationElement &SecondToLast = 3383 Shifts[NumOperations - 2]; 3384 if (!Last.aggregateWithAdd()) { 3385 assert(SecondToLast.aggregateWithAdd()); 3386 _rsb(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R)); 3387 } else if (!SecondToLast.aggregateWithAdd()) { 3388 assert(Last.aggregateWithAdd()); 3389 _sub(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R)); 3390 } else { 3391 _add(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R)); 3392 } 3393 } 3394 3395 // Odd numbers : S E I I 3396 // +---+---+---+---+---+---+ ... +---+---+---+---+ 3397 // Shifts = | | | | | | | ... | | | | | 3398 // +---+---+---+---+---+---+ ... +---+---+---+---+ 3399 // Even numbers: I S E 3400 // 3401 // S: Start; E: End; I: Init 3402 for (int32_t I = Start; I < End; ++I) { 3403 const StrengthReduction::AggregationElement &Current = Shifts[I]; 3404 Operand *SrcF = Current.createShiftedOperand(Func, Src0R); 3405 if (Current.aggregateWithAdd()) { 3406 _add(T, T, SrcF); 3407 } else { 3408 _sub(T, T, SrcF); 3409 } 3410 } 3411 3412 if (Invert) { 3413 // T = 0 - T. 
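// (rsb is ARM's reverse subtract, so rsb T, T, #0 computes 0 - T.) The
// strength-reduced sequence above was built for the absolute value of the
// multiplier, so a negative constant is fixed up with one final negation;
// e.g., x * -5 can be formed as T = x + (x << 2) followed by rsb T, T, #0.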
3414 _rsb(T, T, _0); 3415 } 3416 3417 _mov(Dest, T); 3418 return; 3419 } 3420 } 3421 Variable *Src0R = Srcs.unswappedSrc0R(this); 3422 Variable *Src1R = Srcs.unswappedSrc1R(this); 3423 if (isVectorType(DestTy)) { 3424 _vmul(T, Src0R, Src1R); 3425 } else { 3426 _mul(T, Src0R, Src1R); 3427 } 3428 _mov(Dest, T); 3429 return; 3430 } 3431 case InstArithmetic::Shl: { 3432 Variable *Src0R = Srcs.unswappedSrc0R(this); 3433 if (!isVectorType(T->getType())) { 3434 if (Srcs.isSrc1ImmediateZero()) { 3435 _mov(T, Src0R); 3436 } else { 3437 Operand *Src1R = Srcs.unswappedSrc1RShAmtImm(this); 3438 _lsl(T, Src0R, Src1R); 3439 } 3440 } else { 3441 if (Srcs.hasConstOperand()) { 3442 ConstantInteger32 *ShAmt = llvm::cast<ConstantInteger32>(Srcs.src1()); 3443 _vshl(T, Src0R, ShAmt); 3444 } else { 3445 auto *Src1R = Srcs.unswappedSrc1R(this); 3446 _vshl(T, Src0R, Src1R)->setSignType(InstARM32::FS_Unsigned); 3447 } 3448 } 3449 _mov(Dest, T); 3450 return; 3451 } 3452 case InstArithmetic::Lshr: { 3453 Variable *Src0R = Srcs.unswappedSrc0R(this); 3454 if (!isVectorType(T->getType())) { 3455 if (DestTy != IceType_i32) { 3456 _uxt(Src0R, Src0R); 3457 } 3458 if (Srcs.isSrc1ImmediateZero()) { 3459 _mov(T, Src0R); 3460 } else { 3461 Operand *Src1R = Srcs.unswappedSrc1RShAmtImm(this); 3462 _lsr(T, Src0R, Src1R); 3463 } 3464 } else { 3465 if (Srcs.hasConstOperand()) { 3466 ConstantInteger32 *ShAmt = llvm::cast<ConstantInteger32>(Srcs.src1()); 3467 _vshr(T, Src0R, ShAmt)->setSignType(InstARM32::FS_Unsigned); 3468 } else { 3469 auto *Src1R = Srcs.unswappedSrc1R(this); 3470 auto *Src1RNeg = makeReg(Src1R->getType()); 3471 _vneg(Src1RNeg, Src1R); 3472 _vshl(T, Src0R, Src1RNeg)->setSignType(InstARM32::FS_Unsigned); 3473 } 3474 } 3475 _mov(Dest, T); 3476 return; 3477 } 3478 case InstArithmetic::Ashr: { 3479 Variable *Src0R = Srcs.unswappedSrc0R(this); 3480 if (!isVectorType(T->getType())) { 3481 if (DestTy != IceType_i32) { 3482 _sxt(Src0R, Src0R); 3483 } 3484 if (Srcs.isSrc1ImmediateZero()) { 3485 _mov(T, Src0R); 3486 } else { 3487 _asr(T, Src0R, Srcs.unswappedSrc1RShAmtImm(this)); 3488 } 3489 } else { 3490 if (Srcs.hasConstOperand()) { 3491 ConstantInteger32 *ShAmt = llvm::cast<ConstantInteger32>(Srcs.src1()); 3492 _vshr(T, Src0R, ShAmt)->setSignType(InstARM32::FS_Signed); 3493 } else { 3494 auto *Src1R = Srcs.unswappedSrc1R(this); 3495 auto *Src1RNeg = makeReg(Src1R->getType()); 3496 _vneg(Src1RNeg, Src1R); 3497 _vshl(T, Src0R, Src1RNeg)->setSignType(InstARM32::FS_Signed); 3498 } 3499 } 3500 _mov(Dest, T); 3501 return; 3502 } 3503 case InstArithmetic::Udiv: 3504 case InstArithmetic::Sdiv: 3505 case InstArithmetic::Urem: 3506 case InstArithmetic::Srem: 3507 llvm::report_fatal_error( 3508 "Integer div/rem should have been handled earlier."); 3509 return; 3510 case InstArithmetic::Fadd: 3511 case InstArithmetic::Fsub: 3512 case InstArithmetic::Fmul: 3513 case InstArithmetic::Fdiv: 3514 case InstArithmetic::Frem: 3515 llvm::report_fatal_error( 3516 "Floating point arith should have been handled earlier."); 3517 return; 3518 } 3519 } 3520 3521 void TargetARM32::lowerAssign(const InstAssign *Instr) { 3522 Variable *Dest = Instr->getDest(); 3523 3524 if (Dest->isRematerializable()) { 3525 Context.insert<InstFakeDef>(Dest); 3526 return; 3527 } 3528 3529 Operand *Src0 = Instr->getSrc(0); 3530 assert(Dest->getType() == Src0->getType()); 3531 if (Dest->getType() == IceType_i64) { 3532 Src0 = legalizeUndef(Src0); 3533 3534 Variable *T_Lo = makeReg(IceType_i32); 3535 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); 3536 Operand 
*Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); 3537 _mov(T_Lo, Src0Lo); 3538 _mov(DestLo, T_Lo); 3539 3540 Variable *T_Hi = makeReg(IceType_i32); 3541 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 3542 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); 3543 _mov(T_Hi, Src0Hi); 3544 _mov(DestHi, T_Hi); 3545 3546 return; 3547 } 3548 3549 Operand *NewSrc; 3550 if (Dest->hasReg()) { 3551 // If Dest already has a physical register, then legalize the Src operand 3552 // into a Variable with the same register assignment. This especially 3553 // helps allow the use of Flex operands. 3554 NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum()); 3555 } else { 3556 // Dest could be a stack operand. Since we could potentially need to do a 3557 // Store (and store can only have Register operands), legalize this to a 3558 // register. 3559 NewSrc = legalize(Src0, Legal_Reg); 3560 } 3561 3562 if (isVectorType(Dest->getType()) || isScalarFloatingType(Dest->getType())) { 3563 NewSrc = legalize(NewSrc, Legal_Reg | Legal_Mem); 3564 } 3565 _mov(Dest, NewSrc); 3566 } 3567 3568 TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch( 3569 Operand *Boolean, const LowerInt1BranchTarget &TargetTrue, 3570 const LowerInt1BranchTarget &TargetFalse, uint32_t ShortCircuitable) { 3571 InstARM32Label *NewShortCircuitLabel = nullptr; 3572 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex); 3573 3574 const Inst *Producer = Computations.getProducerOf(Boolean); 3575 3576 if (Producer == nullptr) { 3577 // No producer, no problem: just do emit code to perform (Boolean & 1) and 3578 // set the flags register. The branch should be taken if the resulting flags 3579 // indicate a non-zero result. 3580 _tst(legalizeToReg(Boolean), _1); 3581 return ShortCircuitCondAndLabel(CondWhenTrue(CondARM32::NE)); 3582 } 3583 3584 switch (Producer->getKind()) { 3585 default: 3586 llvm::report_fatal_error("Unexpected producer."); 3587 case Inst::Icmp: { 3588 return ShortCircuitCondAndLabel( 3589 lowerIcmpCond(llvm::cast<InstIcmp>(Producer))); 3590 } break; 3591 case Inst::Fcmp: { 3592 return ShortCircuitCondAndLabel( 3593 lowerFcmpCond(llvm::cast<InstFcmp>(Producer))); 3594 } break; 3595 case Inst::Cast: { 3596 const auto *CastProducer = llvm::cast<InstCast>(Producer); 3597 assert(CastProducer->getCastKind() == InstCast::Trunc); 3598 Operand *Src = CastProducer->getSrc(0); 3599 if (Src->getType() == IceType_i64) 3600 Src = loOperand(Src); 3601 _tst(legalizeToReg(Src), _1); 3602 return ShortCircuitCondAndLabel(CondWhenTrue(CondARM32::NE)); 3603 } break; 3604 case Inst::Arithmetic: { 3605 const auto *ArithProducer = llvm::cast<InstArithmetic>(Producer); 3606 switch (ArithProducer->getOp()) { 3607 default: 3608 llvm::report_fatal_error("Unhandled Arithmetic Producer."); 3609 case InstArithmetic::And: { 3610 if (!(ShortCircuitable & SC_And)) { 3611 NewShortCircuitLabel = InstARM32Label::create(Func, this); 3612 } 3613 3614 LowerInt1BranchTarget NewTarget = 3615 TargetFalse.createForLabelOrDuplicate(NewShortCircuitLabel); 3616 3617 ShortCircuitCondAndLabel CondAndLabel = lowerInt1ForBranch( 3618 Producer->getSrc(0), TargetTrue, NewTarget, SC_And); 3619 const CondWhenTrue &Cond = CondAndLabel.Cond; 3620 3621 _br_short_circuit(NewTarget, Cond.invert()); 3622 3623 InstARM32Label *const ShortCircuitLabel = CondAndLabel.ShortCircuitTarget; 3624 if (ShortCircuitLabel != nullptr) 3625 Context.insert(ShortCircuitLabel); 3626 3627 return ShortCircuitCondAndLabel( 3628 
lowerInt1ForBranch(Producer->getSrc(1), TargetTrue, NewTarget, SC_All) 3629 .assertNoLabelAndReturnCond(), 3630 NewShortCircuitLabel); 3631 } break; 3632 case InstArithmetic::Or: { 3633 if (!(ShortCircuitable & SC_Or)) { 3634 NewShortCircuitLabel = InstARM32Label::create(Func, this); 3635 } 3636 3637 LowerInt1BranchTarget NewTarget = 3638 TargetTrue.createForLabelOrDuplicate(NewShortCircuitLabel); 3639 3640 ShortCircuitCondAndLabel CondAndLabel = lowerInt1ForBranch( 3641 Producer->getSrc(0), NewTarget, TargetFalse, SC_Or); 3642 const CondWhenTrue &Cond = CondAndLabel.Cond; 3643 3644 _br_short_circuit(NewTarget, Cond); 3645 3646 InstARM32Label *const ShortCircuitLabel = CondAndLabel.ShortCircuitTarget; 3647 if (ShortCircuitLabel != nullptr) 3648 Context.insert(ShortCircuitLabel); 3649 3650 return ShortCircuitCondAndLabel(lowerInt1ForBranch(Producer->getSrc(1), 3651 NewTarget, TargetFalse, 3652 SC_All) 3653 .assertNoLabelAndReturnCond(), 3654 NewShortCircuitLabel); 3655 } break; 3656 } 3657 } 3658 } 3659 } 3660 3661 void TargetARM32::lowerBr(const InstBr *Instr) { 3662 if (Instr->isUnconditional()) { 3663 _br(Instr->getTargetUnconditional()); 3664 return; 3665 } 3666 3667 CfgNode *TargetTrue = Instr->getTargetTrue(); 3668 CfgNode *TargetFalse = Instr->getTargetFalse(); 3669 ShortCircuitCondAndLabel CondAndLabel = lowerInt1ForBranch( 3670 Instr->getCondition(), LowerInt1BranchTarget(TargetTrue), 3671 LowerInt1BranchTarget(TargetFalse), SC_All); 3672 assert(CondAndLabel.ShortCircuitTarget == nullptr); 3673 3674 const CondWhenTrue &Cond = CondAndLabel.Cond; 3675 if (Cond.WhenTrue1 != CondARM32::kNone) { 3676 assert(Cond.WhenTrue0 != CondARM32::AL); 3677 _br(TargetTrue, Cond.WhenTrue1); 3678 } 3679 3680 switch (Cond.WhenTrue0) { 3681 default: 3682 _br(TargetTrue, TargetFalse, Cond.WhenTrue0); 3683 break; 3684 case CondARM32::kNone: 3685 _br(TargetFalse); 3686 break; 3687 case CondARM32::AL: 3688 _br(TargetTrue); 3689 break; 3690 } 3691 } 3692 3693 void TargetARM32::lowerCall(const InstCall *Instr) { 3694 Operand *CallTarget = Instr->getCallTarget(); 3695 if (Instr->isTargetHelperCall()) { 3696 auto TargetHelperPreamble = ARM32HelpersPreamble.find(CallTarget); 3697 if (TargetHelperPreamble != ARM32HelpersPreamble.end()) { 3698 (this->*TargetHelperPreamble->second)(Instr); 3699 } 3700 } 3701 MaybeLeafFunc = false; 3702 NeedsStackAlignment = true; 3703 3704 // Assign arguments to registers and stack. Also reserve stack. 3705 TargetARM32::CallingConv CC; 3706 // Pair of Arg Operand -> GPR number assignments. 3707 llvm::SmallVector<std::pair<Operand *, RegNumT>, NumGPRArgs> GPRArgs; 3708 llvm::SmallVector<std::pair<Operand *, RegNumT>, NumFP32Args> FPArgs; 3709 // Pair of Arg Operand -> stack offset. 3710 llvm::SmallVector<std::pair<Operand *, int32_t>, 8> StackArgs; 3711 size_t ParameterAreaSizeBytes = 0; 3712 3713 // Classify each argument operand according to the location where the 3714 // argument is passed. 
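// Scalar integer arguments are assigned to core registers (an i64 consumes a
// lo/hi GPR pair), floating point and vector arguments to VFP/NEON registers,
// and anything that does not fit in registers is given a slot in the outgoing
// argument area, with the running offset aligned for the argument's type.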
3715 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { 3716 Operand *Arg = legalizeUndef(Instr->getArg(i)); 3717 const Type Ty = Arg->getType(); 3718 bool InReg = false; 3719 RegNumT Reg; 3720 if (isScalarIntegerType(Ty)) { 3721 InReg = CC.argInGPR(Ty, &Reg); 3722 } else { 3723 InReg = CC.argInVFP(Ty, &Reg); 3724 } 3725 3726 if (!InReg) { 3727 ParameterAreaSizeBytes = 3728 applyStackAlignmentTy(ParameterAreaSizeBytes, Ty); 3729 StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes)); 3730 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Ty); 3731 continue; 3732 } 3733 3734 if (Ty == IceType_i64) { 3735 Operand *Lo = loOperand(Arg); 3736 Operand *Hi = hiOperand(Arg); 3737 GPRArgs.push_back(std::make_pair( 3738 Lo, RegNumT::fixme(RegARM32::getI64PairFirstGPRNum(Reg)))); 3739 GPRArgs.push_back(std::make_pair( 3740 Hi, RegNumT::fixme(RegARM32::getI64PairSecondGPRNum(Reg)))); 3741 } else if (isScalarIntegerType(Ty)) { 3742 GPRArgs.push_back(std::make_pair(Arg, Reg)); 3743 } else { 3744 FPArgs.push_back(std::make_pair(Arg, Reg)); 3745 } 3746 } 3747 3748 // Adjust the parameter area so that the stack is aligned. It is assumed that 3749 // the stack is already aligned at the start of the calling sequence. 3750 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); 3751 3752 if (ParameterAreaSizeBytes > MaxOutArgsSizeBytes) { 3753 llvm::report_fatal_error("MaxOutArgsSizeBytes is not really a max."); 3754 } 3755 3756 // Copy arguments that are passed on the stack to the appropriate stack 3757 // locations. 3758 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); 3759 for (auto &StackArg : StackArgs) { 3760 ConstantInteger32 *Loc = 3761 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second)); 3762 Type Ty = StackArg.first->getType(); 3763 OperandARM32Mem *Addr; 3764 constexpr bool SignExt = false; 3765 if (OperandARM32Mem::canHoldOffset(Ty, SignExt, StackArg.second)) { 3766 Addr = OperandARM32Mem::create(Func, Ty, SP, Loc); 3767 } else { 3768 Variable *NewBase = Func->makeVariable(SP->getType()); 3769 lowerArithmetic( 3770 InstArithmetic::create(Func, InstArithmetic::Add, NewBase, SP, Loc)); 3771 Addr = formMemoryOperand(NewBase, Ty); 3772 } 3773 lowerStore(InstStore::create(Func, StackArg.first, Addr)); 3774 } 3775 3776 // Generate the call instruction. Assign its result to a temporary with high 3777 // register allocation weight. 3778 Variable *Dest = Instr->getDest(); 3779 // ReturnReg doubles as ReturnRegLo as necessary. 3780 Variable *ReturnReg = nullptr; 3781 Variable *ReturnRegHi = nullptr; 3782 if (Dest) { 3783 switch (Dest->getType()) { 3784 case IceType_NUM: 3785 llvm::report_fatal_error("Invalid Call dest type"); 3786 break; 3787 case IceType_void: 3788 break; 3789 case IceType_i1: 3790 assert(Computations.getProducerOf(Dest) == nullptr); 3791 // Fall-through intended. 
3792 case IceType_i8: 3793 case IceType_i16: 3794 case IceType_i32: 3795 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_r0); 3796 break; 3797 case IceType_i64: 3798 ReturnReg = makeReg(IceType_i32, RegARM32::Reg_r0); 3799 ReturnRegHi = makeReg(IceType_i32, RegARM32::Reg_r1); 3800 break; 3801 case IceType_f32: 3802 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_s0); 3803 break; 3804 case IceType_f64: 3805 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_d0); 3806 break; 3807 case IceType_v4i1: 3808 case IceType_v8i1: 3809 case IceType_v16i1: 3810 case IceType_v16i8: 3811 case IceType_v8i16: 3812 case IceType_v4i32: 3813 case IceType_v4f32: 3814 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_q0); 3815 break; 3816 } 3817 } 3818 3819 // Allow ConstantRelocatable to be left alone as a direct call, but force 3820 // other constants like ConstantInteger32 to be in a register and make it an 3821 // indirect call. 3822 if (!llvm::isa<ConstantRelocatable>(CallTarget)) { 3823 CallTarget = legalize(CallTarget, Legal_Reg); 3824 } 3825 3826 // Copy arguments to be passed in registers to the appropriate registers. 3827 CfgVector<Variable *> RegArgs; 3828 for (auto &FPArg : FPArgs) { 3829 RegArgs.emplace_back(legalizeToReg(FPArg.first, FPArg.second)); 3830 } 3831 for (auto &GPRArg : GPRArgs) { 3832 RegArgs.emplace_back(legalizeToReg(GPRArg.first, GPRArg.second)); 3833 } 3834 3835 // Generate a FakeUse of register arguments so that they do not get dead code 3836 // eliminated as a result of the FakeKill of scratch registers after the call. 3837 // These fake-uses need to be placed here to avoid argument registers from 3838 // being used during the legalizeToReg() calls above. 3839 for (auto *RegArg : RegArgs) { 3840 Context.insert<InstFakeUse>(RegArg); 3841 } 3842 3843 InstARM32Call *NewCall = 3844 Sandboxer(this, InstBundleLock::Opt_AlignToEnd).bl(ReturnReg, CallTarget); 3845 3846 if (ReturnRegHi) 3847 Context.insert<InstFakeDef>(ReturnRegHi); 3848 3849 // Insert a register-kill pseudo instruction. 3850 Context.insert<InstFakeKill>(NewCall); 3851 3852 // Generate a FakeUse to keep the call live if necessary. 3853 if (Instr->hasSideEffects() && ReturnReg) { 3854 Context.insert<InstFakeUse>(ReturnReg); 3855 } 3856 3857 if (Dest != nullptr) { 3858 // Assign the result of the call to Dest. 
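// Per the switch above, integer results come back in r0 (plus r1 for the high
// word of an i64), and f32/f64/vector results come back in s0/d0/q0; the moves
// below copy whichever return register was set up into Dest (or its halves).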
3859 if (ReturnReg != nullptr) { 3860 if (ReturnRegHi) { 3861 auto *Dest64On32 = llvm::cast<Variable64On32>(Dest); 3862 Variable *DestLo = Dest64On32->getLo(); 3863 Variable *DestHi = Dest64On32->getHi(); 3864 _mov(DestLo, ReturnReg); 3865 _mov(DestHi, ReturnRegHi); 3866 } else { 3867 if (isFloatingType(Dest->getType()) || isVectorType(Dest->getType())) { 3868 _mov(Dest, ReturnReg); 3869 } else { 3870 assert(isIntegerType(Dest->getType()) && 3871 typeWidthInBytes(Dest->getType()) <= 4); 3872 _mov(Dest, ReturnReg); 3873 } 3874 } 3875 } 3876 } 3877 3878 if (Instr->isTargetHelperCall()) { 3879 auto TargetHelpersPostamble = ARM32HelpersPostamble.find(CallTarget); 3880 if (TargetHelpersPostamble != ARM32HelpersPostamble.end()) { 3881 (this->*TargetHelpersPostamble->second)(Instr); 3882 } 3883 } 3884 } 3885 3886 namespace { 3887 void configureBitcastTemporary(Variable64On32 *Var) { 3888 Var->setMustNotHaveReg(); 3889 Var->getHi()->setMustHaveReg(); 3890 Var->getLo()->setMustHaveReg(); 3891 } 3892 } // end of anonymous namespace 3893 3894 void TargetARM32::lowerCast(const InstCast *Instr) { 3895 InstCast::OpKind CastKind = Instr->getCastKind(); 3896 Variable *Dest = Instr->getDest(); 3897 const Type DestTy = Dest->getType(); 3898 Operand *Src0 = legalizeUndef(Instr->getSrc(0)); 3899 switch (CastKind) { 3900 default: 3901 Func->setError("Cast type not supported"); 3902 return; 3903 case InstCast::Sext: { 3904 if (isVectorType(DestTy)) { 3905 Variable *T0 = makeReg(DestTy); 3906 Variable *T1 = makeReg(DestTy); 3907 ConstantInteger32 *ShAmt = nullptr; 3908 switch (DestTy) { 3909 default: 3910 llvm::report_fatal_error("Unexpected type in vector sext."); 3911 case IceType_v16i8: 3912 ShAmt = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(7)); 3913 break; 3914 case IceType_v8i16: 3915 ShAmt = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(15)); 3916 break; 3917 case IceType_v4i32: 3918 ShAmt = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(31)); 3919 break; 3920 } 3921 auto *Src0R = legalizeToReg(Src0); 3922 _vshl(T0, Src0R, ShAmt); 3923 _vshr(T1, T0, ShAmt)->setSignType(InstARM32::FS_Signed); 3924 _mov(Dest, T1); 3925 } else if (DestTy == IceType_i64) { 3926 // t1=sxtb src; t2= mov t1 asr #31; dst.lo=t1; dst.hi=t2 3927 Constant *ShiftAmt = Ctx->getConstantInt32(31); 3928 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); 3929 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 3930 Variable *T_Lo = makeReg(DestLo->getType()); 3931 if (Src0->getType() == IceType_i32) { 3932 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); 3933 _mov(T_Lo, Src0RF); 3934 } else if (Src0->getType() != IceType_i1) { 3935 Variable *Src0R = legalizeToReg(Src0); 3936 _sxt(T_Lo, Src0R); 3937 } else { 3938 Operand *_0 = Ctx->getConstantZero(IceType_i32); 3939 Operand *_m1 = Ctx->getConstantInt32(-1); 3940 lowerInt1ForSelect(T_Lo, Src0, _m1, _0); 3941 } 3942 _mov(DestLo, T_Lo); 3943 Variable *T_Hi = makeReg(DestHi->getType()); 3944 if (Src0->getType() != IceType_i1) { 3945 _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, T_Lo, 3946 OperandARM32::ASR, ShiftAmt)); 3947 } else { 3948 // For i1, the asr instruction is already done above. 
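// (T_Lo already holds 0 or -1 from the select above, so the sign-extended high
// word is just a copy of the low word.)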
3949 _mov(T_Hi, T_Lo); 3950 } 3951 _mov(DestHi, T_Hi); 3952 } else if (Src0->getType() != IceType_i1) { 3953 // t1 = sxt src; dst = t1 3954 Variable *Src0R = legalizeToReg(Src0); 3955 Variable *T = makeReg(DestTy); 3956 _sxt(T, Src0R); 3957 _mov(Dest, T); 3958 } else { 3959 Constant *_0 = Ctx->getConstantZero(IceType_i32); 3960 Operand *_m1 = Ctx->getConstantInt(DestTy, -1); 3961 Variable *T = makeReg(DestTy); 3962 lowerInt1ForSelect(T, Src0, _m1, _0); 3963 _mov(Dest, T); 3964 } 3965 break; 3966 } 3967 case InstCast::Zext: { 3968 if (isVectorType(DestTy)) { 3969 auto *Mask = makeReg(DestTy); 3970 auto *_1 = Ctx->getConstantInt32(1); 3971 auto *T = makeReg(DestTy); 3972 auto *Src0R = legalizeToReg(Src0); 3973 _mov(Mask, _1); 3974 _vand(T, Src0R, Mask); 3975 _mov(Dest, T); 3976 } else if (DestTy == IceType_i64) { 3977 // t1=uxtb src; dst.lo=t1; dst.hi=0 3978 Operand *_0 = 3979 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex); 3980 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); 3981 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 3982 Variable *T_Lo = makeReg(DestLo->getType()); 3983 3984 switch (Src0->getType()) { 3985 default: { 3986 assert(Src0->getType() != IceType_i64); 3987 _uxt(T_Lo, legalizeToReg(Src0)); 3988 } break; 3989 case IceType_i32: { 3990 _mov(T_Lo, legalize(Src0, Legal_Reg | Legal_Flex)); 3991 } break; 3992 case IceType_i1: { 3993 SafeBoolChain Safe = lowerInt1(T_Lo, Src0); 3994 if (Safe == SBC_No) { 3995 Operand *_1 = 3996 legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex); 3997 _and(T_Lo, T_Lo, _1); 3998 } 3999 } break; 4000 } 4001 4002 _mov(DestLo, T_Lo); 4003 4004 Variable *T_Hi = makeReg(DestLo->getType()); 4005 _mov(T_Hi, _0); 4006 _mov(DestHi, T_Hi); 4007 } else if (Src0->getType() == IceType_i1) { 4008 Variable *T = makeReg(DestTy); 4009 4010 SafeBoolChain Safe = lowerInt1(T, Src0); 4011 if (Safe == SBC_No) { 4012 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex); 4013 _and(T, T, _1); 4014 } 4015 4016 _mov(Dest, T); 4017 } else { 4018 // t1 = uxt src; dst = t1 4019 Variable *Src0R = legalizeToReg(Src0); 4020 Variable *T = makeReg(DestTy); 4021 _uxt(T, Src0R); 4022 _mov(Dest, T); 4023 } 4024 break; 4025 } 4026 case InstCast::Trunc: { 4027 if (isVectorType(DestTy)) { 4028 auto *T = makeReg(DestTy); 4029 auto *Src0R = legalizeToReg(Src0); 4030 _mov(T, Src0R); 4031 _mov(Dest, T); 4032 } else { 4033 if (Src0->getType() == IceType_i64) 4034 Src0 = loOperand(Src0); 4035 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); 4036 // t1 = trunc Src0RF; Dest = t1 4037 Variable *T = makeReg(DestTy); 4038 _mov(T, Src0RF); 4039 if (DestTy == IceType_i1) 4040 _and(T, T, Ctx->getConstantInt1(1)); 4041 _mov(Dest, T); 4042 } 4043 break; 4044 } 4045 case InstCast::Fptrunc: 4046 case InstCast::Fpext: { 4047 // fptrunc: dest.f32 = fptrunc src0.fp64 4048 // fpext: dest.f64 = fptrunc src0.fp32 4049 const bool IsTrunc = CastKind == InstCast::Fptrunc; 4050 assert(!isVectorType(DestTy)); 4051 assert(DestTy == (IsTrunc ? IceType_f32 : IceType_f64)); 4052 assert(Src0->getType() == (IsTrunc ? IceType_f64 : IceType_f32)); 4053 Variable *Src0R = legalizeToReg(Src0); 4054 Variable *T = makeReg(DestTy); 4055 _vcvt(T, Src0R, IsTrunc ? 
InstARM32Vcvt::D2s : InstARM32Vcvt::S2d); 4056 _mov(Dest, T); 4057 break; 4058 } 4059 case InstCast::Fptosi: 4060 case InstCast::Fptoui: { 4061 const bool DestIsSigned = CastKind == InstCast::Fptosi; 4062 Variable *Src0R = legalizeToReg(Src0); 4063 4064 if (isVectorType(DestTy)) { 4065 assert(typeElementType(Src0->getType()) == IceType_f32); 4066 auto *T = makeReg(DestTy); 4067 _vcvt(T, Src0R, 4068 DestIsSigned ? InstARM32Vcvt::Vs2si : InstARM32Vcvt::Vs2ui); 4069 _mov(Dest, T); 4070 break; 4071 } 4072 4073 const bool Src0IsF32 = isFloat32Asserting32Or64(Src0->getType()); 4074 if (llvm::isa<Variable64On32>(Dest)) { 4075 llvm::report_fatal_error("fp-to-i64 should have been pre-lowered."); 4076 } 4077 // fptosi: 4078 // t1.fp = vcvt src0.fp 4079 // t2.i32 = vmov t1.fp 4080 // dest.int = conv t2.i32 @ Truncates the result if needed. 4081 // fptoui: 4082 // t1.fp = vcvt src0.fp 4083 // t2.u32 = vmov t1.fp 4084 // dest.uint = conv t2.u32 @ Truncates the result if needed. 4085 Variable *T_fp = makeReg(IceType_f32); 4086 const InstARM32Vcvt::VcvtVariant Conversion = 4087 Src0IsF32 ? (DestIsSigned ? InstARM32Vcvt::S2si : InstARM32Vcvt::S2ui) 4088 : (DestIsSigned ? InstARM32Vcvt::D2si : InstARM32Vcvt::D2ui); 4089 _vcvt(T_fp, Src0R, Conversion); 4090 Variable *T = makeReg(IceType_i32); 4091 _mov(T, T_fp); 4092 if (DestTy != IceType_i32) { 4093 Variable *T_1 = makeReg(DestTy); 4094 lowerCast(InstCast::create(Func, InstCast::Trunc, T_1, T)); 4095 T = T_1; 4096 } 4097 _mov(Dest, T); 4098 break; 4099 } 4100 case InstCast::Sitofp: 4101 case InstCast::Uitofp: { 4102 const bool SourceIsSigned = CastKind == InstCast::Sitofp; 4103 4104 if (isVectorType(DestTy)) { 4105 assert(typeElementType(DestTy) == IceType_f32); 4106 auto *T = makeReg(DestTy); 4107 Variable *Src0R = legalizeToReg(Src0); 4108 _vcvt(T, Src0R, 4109 SourceIsSigned ? InstARM32Vcvt::Vsi2s : InstARM32Vcvt::Vui2s); 4110 _mov(Dest, T); 4111 break; 4112 } 4113 4114 const bool DestIsF32 = isFloat32Asserting32Or64(DestTy); 4115 if (Src0->getType() == IceType_i64) { 4116 llvm::report_fatal_error("i64-to-fp should have been pre-lowered."); 4117 } 4118 // sitofp: 4119 // t1.i32 = sext src.int @ sign-extends src0 if needed. 4120 // t2.fp32 = vmov t1.i32 4121 // t3.fp = vcvt.{fp}.s32 @ fp is either f32 or f64 4122 // uitofp: 4123 // t1.i32 = zext src.int @ zero-extends src0 if needed. 4124 // t2.fp32 = vmov t1.i32 4125 // t3.fp = vcvt.{fp}.s32 @ fp is either f32 or f64 4126 if (Src0->getType() != IceType_i32) { 4127 Variable *Src0R_32 = makeReg(IceType_i32); 4128 lowerCast(InstCast::create(Func, SourceIsSigned ? InstCast::Sext 4129 : InstCast::Zext, 4130 Src0R_32, Src0)); 4131 Src0 = Src0R_32; 4132 } 4133 Variable *Src0R = legalizeToReg(Src0); 4134 Variable *Src0R_f32 = makeReg(IceType_f32); 4135 _mov(Src0R_f32, Src0R); 4136 Src0R = Src0R_f32; 4137 Variable *T = makeReg(DestTy); 4138 const InstARM32Vcvt::VcvtVariant Conversion = 4139 DestIsF32 4140 ? (SourceIsSigned ? InstARM32Vcvt::Si2s : InstARM32Vcvt::Ui2s) 4141 : (SourceIsSigned ? 
InstARM32Vcvt::Si2d : InstARM32Vcvt::Ui2d); 4142 _vcvt(T, Src0R, Conversion); 4143 _mov(Dest, T); 4144 break; 4145 } 4146 case InstCast::Bitcast: { 4147 Operand *Src0 = Instr->getSrc(0); 4148 if (DestTy == Src0->getType()) { 4149 auto *Assign = InstAssign::create(Func, Dest, Src0); 4150 lowerAssign(Assign); 4151 return; 4152 } 4153 switch (DestTy) { 4154 case IceType_NUM: 4155 case IceType_void: 4156 llvm::report_fatal_error("Unexpected bitcast."); 4157 case IceType_i1: 4158 UnimplementedLoweringError(this, Instr); 4159 break; 4160 case IceType_i8: 4161 assert(Src0->getType() == IceType_v8i1); 4162 llvm::report_fatal_error( 4163 "i8 to v8i1 conversion should have been prelowered."); 4164 break; 4165 case IceType_i16: 4166 assert(Src0->getType() == IceType_v16i1); 4167 llvm::report_fatal_error( 4168 "i16 to v16i1 conversion should have been prelowered."); 4169 break; 4170 case IceType_i32: 4171 case IceType_f32: { 4172 Variable *Src0R = legalizeToReg(Src0); 4173 Variable *T = makeReg(DestTy); 4174 _mov(T, Src0R); 4175 lowerAssign(InstAssign::create(Func, Dest, T)); 4176 break; 4177 } 4178 case IceType_i64: { 4179 // t0, t1 <- src0 4180 // dest[31..0] = t0 4181 // dest[63..32] = t1 4182 assert(Src0->getType() == IceType_f64); 4183 auto *T = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); 4184 T->initHiLo(Func); 4185 configureBitcastTemporary(T); 4186 Variable *Src0R = legalizeToReg(Src0); 4187 _mov(T, Src0R); 4188 Context.insert<InstFakeUse>(T->getHi()); 4189 Context.insert<InstFakeUse>(T->getLo()); 4190 lowerAssign(InstAssign::create(Func, Dest, T)); 4191 break; 4192 } 4193 case IceType_f64: { 4194 // T0 <- lo(src) 4195 // T1 <- hi(src) 4196 // vmov T2, T0, T1 4197 // Dest <- T2 4198 assert(Src0->getType() == IceType_i64); 4199 Variable *T = makeReg(DestTy); 4200 auto *Src64 = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); 4201 Src64->initHiLo(Func); 4202 configureBitcastTemporary(Src64); 4203 lowerAssign(InstAssign::create(Func, Src64, Src0)); 4204 _mov(T, Src64); 4205 lowerAssign(InstAssign::create(Func, Dest, T)); 4206 break; 4207 } 4208 case IceType_v8i1: 4209 assert(Src0->getType() == IceType_i8); 4210 llvm::report_fatal_error( 4211 "v8i1 to i8 conversion should have been prelowered."); 4212 break; 4213 case IceType_v16i1: 4214 assert(Src0->getType() == IceType_i16); 4215 llvm::report_fatal_error( 4216 "v16i1 to i16 conversion should have been prelowered."); 4217 break; 4218 case IceType_v4i1: 4219 case IceType_v8i16: 4220 case IceType_v16i8: 4221 case IceType_v4f32: 4222 case IceType_v4i32: { 4223 assert(typeWidthInBytes(DestTy) == typeWidthInBytes(Src0->getType())); 4224 assert(isVectorType(DestTy) == isVectorType(Src0->getType())); 4225 Variable *T = makeReg(DestTy); 4226 _mov(T, Src0); 4227 _mov(Dest, T); 4228 break; 4229 } 4230 } 4231 break; 4232 } 4233 } 4234 } 4235 4236 void TargetARM32::lowerExtractElement(const InstExtractElement *Instr) { 4237 Variable *Dest = Instr->getDest(); 4238 Type DestTy = Dest->getType(); 4239 4240 Variable *Src0 = legalizeToReg(Instr->getSrc(0)); 4241 Operand *Src1 = Instr->getSrc(1); 4242 4243 if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src1)) { 4244 const uint32_t Index = Imm->getValue(); 4245 Variable *T = makeReg(DestTy); 4246 Variable *TSrc0 = makeReg(Src0->getType()); 4247 4248 if (isFloatingType(DestTy)) { 4249 // We need to make sure the source is in a suitable register. 
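// Only q0-q7 overlap the S register file (s0-s31), so a vector whose 32-bit
// lanes are to be accessed as S registers must be allocated in that lower half
// of the Q registers; the RCARM32_QtoS register class expresses exactly this
// constraint to the register allocator.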
4250 TSrc0->setRegClass(RegARM32::RCARM32_QtoS); 4251 } 4252 4253 _mov(TSrc0, Src0); 4254 _extractelement(T, TSrc0, Index); 4255 _mov(Dest, T); 4256 return; 4257 } 4258 assert(false && "extractelement requires a constant index"); 4259 } 4260 4261 namespace { 4262 // Validates FCMPARM32_TABLE's declaration w.r.t. InstFcmp::FCondition ordering 4263 // (and naming). 4264 enum { 4265 #define X(val, CC0, CC1, CC0_V, CC1_V, INV_V, NEG_V) _fcmp_ll_##val, 4266 FCMPARM32_TABLE 4267 #undef X 4268 _fcmp_ll_NUM 4269 }; 4270 4271 enum { 4272 #define X(tag, str) _fcmp_hl_##tag = InstFcmp::tag, 4273 ICEINSTFCMP_TABLE 4274 #undef X 4275 _fcmp_hl_NUM 4276 }; 4277 4278 static_assert((uint32_t)_fcmp_hl_NUM == (uint32_t)_fcmp_ll_NUM, 4279 "Inconsistency between high-level and low-level fcmp tags."); 4280 #define X(tag, str) \ 4281 static_assert( \ 4282 (uint32_t)_fcmp_hl_##tag == (uint32_t)_fcmp_ll_##tag, \ 4283 "Inconsistency between high-level and low-level fcmp tag " #tag); 4284 ICEINSTFCMP_TABLE 4285 #undef X 4286 4287 struct { 4288 CondARM32::Cond CC0; 4289 CondARM32::Cond CC1; 4290 } TableFcmp[] = { 4291 #define X(val, CC0, CC1, CC0_V, CC1_V, INV_V, NEG_V) \ 4292 { CondARM32::CC0, CondARM32::CC1 } \ 4293 , 4294 FCMPARM32_TABLE 4295 #undef X 4296 }; 4297 4298 bool isFloatingPointZero(const Operand *Src) { 4299 if (const auto *F32 = llvm::dyn_cast<const ConstantFloat>(Src)) { 4300 return Utils::isPositiveZero(F32->getValue()); 4301 } 4302 4303 if (const auto *F64 = llvm::dyn_cast<const ConstantDouble>(Src)) { 4304 return Utils::isPositiveZero(F64->getValue()); 4305 } 4306 4307 return false; 4308 } 4309 } // end of anonymous namespace 4310 4311 TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) { 4312 InstFcmp::FCond Condition = Instr->getCondition(); 4313 switch (Condition) { 4314 case InstFcmp::False: 4315 return CondWhenTrue(CondARM32::kNone); 4316 case InstFcmp::True: 4317 return CondWhenTrue(CondARM32::AL); 4318 break; 4319 default: { 4320 Variable *Src0R = legalizeToReg(Instr->getSrc(0)); 4321 Operand *Src1 = Instr->getSrc(1); 4322 if (isFloatingPointZero(Src1)) { 4323 _vcmp(Src0R, OperandARM32FlexFpZero::create(Func, Src0R->getType())); 4324 } else { 4325 _vcmp(Src0R, legalizeToReg(Src1)); 4326 } 4327 _vmrs(); 4328 assert(Condition < llvm::array_lengthof(TableFcmp)); 4329 return CondWhenTrue(TableFcmp[Condition].CC0, TableFcmp[Condition].CC1); 4330 } 4331 } 4332 } 4333 4334 void TargetARM32::lowerFcmp(const InstFcmp *Instr) { 4335 Variable *Dest = Instr->getDest(); 4336 const Type DestTy = Dest->getType(); 4337 4338 if (isVectorType(DestTy)) { 4339 if (Instr->getCondition() == InstFcmp::False) { 4340 constexpr Type SafeTypeForMovingConstant = IceType_v4i32; 4341 auto *T = makeReg(SafeTypeForMovingConstant); 4342 _mov(T, llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(0))); 4343 _mov(Dest, T); 4344 return; 4345 } 4346 4347 if (Instr->getCondition() == InstFcmp::True) { 4348 constexpr Type SafeTypeForMovingConstant = IceType_v4i32; 4349 auto *T = makeReg(SafeTypeForMovingConstant); 4350 _mov(T, llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(1))); 4351 _mov(Dest, T); 4352 return; 4353 } 4354 4355 Variable *T0; 4356 Variable *T1; 4357 bool Negate = false; 4358 auto *Src0 = legalizeToReg(Instr->getSrc(0)); 4359 auto *Src1 = legalizeToReg(Instr->getSrc(1)); 4360 4361 switch (Instr->getCondition()) { 4362 default: 4363 llvm::report_fatal_error("Unhandled fp comparison."); 4364 #define _Vcnone(Tptr, S0, S1) \ 4365 do { \ 4366 *(Tptr) = nullptr; \ 4367 } while (0) 4368 
#define _Vceq(Tptr, S0, S1) \ 4369 do { \ 4370 *(Tptr) = makeReg(DestTy); \ 4371 _vceq(*(Tptr), S0, S1); \ 4372 } while (0) 4373 #define _Vcge(Tptr, S0, S1) \ 4374 do { \ 4375 *(Tptr) = makeReg(DestTy); \ 4376 _vcge(*(Tptr), S0, S1)->setSignType(InstARM32::FS_Signed); \ 4377 } while (0) 4378 #define _Vcgt(Tptr, S0, S1) \ 4379 do { \ 4380 *(Tptr) = makeReg(DestTy); \ 4381 _vcgt(*(Tptr), S0, S1)->setSignType(InstARM32::FS_Signed); \ 4382 } while (0) 4383 #define X(val, CC0, CC1, CC0_V, CC1_V, INV_V, NEG_V) \ 4384 case InstFcmp::val: { \ 4385 _Vc##CC0_V(&T0, (INV_V) ? Src1 : Src0, (INV_V) ? Src0 : Src1); \ 4386 _Vc##CC1_V(&T1, (INV_V) ? Src0 : Src1, (INV_V) ? Src1 : Src0); \ 4387 Negate = NEG_V; \ 4388 } break; 4389 FCMPARM32_TABLE 4390 #undef X 4391 #undef _Vcgt 4392 #undef _Vcge 4393 #undef _Vceq 4394 #undef _Vcnone 4395 } 4396 assert(T0 != nullptr); 4397 Variable *T = T0; 4398 if (T1 != nullptr) { 4399 T = makeReg(DestTy); 4400 _vorr(T, T0, T1); 4401 } 4402 4403 if (Negate) { 4404 auto *TNeg = makeReg(DestTy); 4405 _vmvn(TNeg, T); 4406 T = TNeg; 4407 } 4408 4409 _mov(Dest, T); 4410 return; 4411 } 4412 4413 Variable *T = makeReg(IceType_i1); 4414 Operand *_1 = legalize(Ctx->getConstantInt32(1), Legal_Reg | Legal_Flex); 4415 Operand *_0 = 4416 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex); 4417 4418 CondWhenTrue Cond = lowerFcmpCond(Instr); 4419 4420 bool RedefineT = false; 4421 if (Cond.WhenTrue0 != CondARM32::AL) { 4422 _mov(T, _0); 4423 RedefineT = true; 4424 } 4425 4426 if (Cond.WhenTrue0 == CondARM32::kNone) { 4427 _mov(Dest, T); 4428 return; 4429 } 4430 4431 if (RedefineT) { 4432 _mov_redefined(T, _1, Cond.WhenTrue0); 4433 } else { 4434 _mov(T, _1, Cond.WhenTrue0); 4435 } 4436 4437 if (Cond.WhenTrue1 != CondARM32::kNone) { 4438 _mov_redefined(T, _1, Cond.WhenTrue1); 4439 } 4440 4441 _mov(Dest, T); 4442 } 4443 4444 TargetARM32::CondWhenTrue 4445 TargetARM32::lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0, 4446 Operand *Src1) { 4447 assert(Condition < llvm::array_lengthof(TableIcmp64)); 4448 4449 Int32Operands SrcsLo(loOperand(Src0), loOperand(Src1)); 4450 Int32Operands SrcsHi(hiOperand(Src0), hiOperand(Src1)); 4451 assert(SrcsLo.hasConstOperand() == SrcsHi.hasConstOperand()); 4452 assert(SrcsLo.swappedOperands() == SrcsHi.swappedOperands()); 4453 4454 if (SrcsLo.hasConstOperand()) { 4455 const uint32_t ValueLo = SrcsLo.getConstantValue(); 4456 const uint32_t ValueHi = SrcsHi.getConstantValue(); 4457 const uint64_t Value = (static_cast<uint64_t>(ValueHi) << 32) | ValueLo; 4458 if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && 4459 Value == 0) { 4460 Variable *T = makeReg(IceType_i32); 4461 Variable *Src0LoR = SrcsLo.src0R(this); 4462 Variable *Src0HiR = SrcsHi.src0R(this); 4463 _orrs(T, Src0LoR, Src0HiR); 4464 Context.insert<InstFakeUse>(T); 4465 return CondWhenTrue(TableIcmp64[Condition].C1); 4466 } 4467 4468 Variable *Src0RLo = SrcsLo.src0R(this); 4469 Variable *Src0RHi = SrcsHi.src0R(this); 4470 Operand *Src1RFLo = SrcsLo.src1RF(this); 4471 Operand *Src1RFHi = ValueLo == ValueHi ? 
Src1RFLo : SrcsHi.src1RF(this); 4472 4473 const bool UseRsb = 4474 TableIcmp64[Condition].Swapped != SrcsLo.swappedOperands(); 4475 4476 if (UseRsb) { 4477 if (TableIcmp64[Condition].IsSigned) { 4478 Variable *T = makeReg(IceType_i32); 4479 _rsbs(T, Src0RLo, Src1RFLo); 4480 Context.insert<InstFakeUse>(T); 4481 4482 T = makeReg(IceType_i32); 4483 _rscs(T, Src0RHi, Src1RFHi); 4484 // We need to add a FakeUse here because liveness gets mad at us (Def 4485 // without Use.) Note that flag-setting instructions are considered to 4486 // have side effects and, therefore, are not DCE'ed. 4487 Context.insert<InstFakeUse>(T); 4488 } else { 4489 Variable *T = makeReg(IceType_i32); 4490 _rsbs(T, Src0RHi, Src1RFHi); 4491 Context.insert<InstFakeUse>(T); 4492 4493 T = makeReg(IceType_i32); 4494 _rsbs(T, Src0RLo, Src1RFLo, CondARM32::EQ); 4495 Context.insert<InstFakeUse>(T); 4496 } 4497 } else { 4498 if (TableIcmp64[Condition].IsSigned) { 4499 _cmp(Src0RLo, Src1RFLo); 4500 Variable *T = makeReg(IceType_i32); 4501 _sbcs(T, Src0RHi, Src1RFHi); 4502 Context.insert<InstFakeUse>(T); 4503 } else { 4504 _cmp(Src0RHi, Src1RFHi); 4505 _cmp(Src0RLo, Src1RFLo, CondARM32::EQ); 4506 } 4507 } 4508 4509 return CondWhenTrue(TableIcmp64[Condition].C1); 4510 } 4511 4512 Variable *Src0RLo, *Src0RHi; 4513 Operand *Src1RFLo, *Src1RFHi; 4514 if (TableIcmp64[Condition].Swapped) { 4515 Src0RLo = legalizeToReg(loOperand(Src1)); 4516 Src0RHi = legalizeToReg(hiOperand(Src1)); 4517 Src1RFLo = legalizeToReg(loOperand(Src0)); 4518 Src1RFHi = legalizeToReg(hiOperand(Src0)); 4519 } else { 4520 Src0RLo = legalizeToReg(loOperand(Src0)); 4521 Src0RHi = legalizeToReg(hiOperand(Src0)); 4522 Src1RFLo = legalizeToReg(loOperand(Src1)); 4523 Src1RFHi = legalizeToReg(hiOperand(Src1)); 4524 } 4525 4526 // a=icmp cond, b, c ==> 4527 // GCC does: 4528 // cmp b.hi, c.hi or cmp b.lo, c.lo 4529 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi 4530 // mov.<C1> t, #1 mov.<C1> t, #1 4531 // mov.<C2> t, #0 mov.<C2> t, #0 4532 // mov a, t mov a, t 4533 // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi" 4534 // is used for signed compares. In some cases, b and c need to be swapped as 4535 // well. 4536 // 4537 // LLVM does: 4538 // for EQ and NE: 4539 // eor t1, b.hi, c.hi 4540 // eor t2, b.lo, c.hi 4541 // orrs t, t1, t2 4542 // mov.<C> t, #1 4543 // mov a, t 4544 // 4545 // that's nice in that it's just as short but has fewer dependencies for 4546 // better ILP at the cost of more registers. 4547 // 4548 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two 4549 // unconditional mov #0, two cmps, two conditional mov #1, and one 4550 // conditional reg mov. That has few dependencies for good ILP, but is a 4551 // longer sequence. 4552 // 4553 // So, we are going with the GCC version since it's usually better (except 4554 // perhaps for eq/ne). We could revisit special-casing eq/ne later. 4555 if (TableIcmp64[Condition].IsSigned) { 4556 Variable *ScratchReg = makeReg(IceType_i32); 4557 _cmp(Src0RLo, Src1RFLo); 4558 _sbcs(ScratchReg, Src0RHi, Src1RFHi); 4559 // ScratchReg isn't going to be used, but we need the side-effect of 4560 // setting flags from this operation. 
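// The cmp of the low words produces the borrow consumed by the sbcs of the
// high words, so the final N and V flags are those of the full 64-bit
// subtraction; the signed predicates reduce (after the optional operand swap
// above) to conditions that only read N and V.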
4561 Context.insert<InstFakeUse>(ScratchReg); 4562 } else { 4563 _cmp(Src0RHi, Src1RFHi); 4564 _cmp(Src0RLo, Src1RFLo, CondARM32::EQ); 4565 } 4566 return CondWhenTrue(TableIcmp64[Condition].C1); 4567 } 4568 4569 TargetARM32::CondWhenTrue 4570 TargetARM32::lowerInt32IcmpCond(InstIcmp::ICond Condition, Operand *Src0, 4571 Operand *Src1) { 4572 Int32Operands Srcs(Src0, Src1); 4573 if (!Srcs.hasConstOperand()) { 4574 4575 Variable *Src0R = Srcs.src0R(this); 4576 Operand *Src1RF = Srcs.src1RF(this); 4577 _cmp(Src0R, Src1RF); 4578 return CondWhenTrue(getIcmp32Mapping(Condition)); 4579 } 4580 4581 Variable *Src0R = Srcs.src0R(this); 4582 const int32_t Value = Srcs.getConstantValue(); 4583 if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && Value == 0) { 4584 _tst(Src0R, Src0R); 4585 return CondWhenTrue(getIcmp32Mapping(Condition)); 4586 } 4587 4588 if (!Srcs.swappedOperands() && !Srcs.immediateIsFlexEncodable() && 4589 Srcs.negatedImmediateIsFlexEncodable()) { 4590 Operand *Src1F = Srcs.negatedSrc1F(this); 4591 _cmn(Src0R, Src1F); 4592 return CondWhenTrue(getIcmp32Mapping(Condition)); 4593 } 4594 4595 Operand *Src1RF = Srcs.src1RF(this); 4596 if (!Srcs.swappedOperands()) { 4597 _cmp(Src0R, Src1RF); 4598 } else { 4599 Variable *T = makeReg(IceType_i32); 4600 _rsbs(T, Src0R, Src1RF); 4601 Context.insert<InstFakeUse>(T); 4602 } 4603 return CondWhenTrue(getIcmp32Mapping(Condition)); 4604 } 4605 4606 TargetARM32::CondWhenTrue 4607 TargetARM32::lowerInt8AndInt16IcmpCond(InstIcmp::ICond Condition, Operand *Src0, 4608 Operand *Src1) { 4609 Int32Operands Srcs(Src0, Src1); 4610 const int32_t ShAmt = 32 - getScalarIntBitWidth(Src0->getType()); 4611 assert(ShAmt >= 0); 4612 4613 if (!Srcs.hasConstOperand()) { 4614 Variable *Src0R = makeReg(IceType_i32); 4615 Operand *ShAmtImm = shAmtImm(ShAmt); 4616 _lsl(Src0R, legalizeToReg(Src0), ShAmtImm); 4617 4618 Variable *Src1R = legalizeToReg(Src1); 4619 auto *Src1F = OperandARM32FlexReg::create(Func, IceType_i32, Src1R, 4620 OperandARM32::LSL, ShAmtImm); 4621 _cmp(Src0R, Src1F); 4622 return CondWhenTrue(getIcmp32Mapping(Condition)); 4623 } 4624 4625 const int32_t Value = Srcs.getConstantValue(); 4626 if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && Value == 0) { 4627 Operand *ShAmtImm = shAmtImm(ShAmt); 4628 Variable *T = makeReg(IceType_i32); 4629 _lsls(T, Srcs.src0R(this), ShAmtImm); 4630 Context.insert<InstFakeUse>(T); 4631 return CondWhenTrue(getIcmp32Mapping(Condition)); 4632 } 4633 4634 Variable *ConstR = makeReg(IceType_i32); 4635 _mov(ConstR, 4636 legalize(Ctx->getConstantInt32(Value << ShAmt), Legal_Reg | Legal_Flex)); 4637 Operand *NonConstF = OperandARM32FlexReg::create( 4638 Func, IceType_i32, Srcs.src0R(this), OperandARM32::LSL, 4639 Ctx->getConstantInt32(ShAmt)); 4640 4641 if (Srcs.swappedOperands()) { 4642 _cmp(ConstR, NonConstF); 4643 } else { 4644 Variable *T = makeReg(IceType_i32); 4645 _rsbs(T, ConstR, NonConstF); 4646 Context.insert<InstFakeUse>(T); 4647 } 4648 return CondWhenTrue(getIcmp32Mapping(Condition)); 4649 } 4650 4651 TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Instr) { 4652 return lowerIcmpCond(Instr->getCondition(), Instr->getSrc(0), 4653 Instr->getSrc(1)); 4654 } 4655 4656 TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(InstIcmp::ICond Condition, 4657 Operand *Src0, 4658 Operand *Src1) { 4659 Src0 = legalizeUndef(Src0); 4660 Src1 = legalizeUndef(Src1); 4661 4662 // a=icmp cond b, c ==> 4663 // GCC does: 4664 // <u/s>xtb tb, b 4665 // <u/s>xtb tc, c 4666 // cmp tb, tc 4667 // 
mov.C1 t, #0 4668 // mov.C2 t, #1 4669 // mov a, t 4670 // where the unsigned/sign extension is not needed for 32-bit. They also have 4671 // special cases for EQ and NE. E.g., for NE: 4672 // <extend to tb, tc> 4673 // subs t, tb, tc 4674 // movne t, #1 4675 // mov a, t 4676 // 4677 // LLVM does: 4678 // lsl tb, b, #<N> 4679 // mov t, #0 4680 // cmp tb, c, lsl #<N> 4681 // mov.<C> t, #1 4682 // mov a, t 4683 // 4684 // the left shift is by 0, 16, or 24, which allows the comparison to focus on 4685 // the digits that actually matter (for 16-bit or 8-bit signed/unsigned). For 4686 // the unsigned case, for some reason it does similar to GCC and does a uxtb 4687 // first. It's not clear to me why that special-casing is needed. 4688 // 4689 // We'll go with the LLVM way for now, since it's shorter and has just as few 4690 // dependencies. 4691 switch (Src0->getType()) { 4692 default: 4693 llvm::report_fatal_error("Unhandled type in lowerIcmpCond"); 4694 case IceType_i1: 4695 case IceType_i8: 4696 case IceType_i16: 4697 return lowerInt8AndInt16IcmpCond(Condition, Src0, Src1); 4698 case IceType_i32: 4699 return lowerInt32IcmpCond(Condition, Src0, Src1); 4700 case IceType_i64: 4701 return lowerInt64IcmpCond(Condition, Src0, Src1); 4702 } 4703 } 4704 4705 void TargetARM32::lowerIcmp(const InstIcmp *Instr) { 4706 Variable *Dest = Instr->getDest(); 4707 const Type DestTy = Dest->getType(); 4708 4709 if (isVectorType(DestTy)) { 4710 auto *T = makeReg(DestTy); 4711 auto *Src0 = legalizeToReg(Instr->getSrc(0)); 4712 auto *Src1 = legalizeToReg(Instr->getSrc(1)); 4713 const Type SrcTy = Src0->getType(); 4714 4715 bool NeedsShl = false; 4716 Type NewTypeAfterShl; 4717 SizeT ShAmt; 4718 switch (SrcTy) { 4719 default: 4720 break; 4721 case IceType_v16i1: 4722 NeedsShl = true; 4723 NewTypeAfterShl = IceType_v16i8; 4724 ShAmt = 7; 4725 break; 4726 case IceType_v8i1: 4727 NeedsShl = true; 4728 NewTypeAfterShl = IceType_v8i16; 4729 ShAmt = 15; 4730 break; 4731 case IceType_v4i1: 4732 NeedsShl = true; 4733 NewTypeAfterShl = IceType_v4i32; 4734 ShAmt = 31; 4735 break; 4736 } 4737 4738 if (NeedsShl) { 4739 auto *Imm = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(ShAmt)); 4740 auto *Src0T = makeReg(NewTypeAfterShl); 4741 auto *Src0Shl = makeReg(NewTypeAfterShl); 4742 _mov(Src0T, Src0); 4743 _vshl(Src0Shl, Src0T, Imm); 4744 Src0 = Src0Shl; 4745 4746 auto *Src1T = makeReg(NewTypeAfterShl); 4747 auto *Src1Shl = makeReg(NewTypeAfterShl); 4748 _mov(Src1T, Src1); 4749 _vshl(Src1Shl, Src1T, Imm); 4750 Src1 = Src1Shl; 4751 } 4752 4753 switch (Instr->getCondition()) { 4754 default: 4755 llvm::report_fatal_error("Unhandled integer comparison."); 4756 #define _Vceq(T, S0, S1, Signed) _vceq(T, S0, S1) 4757 #define _Vcge(T, S0, S1, Signed) \ 4758 _vcge(T, S0, S1) \ 4759 ->setSignType(Signed ? InstARM32::FS_Signed : InstARM32::FS_Unsigned) 4760 #define _Vcgt(T, S0, S1, Signed) \ 4761 _vcgt(T, S0, S1) \ 4762 ->setSignType(Signed ? InstARM32::FS_Signed : InstARM32::FS_Unsigned) 4763 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V) \ 4764 case InstIcmp::val: { \ 4765 _Vc##C_V(T, (INV_V) ? Src1 : Src0, (INV_V) ? 
Src0 : Src1, is_signed); \ 4766 if (NEG_V) { \ 4767 auto *TInv = makeReg(DestTy); \ 4768 _vmvn(TInv, T); \ 4769 T = TInv; \ 4770 } \ 4771 } break; 4772 ICMPARM32_TABLE 4773 #undef X 4774 #undef _Vcgt 4775 #undef _Vcge 4776 #undef _Vceq 4777 } 4778 _mov(Dest, T); 4779 return; 4780 } 4781 4782 Operand *_0 = 4783 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex); 4784 Operand *_1 = legalize(Ctx->getConstantInt32(1), Legal_Reg | Legal_Flex); 4785 Variable *T = makeReg(IceType_i1); 4786 4787 _mov(T, _0); 4788 CondWhenTrue Cond = lowerIcmpCond(Instr); 4789 _mov_redefined(T, _1, Cond.WhenTrue0); 4790 _mov(Dest, T); 4791 4792 assert(Cond.WhenTrue1 == CondARM32::kNone); 4793 4794 return; 4795 } 4796 4797 void TargetARM32::lowerInsertElement(const InstInsertElement *Instr) { 4798 Variable *Dest = Instr->getDest(); 4799 Type DestTy = Dest->getType(); 4800 4801 Variable *Src0 = legalizeToReg(Instr->getSrc(0)); 4802 Variable *Src1 = legalizeToReg(Instr->getSrc(1)); 4803 Operand *Src2 = Instr->getSrc(2); 4804 4805 if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src2)) { 4806 const uint32_t Index = Imm->getValue(); 4807 Variable *T = makeReg(DestTy); 4808 4809 if (isFloatingType(DestTy)) { 4810 T->setRegClass(RegARM32::RCARM32_QtoS); 4811 } 4812 4813 _mov(T, Src0); 4814 _insertelement(T, Src1, Index); 4815 _set_dest_redefined(); 4816 _mov(Dest, T); 4817 return; 4818 } 4819 assert(false && "insertelement requires a constant index"); 4820 } 4821 4822 namespace { 4823 inline uint64_t getConstantMemoryOrder(Operand *Opnd) { 4824 if (auto *Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) 4825 return Integer->getValue(); 4826 return Intrinsics::MemoryOrderInvalid; 4827 } 4828 } // end of anonymous namespace 4829 4830 void TargetARM32::lowerLoadLinkedStoreExclusive( 4831 Type Ty, Operand *Addr, std::function<Variable *(Variable *)> Operation, 4832 CondARM32::Cond Cond) { 4833 4834 auto *Retry = Context.insert<InstARM32Label>(this); 4835 4836 { // scoping for loop highlighting. 4837 Variable *Success = makeReg(IceType_i32); 4838 Variable *Tmp = (Ty == IceType_i64) ? makeI64RegPair() : makeReg(Ty); 4839 auto *_0 = Ctx->getConstantZero(IceType_i32); 4840 4841 Context.insert<InstFakeDef>(Tmp); 4842 Context.insert<InstFakeUse>(Tmp); 4843 Variable *AddrR = legalizeToReg(Addr); 4844 _ldrex(Tmp, formMemoryOperand(AddrR, Ty))->setDestRedefined(); 4845 auto *StoreValue = Operation(Tmp); 4846 assert(StoreValue->mustHaveReg()); 4847 // strex requires Dest to be a register other than Value or Addr. This 4848 // restriction is cleanly represented by adding an "early" definition of 4849 // Dest (or a latter use of all the sources.) 
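// (The architecture leaves strex with Rd equal to Rt or Rn unpredictable,
// hence the restriction.) The fake definition below keeps Success live across
// the ldrex/strex region, so the register allocator cannot give it the same
// register as the store value or the address.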
4850 Context.insert<InstFakeDef>(Success); 4851 if (Cond != CondARM32::AL) { 4852 _mov_redefined(Success, legalize(_0, Legal_Reg | Legal_Flex), 4853 InstARM32::getOppositeCondition(Cond)); 4854 } 4855 _strex(Success, StoreValue, formMemoryOperand(AddrR, Ty), Cond) 4856 ->setDestRedefined(); 4857 _cmp(Success, _0); 4858 } 4859 4860 _br(Retry, CondARM32::NE); 4861 } 4862 4863 namespace { 4864 InstArithmetic *createArithInst(Cfg *Func, uint32_t Operation, Variable *Dest, 4865 Variable *Src0, Operand *Src1) { 4866 InstArithmetic::OpKind Oper; 4867 switch (Operation) { 4868 default: 4869 llvm::report_fatal_error("Unknown AtomicRMW operation"); 4870 case Intrinsics::AtomicExchange: 4871 llvm::report_fatal_error("Can't handle Atomic xchg operation"); 4872 case Intrinsics::AtomicAdd: 4873 Oper = InstArithmetic::Add; 4874 break; 4875 case Intrinsics::AtomicAnd: 4876 Oper = InstArithmetic::And; 4877 break; 4878 case Intrinsics::AtomicSub: 4879 Oper = InstArithmetic::Sub; 4880 break; 4881 case Intrinsics::AtomicOr: 4882 Oper = InstArithmetic::Or; 4883 break; 4884 case Intrinsics::AtomicXor: 4885 Oper = InstArithmetic::Xor; 4886 break; 4887 } 4888 return InstArithmetic::create(Func, Oper, Dest, Src0, Src1); 4889 } 4890 } // end of anonymous namespace 4891 4892 void TargetARM32::lowerAtomicRMW(Variable *Dest, uint32_t Operation, 4893 Operand *Addr, Operand *Val) { 4894 // retry: 4895 // ldrex tmp, [addr] 4896 // mov contents, tmp 4897 // op result, contents, Val 4898 // strex success, result, [addr] 4899 // cmp success, 0 4900 // jne retry 4901 // fake-use(addr, operand) @ prevents undesirable clobbering. 4902 // mov dest, contents 4903 auto DestTy = Dest->getType(); 4904 4905 if (DestTy == IceType_i64) { 4906 lowerInt64AtomicRMW(Dest, Operation, Addr, Val); 4907 return; 4908 } 4909 4910 Operand *ValRF = nullptr; 4911 if (llvm::isa<ConstantInteger32>(Val)) { 4912 ValRF = Val; 4913 } else { 4914 ValRF = legalizeToReg(Val); 4915 } 4916 auto *ContentsR = makeReg(DestTy); 4917 auto *ResultR = makeReg(DestTy); 4918 4919 _dmb(); 4920 lowerLoadLinkedStoreExclusive( 4921 DestTy, Addr, 4922 [this, Operation, ResultR, ContentsR, ValRF](Variable *Tmp) { 4923 lowerAssign(InstAssign::create(Func, ContentsR, Tmp)); 4924 if (Operation == Intrinsics::AtomicExchange) { 4925 lowerAssign(InstAssign::create(Func, ResultR, ValRF)); 4926 } else { 4927 lowerArithmetic( 4928 createArithInst(Func, Operation, ResultR, ContentsR, ValRF)); 4929 } 4930 return ResultR; 4931 }); 4932 _dmb(); 4933 if (auto *ValR = llvm::dyn_cast<Variable>(ValRF)) { 4934 Context.insert<InstFakeUse>(ValR); 4935 } 4936 // Can't dce ContentsR. 
4937 Context.insert<InstFakeUse>(ContentsR); 4938 lowerAssign(InstAssign::create(Func, Dest, ContentsR)); 4939 } 4940 4941 void TargetARM32::lowerInt64AtomicRMW(Variable *Dest, uint32_t Operation, 4942 Operand *Addr, Operand *Val) { 4943 assert(Dest->getType() == IceType_i64); 4944 4945 auto *ResultR = makeI64RegPair(); 4946 4947 Context.insert<InstFakeDef>(ResultR); 4948 4949 Operand *ValRF = nullptr; 4950 if (llvm::dyn_cast<ConstantInteger64>(Val)) { 4951 ValRF = Val; 4952 } else { 4953 auto *ValR64 = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); 4954 ValR64->initHiLo(Func); 4955 ValR64->setMustNotHaveReg(); 4956 ValR64->getLo()->setMustHaveReg(); 4957 ValR64->getHi()->setMustHaveReg(); 4958 lowerAssign(InstAssign::create(Func, ValR64, Val)); 4959 ValRF = ValR64; 4960 } 4961 4962 auto *ContentsR = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); 4963 ContentsR->initHiLo(Func); 4964 ContentsR->setMustNotHaveReg(); 4965 ContentsR->getLo()->setMustHaveReg(); 4966 ContentsR->getHi()->setMustHaveReg(); 4967 4968 _dmb(); 4969 lowerLoadLinkedStoreExclusive( 4970 IceType_i64, Addr, 4971 [this, Operation, ResultR, ContentsR, ValRF](Variable *Tmp) { 4972 lowerAssign(InstAssign::create(Func, ContentsR, Tmp)); 4973 Context.insert<InstFakeUse>(Tmp); 4974 if (Operation == Intrinsics::AtomicExchange) { 4975 lowerAssign(InstAssign::create(Func, ResultR, ValRF)); 4976 } else { 4977 lowerArithmetic( 4978 createArithInst(Func, Operation, ResultR, ContentsR, ValRF)); 4979 } 4980 Context.insert<InstFakeUse>(ResultR->getHi()); 4981 Context.insert<InstFakeDef>(ResultR, ResultR->getLo()) 4982 ->setDestRedefined(); 4983 return ResultR; 4984 }); 4985 _dmb(); 4986 if (auto *ValR64 = llvm::dyn_cast<Variable64On32>(ValRF)) { 4987 Context.insert<InstFakeUse>(ValR64->getLo()); 4988 Context.insert<InstFakeUse>(ValR64->getHi()); 4989 } 4990 lowerAssign(InstAssign::create(Func, Dest, ContentsR)); 4991 } 4992 4993 void TargetARM32::postambleCtpop64(const InstCall *Instr) { 4994 Operand *Arg0 = Instr->getArg(0); 4995 if (isInt32Asserting32Or64(Arg0->getType())) { 4996 return; 4997 } 4998 // The popcount helpers always return 32-bit values, while the intrinsic's 4999 // signature matches some 64-bit platform's native instructions and expect to 5000 // fill a 64-bit reg. Thus, clear the upper bits of the dest just in case the 5001 // user doesn't do that in the IR or doesn't toss the bits via truncate. 5002 auto *DestHi = llvm::cast<Variable>(hiOperand(Instr->getDest())); 5003 Variable *T = makeReg(IceType_i32); 5004 Operand *_0 = 5005 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex); 5006 _mov(T, _0); 5007 _mov(DestHi, T); 5008 } 5009 5010 void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { 5011 Variable *Dest = Instr->getDest(); 5012 Type DestTy = (Dest != nullptr) ? Dest->getType() : IceType_void; 5013 Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID; 5014 switch (ID) { 5015 case Intrinsics::AtomicFence: 5016 case Intrinsics::AtomicFenceAll: 5017 assert(Dest == nullptr); 5018 _dmb(); 5019 return; 5020 case Intrinsics::AtomicIsLockFree: { 5021 Operand *ByteSize = Instr->getArg(0); 5022 auto *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize); 5023 if (CI == nullptr) { 5024 // The PNaCl ABI requires the byte size to be a compile-time constant. 
5025 Func->setError("AtomicIsLockFree byte size should be compile-time const"); 5026 return; 5027 } 5028 static constexpr int32_t NotLockFree = 0; 5029 static constexpr int32_t LockFree = 1; 5030 int32_t Result = NotLockFree; 5031 switch (CI->getValue()) { 5032 case 1: 5033 case 2: 5034 case 4: 5035 case 8: 5036 Result = LockFree; 5037 break; 5038 } 5039 _mov(Dest, legalizeToReg(Ctx->getConstantInt32(Result))); 5040 return; 5041 } 5042 case Intrinsics::AtomicLoad: { 5043 assert(isScalarIntegerType(DestTy)); 5044 // We require the memory address to be naturally aligned. Given that is the 5045 // case, then normal loads are atomic. 5046 if (!Intrinsics::isMemoryOrderValid( 5047 ID, getConstantMemoryOrder(Instr->getArg(1)))) { 5048 Func->setError("Unexpected memory ordering for AtomicLoad"); 5049 return; 5050 } 5051 Variable *T; 5052 5053 if (DestTy == IceType_i64) { 5054 // ldrex is the only arm instruction that is guaranteed to load a 64-bit 5055 // integer atomically. Everything else works with a regular ldr. 5056 T = makeI64RegPair(); 5057 _ldrex(T, formMemoryOperand(Instr->getArg(0), IceType_i64)); 5058 } else { 5059 T = makeReg(DestTy); 5060 _ldr(T, formMemoryOperand(Instr->getArg(0), DestTy)); 5061 } 5062 _dmb(); 5063 lowerAssign(InstAssign::create(Func, Dest, T)); 5064 // Adding a fake-use T to ensure the atomic load is not removed if Dest is 5065 // unused. 5066 Context.insert<InstFakeUse>(T); 5067 return; 5068 } 5069 case Intrinsics::AtomicStore: { 5070 // We require the memory address to be naturally aligned. Given that is the 5071 // case, then normal loads are atomic. 5072 if (!Intrinsics::isMemoryOrderValid( 5073 ID, getConstantMemoryOrder(Instr->getArg(2)))) { 5074 Func->setError("Unexpected memory ordering for AtomicStore"); 5075 return; 5076 } 5077 5078 auto *Value = Instr->getArg(0); 5079 if (Value->getType() == IceType_i64) { 5080 auto *ValueR = makeI64RegPair(); 5081 Context.insert<InstFakeDef>(ValueR); 5082 lowerAssign(InstAssign::create(Func, ValueR, Value)); 5083 _dmb(); 5084 lowerLoadLinkedStoreExclusive( 5085 IceType_i64, Instr->getArg(1), [this, ValueR](Variable *Tmp) { 5086 // The following fake-use prevents the ldrex instruction from being 5087 // dead code eliminated. 5088 Context.insert<InstFakeUse>(llvm::cast<Variable>(loOperand(Tmp))); 5089 Context.insert<InstFakeUse>(llvm::cast<Variable>(hiOperand(Tmp))); 5090 Context.insert<InstFakeUse>(Tmp); 5091 return ValueR; 5092 }); 5093 Context.insert<InstFakeUse>(ValueR); 5094 _dmb(); 5095 return; 5096 } 5097 5098 auto *ValueR = legalizeToReg(Instr->getArg(0)); 5099 const auto ValueTy = ValueR->getType(); 5100 assert(isScalarIntegerType(ValueTy)); 5101 auto *Addr = legalizeToReg(Instr->getArg(1)); 5102 5103 // non-64-bit stores are atomically as long as the address is aligned. This 5104 // is PNaCl, so addresses are aligned. 5105 _dmb(); 5106 _str(ValueR, formMemoryOperand(Addr, ValueTy)); 5107 _dmb(); 5108 return; 5109 } 5110 case Intrinsics::AtomicCmpxchg: { 5111 // retry: 5112 // ldrex tmp, [addr] 5113 // cmp tmp, expected 5114 // mov expected, tmp 5115 // strexeq success, new, [addr] 5116 // cmpeq success, #0 5117 // bne retry 5118 // mov dest, expected 5119 assert(isScalarIntegerType(DestTy)); 5120 // We require the memory address to be naturally aligned. Given that is the 5121 // case, then normal loads are atomic. 
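// Alignment only makes the individual loads and stores atomic; the
// compare-and-swap itself relies on the ldrex/strex retry loop emitted via
// lowerLoadLinkedStoreExclusive below, with dmb barriers on both sides
// supplying the required ordering.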
5122 if (!Intrinsics::isMemoryOrderValid( 5123 ID, getConstantMemoryOrder(Instr->getArg(3)), 5124 getConstantMemoryOrder(Instr->getArg(4)))) { 5125 Func->setError("Unexpected memory ordering for AtomicCmpxchg"); 5126 return; 5127 } 5128 5129 if (DestTy == IceType_i64) { 5130 Variable *LoadedValue = nullptr; 5131 5132 auto *New = makeI64RegPair(); 5133 Context.insert<InstFakeDef>(New); 5134 lowerAssign(InstAssign::create(Func, New, Instr->getArg(2))); 5135 5136 auto *Expected = makeI64RegPair(); 5137 Context.insert<InstFakeDef>(Expected); 5138 lowerAssign(InstAssign::create(Func, Expected, Instr->getArg(1))); 5139 5140 _dmb(); 5141 lowerLoadLinkedStoreExclusive( 5142 DestTy, Instr->getArg(0), 5143 [this, Expected, New, Instr, DestTy, &LoadedValue](Variable *Tmp) { 5144 auto *ExpectedLoR = llvm::cast<Variable>(loOperand(Expected)); 5145 auto *ExpectedHiR = llvm::cast<Variable>(hiOperand(Expected)); 5146 auto *TmpLoR = llvm::cast<Variable>(loOperand(Tmp)); 5147 auto *TmpHiR = llvm::cast<Variable>(hiOperand(Tmp)); 5148 _cmp(TmpLoR, ExpectedLoR); 5149 _cmp(TmpHiR, ExpectedHiR, CondARM32::EQ); 5150 LoadedValue = Tmp; 5151 return New; 5152 }, 5153 CondARM32::EQ); 5154 _dmb(); 5155 5156 Context.insert<InstFakeUse>(LoadedValue); 5157 lowerAssign(InstAssign::create(Func, Dest, LoadedValue)); 5158 // The fake-use Expected prevents the assignments to Expected (above) 5159 // from being removed if Dest is not used. 5160 Context.insert<InstFakeUse>(Expected); 5161 // New needs to be alive here, or its live range will end in the 5162 // strex instruction. 5163 Context.insert<InstFakeUse>(New); 5164 return; 5165 } 5166 5167 auto *New = legalizeToReg(Instr->getArg(2)); 5168 auto *Expected = legalizeToReg(Instr->getArg(1)); 5169 Variable *LoadedValue = nullptr; 5170 5171 _dmb(); 5172 lowerLoadLinkedStoreExclusive( 5173 DestTy, Instr->getArg(0), 5174 [this, Expected, New, Instr, DestTy, &LoadedValue](Variable *Tmp) { 5175 lowerIcmpCond(InstIcmp::Eq, Tmp, Expected); 5176 LoadedValue = Tmp; 5177 return New; 5178 }, 5179 CondARM32::EQ); 5180 _dmb(); 5181 5182 lowerAssign(InstAssign::create(Func, Dest, LoadedValue)); 5183 Context.insert<InstFakeUse>(Expected); 5184 Context.insert<InstFakeUse>(New); 5185 return; 5186 } 5187 case Intrinsics::AtomicRMW: { 5188 if (!Intrinsics::isMemoryOrderValid( 5189 ID, getConstantMemoryOrder(Instr->getArg(3)))) { 5190 Func->setError("Unexpected memory ordering for AtomicRMW"); 5191 return; 5192 } 5193 lowerAtomicRMW( 5194 Dest, static_cast<uint32_t>( 5195 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()), 5196 Instr->getArg(1), Instr->getArg(2)); 5197 return; 5198 } 5199 case Intrinsics::Bswap: { 5200 Operand *Val = Instr->getArg(0); 5201 Type Ty = Val->getType(); 5202 if (Ty == IceType_i64) { 5203 Val = legalizeUndef(Val); 5204 Variable *Val_Lo = legalizeToReg(loOperand(Val)); 5205 Variable *Val_Hi = legalizeToReg(hiOperand(Val)); 5206 Variable *T_Lo = makeReg(IceType_i32); 5207 Variable *T_Hi = makeReg(IceType_i32); 5208 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); 5209 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 5210 _rev(T_Lo, Val_Lo); 5211 _rev(T_Hi, Val_Hi); 5212 _mov(DestLo, T_Hi); 5213 _mov(DestHi, T_Lo); 5214 } else { 5215 assert(Ty == IceType_i32 || Ty == IceType_i16); 5216 Variable *ValR = legalizeToReg(Val); 5217 Variable *T = makeReg(Ty); 5218 _rev(T, ValR); 5219 if (Val->getType() == IceType_i16) { 5220 Operand *_16 = shAmtImm(16); 5221 _lsr(T, T, _16); 5222 } 5223 _mov(Dest, T); 5224 } 5225 return; 5226 } 5227 case Intrinsics::Ctpop: 
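// Ctpop is expanded into a call to a popcount runtime helper before lowering
// (postambleCtpop64() above handles the post-call clearing of the upper 32
// bits for the i64 case), so reaching this case indicates a compiler bug.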
{
5228 llvm::report_fatal_error("Ctpop should have been prelowered."); }
5229
5230 case Intrinsics::Ctlz: {
5231 // The "is zero undef" parameter is ignored and we always return a
5232 // well-defined value.
5233 Operand *Val = Instr->getArg(0);
5234 Variable *ValLoR;
5235 Variable *ValHiR = nullptr;
5236 if (Val->getType() == IceType_i64) {
5237 Val = legalizeUndef(Val);
5238 ValLoR = legalizeToReg(loOperand(Val));
5239 ValHiR = legalizeToReg(hiOperand(Val));
5240 } else {
5241 ValLoR = legalizeToReg(Val);
5242 }
5243 lowerCLZ(Dest, ValLoR, ValHiR);
5244 return;
5245 }
5246 case Intrinsics::Cttz: {
5247 // Essentially like Ctlz, but reverse the bits first.
5248 Operand *Val = Instr->getArg(0);
5249 Variable *ValLoR;
5250 Variable *ValHiR = nullptr;
5251 if (Val->getType() == IceType_i64) {
5252 Val = legalizeUndef(Val);
5253 ValLoR = legalizeToReg(loOperand(Val));
5254 ValHiR = legalizeToReg(hiOperand(Val));
5255 Variable *TLo = makeReg(IceType_i32);
5256 Variable *THi = makeReg(IceType_i32);
5257 _rbit(TLo, ValLoR);
5258 _rbit(THi, ValHiR);
5259 ValLoR = THi;
5260 ValHiR = TLo;
5261 } else {
5262 ValLoR = legalizeToReg(Val);
5263 Variable *T = makeReg(IceType_i32);
5264 _rbit(T, ValLoR);
5265 ValLoR = T;
5266 }
5267 lowerCLZ(Dest, ValLoR, ValHiR);
5268 return;
5269 }
5270 case Intrinsics::Fabs: {
5271 Variable *T = makeReg(DestTy);
5272 _vabs(T, legalizeToReg(Instr->getArg(0)));
5273 _mov(Dest, T);
5274 return;
5275 }
5276 case Intrinsics::Longjmp: {
5277 llvm::report_fatal_error("longjmp should have been prelowered.");
5278 }
5279 case Intrinsics::Memcpy: {
5280 llvm::report_fatal_error("memcpy should have been prelowered.");
5281 }
5282 case Intrinsics::Memmove: {
5283 llvm::report_fatal_error("memmove should have been prelowered.");
5284 }
5285 case Intrinsics::Memset: {
5286 llvm::report_fatal_error("memset should have been prelowered.");
5287 }
5288 case Intrinsics::NaClReadTP: {
5289 if (SandboxingType != ST_NaCl) {
5290 llvm::report_fatal_error("nacl-read-tp should have been prelowered.");
5291 }
5292 Variable *TP = legalizeToReg(OperandARM32Mem::create(
5293 Func, getPointerType(), getPhysicalRegister(RegARM32::Reg_r9),
5294 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32))));
5295 _mov(Dest, TP);
5296 return;
5297 }
5298 case Intrinsics::Setjmp: {
5299 llvm::report_fatal_error("setjmp should have been prelowered.");
5300 }
5301 case Intrinsics::Sqrt: {
5302 assert(isScalarFloatingType(Dest->getType()) ||
5303 getFlags().getApplicationBinaryInterface() != ::Ice::ABI_PNaCl);
5304 Variable *Src = legalizeToReg(Instr->getArg(0));
5305 Variable *T = makeReg(DestTy);
5306 _vsqrt(T, Src);
5307 _mov(Dest, T);
5308 return;
5309 }
5310 case Intrinsics::Stacksave: {
5311 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
5312 _mov(Dest, SP);
5313 return;
5314 }
5315 case Intrinsics::Stackrestore: {
5316 Variable *Val = legalizeToReg(Instr->getArg(0));
5317 Sandboxer(this).reset_sp(Val);
5318 return;
5319 }
5320 case Intrinsics::Trap:
5321 _trap();
5322 return;
5323 case Intrinsics::AddSaturateSigned:
5324 case Intrinsics::AddSaturateUnsigned: {
5325 bool Unsigned = (ID == Intrinsics::AddSaturateUnsigned);
5326 Variable *Src0 = legalizeToReg(Instr->getArg(0));
5327 Variable *Src1 = legalizeToReg(Instr->getArg(1));
5328 Variable *T = makeReg(DestTy);
5329 _vqadd(T, Src0, Src1, Unsigned);
5330 _mov(Dest, T);
5331 return;
5332 }
5333 case Intrinsics::LoadSubVector: {
5334 assert(llvm::isa<ConstantInteger32>(Instr->getArg(1)) &&
5335 "LoadSubVector second argument must be a
constant"); 5336 Variable *Dest = Instr->getDest(); 5337 Type Ty = Dest->getType(); 5338 auto *SubVectorSize = llvm::cast<ConstantInteger32>(Instr->getArg(1)); 5339 Operand *Addr = Instr->getArg(0); 5340 OperandARM32Mem *Src = formMemoryOperand(Addr, Ty); 5341 doMockBoundsCheck(Src); 5342 5343 if (Dest->isRematerializable()) { 5344 Context.insert<InstFakeDef>(Dest); 5345 return; 5346 } 5347 5348 auto *T = makeReg(Ty); 5349 switch (SubVectorSize->getValue()) { 5350 case 4: 5351 _vldr1d(T, Src); 5352 break; 5353 case 8: 5354 _vldr1q(T, Src); 5355 break; 5356 default: 5357 Func->setError("Unexpected size for LoadSubVector"); 5358 return; 5359 } 5360 _mov(Dest, T); 5361 return; 5362 } 5363 case Intrinsics::StoreSubVector: { 5364 assert(llvm::isa<ConstantInteger32>(Instr->getArg(2)) && 5365 "StoreSubVector third argument must be a constant"); 5366 auto *SubVectorSize = llvm::cast<ConstantInteger32>(Instr->getArg(2)); 5367 Variable *Value = legalizeToReg(Instr->getArg(0)); 5368 Operand *Addr = Instr->getArg(1); 5369 OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType()); 5370 doMockBoundsCheck(NewAddr); 5371 5372 Value = legalizeToReg(Value); 5373 5374 switch (SubVectorSize->getValue()) { 5375 case 4: 5376 _vstr1d(Value, NewAddr); 5377 break; 5378 case 8: 5379 _vstr1q(Value, NewAddr); 5380 break; 5381 default: 5382 Func->setError("Unexpected size for StoreSubVector"); 5383 return; 5384 } 5385 return; 5386 } 5387 case Intrinsics::MultiplyAddPairs: { 5388 Variable *Src0 = legalizeToReg(Instr->getArg(0)); 5389 Variable *Src1 = legalizeToReg(Instr->getArg(1)); 5390 Variable *T = makeReg(DestTy); 5391 _vmlap(T, Src0, Src1); 5392 _mov(Dest, T); 5393 return; 5394 } 5395 case Intrinsics::MultiplyHighSigned: 5396 case Intrinsics::MultiplyHighUnsigned: { 5397 bool Unsigned = (ID == Intrinsics::MultiplyHighUnsigned); 5398 Variable *Src0 = legalizeToReg(Instr->getArg(0)); 5399 Variable *Src1 = legalizeToReg(Instr->getArg(1)); 5400 Variable *T = makeReg(DestTy); 5401 _vmulh(T, Src0, Src1, Unsigned); 5402 _mov(Dest, T); 5403 return; 5404 } 5405 case Intrinsics::Nearbyint: { 5406 UnimplementedLoweringError(this, Instr); 5407 return; 5408 } 5409 case Intrinsics::Round: { 5410 UnimplementedLoweringError(this, Instr); 5411 return; 5412 } 5413 case Intrinsics::SignMask: { 5414 UnimplementedLoweringError(this, Instr); 5415 return; 5416 } 5417 case Intrinsics::SubtractSaturateSigned: 5418 case Intrinsics::SubtractSaturateUnsigned: { 5419 bool Unsigned = (ID == Intrinsics::SubtractSaturateUnsigned); 5420 Variable *Src0 = legalizeToReg(Instr->getArg(0)); 5421 Variable *Src1 = legalizeToReg(Instr->getArg(1)); 5422 Variable *T = makeReg(DestTy); 5423 _vqsub(T, Src0, Src1, Unsigned); 5424 _mov(Dest, T); 5425 return; 5426 } 5427 case Intrinsics::VectorPackSigned: 5428 case Intrinsics::VectorPackUnsigned: { 5429 bool Unsigned = (ID == Intrinsics::VectorPackUnsigned); 5430 bool Saturating = true; 5431 Variable *Src0 = legalizeToReg(Instr->getArg(0)); 5432 Variable *Src1 = legalizeToReg(Instr->getArg(1)); 5433 Variable *T = makeReg(DestTy); 5434 _vqmovn2(T, Src0, Src1, Unsigned, Saturating); 5435 _mov(Dest, T); 5436 return; 5437 } 5438 default: // UnknownIntrinsic 5439 Func->setError("Unexpected intrinsic"); 5440 return; 5441 } 5442 return; 5443 } 5444 5445 void TargetARM32::lowerCLZ(Variable *Dest, Variable *ValLoR, Variable *ValHiR) { 5446 Type Ty = Dest->getType(); 5447 assert(Ty == IceType_i32 || Ty == IceType_i64); 5448 Variable *T = makeReg(IceType_i32); 5449 _clz(T, ValLoR); 5450 if (Ty == IceType_i64) { 
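// For the 64-bit case, combine two 32-bit counts: if the high word is
// non-zero the result is clz(hi), otherwise it is 32 + clz(lo). Roughly:
//   clz   t, valLo
//   cmp   valHi, #0
//   add   t2, t, #32
//   clzne t2, valHi
//   mov   destLo, t2
//   mov   destHi, #0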
5451 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); 5452 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 5453 Operand *Zero = 5454 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex); 5455 Operand *ThirtyTwo = 5456 legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex); 5457 _cmp(ValHiR, Zero); 5458 Variable *T2 = makeReg(IceType_i32); 5459 _add(T2, T, ThirtyTwo); 5460 _clz(T2, ValHiR, CondARM32::NE); 5461 // T2 is actually a source as well when the predicate is not AL (since it 5462 // may leave T2 alone). We use _set_dest_redefined to prolong the liveness 5463 // of T2 as if it was used as a source. 5464 _set_dest_redefined(); 5465 _mov(DestLo, T2); 5466 Variable *T3 = makeReg(Zero->getType()); 5467 _mov(T3, Zero); 5468 _mov(DestHi, T3); 5469 return; 5470 } 5471 _mov(Dest, T); 5472 return; 5473 } 5474 5475 void TargetARM32::lowerLoad(const InstLoad *Load) { 5476 // A Load instruction can be treated the same as an Assign instruction, after 5477 // the source operand is transformed into an OperandARM32Mem operand. 5478 Type Ty = Load->getDest()->getType(); 5479 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty); 5480 Variable *DestLoad = Load->getDest(); 5481 5482 // TODO(jvoung): handled folding opportunities. Sign and zero extension can 5483 // be folded into a load. 5484 auto *Assign = InstAssign::create(Func, DestLoad, Src0); 5485 lowerAssign(Assign); 5486 } 5487 5488 namespace { 5489 void dumpAddressOpt(const Cfg *Func, const Variable *Base, int32_t Offset, 5490 const Variable *OffsetReg, int16_t OffsetRegShAmt, 5491 const Inst *Reason) { 5492 if (!BuildDefs::dump()) 5493 return; 5494 if (!Func->isVerbose(IceV_AddrOpt)) 5495 return; 5496 OstreamLocker _(Func->getContext()); 5497 Ostream &Str = Func->getContext()->getStrDump(); 5498 Str << "Instruction: "; 5499 Reason->dumpDecorated(Func); 5500 Str << " results in Base="; 5501 if (Base) 5502 Base->dump(Func); 5503 else 5504 Str << "<null>"; 5505 Str << ", OffsetReg="; 5506 if (OffsetReg) 5507 OffsetReg->dump(Func); 5508 else 5509 Str << "<null>"; 5510 Str << ", Shift=" << OffsetRegShAmt << ", Offset=" << Offset << "\n"; 5511 } 5512 5513 bool matchAssign(const VariablesMetadata *VMetadata, Variable **Var, 5514 int32_t *Offset, const Inst **Reason) { 5515 // Var originates from Var=SrcVar ==> set Var:=SrcVar 5516 if (*Var == nullptr) 5517 return false; 5518 const Inst *VarAssign = VMetadata->getSingleDefinition(*Var); 5519 if (!VarAssign) 5520 return false; 5521 assert(!VMetadata->isMultiDef(*Var)); 5522 if (!llvm::isa<InstAssign>(VarAssign)) 5523 return false; 5524 5525 Operand *SrcOp = VarAssign->getSrc(0); 5526 bool Optimized = false; 5527 if (auto *SrcVar = llvm::dyn_cast<Variable>(SrcOp)) { 5528 if (!VMetadata->isMultiDef(SrcVar) || 5529 // TODO: ensure SrcVar stays single-BB 5530 false) { 5531 Optimized = true; 5532 *Var = SrcVar; 5533 } else if (auto *Const = llvm::dyn_cast<ConstantInteger32>(SrcOp)) { 5534 int32_t MoreOffset = Const->getValue(); 5535 int32_t NewOffset = MoreOffset + *Offset; 5536 if (Utils::WouldOverflowAdd(*Offset, MoreOffset)) 5537 return false; 5538 *Var = nullptr; 5539 *Offset += NewOffset; 5540 Optimized = true; 5541 } 5542 } 5543 5544 if (Optimized) { 5545 *Reason = VarAssign; 5546 } 5547 5548 return Optimized; 5549 } 5550 5551 bool isAddOrSub(const Inst *Instr, InstArithmetic::OpKind *Kind) { 5552 if (const auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) { 5553 switch (Arith->getOp()) { 5554 default: 5555 return false; 5556 case InstArithmetic::Add: 5557 
case InstArithmetic::Sub: 5558 *Kind = Arith->getOp(); 5559 return true; 5560 } 5561 } 5562 return false; 5563 } 5564 5565 bool matchCombinedBaseIndex(const VariablesMetadata *VMetadata, Variable **Base, 5566 Variable **OffsetReg, int32_t OffsetRegShamt, 5567 const Inst **Reason) { 5568 // OffsetReg==nullptr && Base is Base=Var1+Var2 ==> 5569 // set Base=Var1, OffsetReg=Var2, Shift=0 5570 if (*Base == nullptr) 5571 return false; 5572 if (*OffsetReg != nullptr) 5573 return false; 5574 (void)OffsetRegShamt; 5575 assert(OffsetRegShamt == 0); 5576 const Inst *BaseInst = VMetadata->getSingleDefinition(*Base); 5577 if (BaseInst == nullptr) 5578 return false; 5579 assert(!VMetadata->isMultiDef(*Base)); 5580 if (BaseInst->getSrcSize() < 2) 5581 return false; 5582 auto *Var1 = llvm::dyn_cast<Variable>(BaseInst->getSrc(0)); 5583 if (!Var1) 5584 return false; 5585 if (VMetadata->isMultiDef(Var1)) 5586 return false; 5587 auto *Var2 = llvm::dyn_cast<Variable>(BaseInst->getSrc(1)); 5588 if (!Var2) 5589 return false; 5590 if (VMetadata->isMultiDef(Var2)) 5591 return false; 5592 InstArithmetic::OpKind _; 5593 if (!isAddOrSub(BaseInst, &_) || 5594 // TODO: ensure Var1 and Var2 stay single-BB 5595 false) 5596 return false; 5597 *Base = Var1; 5598 *OffsetReg = Var2; 5599 // OffsetRegShamt is already 0. 5600 *Reason = BaseInst; 5601 return true; 5602 } 5603 5604 bool matchShiftedOffsetReg(const VariablesMetadata *VMetadata, 5605 Variable **OffsetReg, OperandARM32::ShiftKind *Kind, 5606 int32_t *OffsetRegShamt, const Inst **Reason) { 5607 // OffsetReg is OffsetReg=Var*Const && log2(Const)+Shift<=32 ==> 5608 // OffsetReg=Var, Shift+=log2(Const) 5609 // OffsetReg is OffsetReg=Var<<Const && Const+Shift<=32 ==> 5610 // OffsetReg=Var, Shift+=Const 5611 // OffsetReg is OffsetReg=Var>>Const && Const-Shift>=-32 ==> 5612 // OffsetReg=Var, Shift-=Const 5613 OperandARM32::ShiftKind NewShiftKind = OperandARM32::kNoShift; 5614 if (*OffsetReg == nullptr) 5615 return false; 5616 auto *IndexInst = VMetadata->getSingleDefinition(*OffsetReg); 5617 if (IndexInst == nullptr) 5618 return false; 5619 assert(!VMetadata->isMultiDef(*OffsetReg)); 5620 if (IndexInst->getSrcSize() < 2) 5621 return false; 5622 auto *ArithInst = llvm::dyn_cast<InstArithmetic>(IndexInst); 5623 if (ArithInst == nullptr) 5624 return false; 5625 auto *Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(0)); 5626 if (Var == nullptr) 5627 return false; 5628 auto *Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1)); 5629 if (Const == nullptr) { 5630 assert(!llvm::isa<ConstantInteger32>(ArithInst->getSrc(0))); 5631 return false; 5632 } 5633 if (VMetadata->isMultiDef(Var) || Const->getType() != IceType_i32) 5634 return false; 5635 5636 uint32_t NewShamt = -1; 5637 switch (ArithInst->getOp()) { 5638 default: 5639 return false; 5640 case InstArithmetic::Shl: { 5641 NewShiftKind = OperandARM32::LSL; 5642 NewShamt = Const->getValue(); 5643 if (NewShamt > 31) 5644 return false; 5645 } break; 5646 case InstArithmetic::Lshr: { 5647 NewShiftKind = OperandARM32::LSR; 5648 NewShamt = Const->getValue(); 5649 if (NewShamt > 31) 5650 return false; 5651 } break; 5652 case InstArithmetic::Ashr: { 5653 NewShiftKind = OperandARM32::ASR; 5654 NewShamt = Const->getValue(); 5655 if (NewShamt > 31) 5656 return false; 5657 } break; 5658 case InstArithmetic::Udiv: 5659 case InstArithmetic::Mul: { 5660 const uint32_t UnsignedConst = Const->getValue(); 5661 NewShamt = llvm::findFirstSet(UnsignedConst); 5662 if (NewShamt != llvm::findLastSet(UnsignedConst)) { 5663 // First bit set 
is not the same as the last bit set, so Const is not 5664 // a power of 2. 5665 return false; 5666 } 5667 NewShiftKind = ArithInst->getOp() == InstArithmetic::Udiv 5668 ? OperandARM32::LSR 5669 : OperandARM32::LSL; 5670 } break; 5671 } 5672 // Allowed "transitions": 5673 // kNoShift -> * iff NewShamt < 31 5674 // LSL -> LSL iff NewShamt + OffsetRegShamt < 31 5675 // LSR -> LSR iff NewShamt + OffsetRegShamt < 31 5676 // ASR -> ASR iff NewShamt + OffsetRegShamt < 31 5677 if (*Kind != OperandARM32::kNoShift && *Kind != NewShiftKind) { 5678 return false; 5679 } 5680 const int32_t NewOffsetRegShamt = *OffsetRegShamt + NewShamt; 5681 if (NewOffsetRegShamt > 31) 5682 return false; 5683 *OffsetReg = Var; 5684 *OffsetRegShamt = NewOffsetRegShamt; 5685 *Kind = NewShiftKind; 5686 *Reason = IndexInst; 5687 return true; 5688 } 5689 5690 bool matchOffsetBase(const VariablesMetadata *VMetadata, Variable **Base, 5691 int32_t *Offset, const Inst **Reason) { 5692 // Base is Base=Var+Const || Base is Base=Const+Var ==> 5693 // set Base=Var, Offset+=Const 5694 // Base is Base=Var-Const ==> 5695 // set Base=Var, Offset-=Const 5696 if (*Base == nullptr) 5697 return false; 5698 const Inst *BaseInst = VMetadata->getSingleDefinition(*Base); 5699 if (BaseInst == nullptr) { 5700 return false; 5701 } 5702 assert(!VMetadata->isMultiDef(*Base)); 5703 5704 auto *ArithInst = llvm::dyn_cast<const InstArithmetic>(BaseInst); 5705 if (ArithInst == nullptr) 5706 return false; 5707 InstArithmetic::OpKind Kind; 5708 if (!isAddOrSub(ArithInst, &Kind)) 5709 return false; 5710 bool IsAdd = Kind == InstArithmetic::Add; 5711 Operand *Src0 = ArithInst->getSrc(0); 5712 Operand *Src1 = ArithInst->getSrc(1); 5713 auto *Var0 = llvm::dyn_cast<Variable>(Src0); 5714 auto *Var1 = llvm::dyn_cast<Variable>(Src1); 5715 auto *Const0 = llvm::dyn_cast<ConstantInteger32>(Src0); 5716 auto *Const1 = llvm::dyn_cast<ConstantInteger32>(Src1); 5717 Variable *NewBase = nullptr; 5718 int32_t NewOffset = *Offset; 5719 5720 if (Var0 == nullptr && Const0 == nullptr) { 5721 assert(llvm::isa<ConstantRelocatable>(Src0)); 5722 return false; 5723 } 5724 5725 if (Var1 == nullptr && Const1 == nullptr) { 5726 assert(llvm::isa<ConstantRelocatable>(Src1)); 5727 return false; 5728 } 5729 5730 if (Var0 && Var1) 5731 // TODO(jpp): merge base/index splitting into here. 5732 return false; 5733 if (!IsAdd && Var1) 5734 return false; 5735 if (Var0) 5736 NewBase = Var0; 5737 else if (Var1) 5738 NewBase = Var1; 5739 // Compute the updated constant offset. 5740 if (Const0) { 5741 int32_t MoreOffset = IsAdd ? Const0->getValue() : -Const0->getValue(); 5742 if (Utils::WouldOverflowAdd(NewOffset, MoreOffset)) 5743 return false; 5744 NewOffset += MoreOffset; 5745 } 5746 if (Const1) { 5747 int32_t MoreOffset = IsAdd ? Const1->getValue() : -Const1->getValue(); 5748 if (Utils::WouldOverflowAdd(NewOffset, MoreOffset)) 5749 return false; 5750 NewOffset += MoreOffset; 5751 } 5752 5753 // Update the computed address parameters once we are sure optimization 5754 // is valid. 
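// For example, if Base was defined by "Base = Var + 8" and the running Offset
// was 4, the access is rewritten to use Var directly with Offset = 12; the
// add that produced Base may then become dead and be eliminated.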
5755 *Base = NewBase; 5756 *Offset = NewOffset; 5757 *Reason = BaseInst; 5758 return true; 5759 } 5760 } // end of anonymous namespace 5761 5762 OperandARM32Mem *TargetARM32::formAddressingMode(Type Ty, Cfg *Func, 5763 const Inst *LdSt, 5764 Operand *Base) { 5765 assert(Base != nullptr); 5766 int32_t OffsetImm = 0; 5767 Variable *OffsetReg = nullptr; 5768 int32_t OffsetRegShamt = 0; 5769 OperandARM32::ShiftKind ShiftKind = OperandARM32::kNoShift; 5770 5771 Func->resetCurrentNode(); 5772 if (Func->isVerbose(IceV_AddrOpt)) { 5773 OstreamLocker _(Func->getContext()); 5774 Ostream &Str = Func->getContext()->getStrDump(); 5775 Str << "\nAddress mode formation:\t"; 5776 LdSt->dumpDecorated(Func); 5777 } 5778 5779 if (isVectorType(Ty)) 5780 // vector loads and stores do not allow offsets, and only support the 5781 // "[reg]" addressing mode (the other supported modes are write back.) 5782 return nullptr; 5783 5784 auto *BaseVar = llvm::dyn_cast<Variable>(Base); 5785 if (BaseVar == nullptr) 5786 return nullptr; 5787 5788 (void)MemTraitsSize; 5789 assert(Ty < MemTraitsSize); 5790 auto *TypeTraits = &MemTraits[Ty]; 5791 const bool CanHaveIndex = !NeedSandboxing && TypeTraits->CanHaveIndex; 5792 const bool CanHaveShiftedIndex = 5793 !NeedSandboxing && TypeTraits->CanHaveShiftedIndex; 5794 const bool CanHaveImm = TypeTraits->CanHaveImm; 5795 const int32_t ValidImmMask = TypeTraits->ValidImmMask; 5796 (void)ValidImmMask; 5797 assert(!CanHaveImm || ValidImmMask >= 0); 5798 5799 const VariablesMetadata *VMetadata = Func->getVMetadata(); 5800 const Inst *Reason = nullptr; 5801 5802 do { 5803 if (Reason != nullptr) { 5804 dumpAddressOpt(Func, BaseVar, OffsetImm, OffsetReg, OffsetRegShamt, 5805 Reason); 5806 Reason = nullptr; 5807 } 5808 5809 if (matchAssign(VMetadata, &BaseVar, &OffsetImm, &Reason)) { 5810 continue; 5811 } 5812 5813 if (CanHaveIndex && 5814 matchAssign(VMetadata, &OffsetReg, &OffsetImm, &Reason)) { 5815 continue; 5816 } 5817 5818 if (CanHaveIndex && matchCombinedBaseIndex(VMetadata, &BaseVar, &OffsetReg, 5819 OffsetRegShamt, &Reason)) { 5820 continue; 5821 } 5822 5823 if (CanHaveShiftedIndex) { 5824 if (matchShiftedOffsetReg(VMetadata, &OffsetReg, &ShiftKind, 5825 &OffsetRegShamt, &Reason)) { 5826 continue; 5827 } 5828 5829 if ((OffsetRegShamt == 0) && 5830 matchShiftedOffsetReg(VMetadata, &BaseVar, &ShiftKind, 5831 &OffsetRegShamt, &Reason)) { 5832 std::swap(BaseVar, OffsetReg); 5833 continue; 5834 } 5835 } 5836 5837 if (matchOffsetBase(VMetadata, &BaseVar, &OffsetImm, &Reason)) { 5838 continue; 5839 } 5840 } while (Reason); 5841 5842 if (BaseVar == nullptr) { 5843 // [OffsetReg{, LSL Shamt}{, #OffsetImm}] is not legal in ARM, so we have to 5844 // legalize the addressing mode to [BaseReg, OffsetReg{, LSL Shamt}]. 5845 // Instead of a zeroed BaseReg, we initialize it with OffsetImm: 5846 // 5847 // [OffsetReg{, LSL Shamt}{, #OffsetImm}] -> 5848 // mov BaseReg, #OffsetImm 5849 // use of [BaseReg, OffsetReg{, LSL Shamt}] 5850 // 5851 const Type PointerType = getPointerType(); 5852 BaseVar = makeReg(PointerType); 5853 Context.insert<InstAssign>(BaseVar, Ctx->getConstantInt32(OffsetImm)); 5854 OffsetImm = 0; 5855 } else if (OffsetImm != 0) { 5856 // ARM Ldr/Str instructions have limited range immediates. The formation 5857 // loop above materialized an Immediate carelessly, so we ensure the 5858 // generated offset is sane. 5859 const int32_t PositiveOffset = OffsetImm > 0 ? OffsetImm : -OffsetImm; 5860 const InstArithmetic::OpKind Op = 5861 OffsetImm > 0 ? 
InstArithmetic::Add : InstArithmetic::Sub; 5862 5863 if (!CanHaveImm || !isLegalMemOffset(Ty, OffsetImm) || 5864 OffsetReg != nullptr) { 5865 if (OffsetReg == nullptr) { 5866 // We formed a [Base, #const] addressing mode which is not encodable in 5867 // ARM. There is little point in forming an address mode now if we don't 5868 // have an offset. Effectively, we would end up with something like 5869 // 5870 // [Base, #const] -> add T, Base, #const 5871 // use of [T] 5872 // 5873 // Which is exactly what we already have. So we just bite the bullet 5874 // here and don't form any address mode. 5875 return nullptr; 5876 } 5877 // We formed [Base, Offset {, LSL Amnt}, #const]. Oops. Legalize it to 5878 // 5879 // [Base, Offset, {LSL amount}, #const] -> 5880 // add T, Base, #const 5881 // use of [T, Offset {, LSL amount}] 5882 const Type PointerType = getPointerType(); 5883 Variable *T = makeReg(PointerType); 5884 Context.insert<InstArithmetic>(Op, T, BaseVar, 5885 Ctx->getConstantInt32(PositiveOffset)); 5886 BaseVar = T; 5887 OffsetImm = 0; 5888 } 5889 } 5890 5891 assert(BaseVar != nullptr); 5892 assert(OffsetImm == 0 || OffsetReg == nullptr); 5893 assert(OffsetReg == nullptr || CanHaveIndex); 5894 assert(OffsetImm < 0 ? (ValidImmMask & -OffsetImm) == -OffsetImm 5895 : (ValidImmMask & OffsetImm) == OffsetImm); 5896 5897 if (OffsetReg != nullptr) { 5898 Variable *OffsetR = makeReg(getPointerType()); 5899 Context.insert<InstAssign>(OffsetR, OffsetReg); 5900 return OperandARM32Mem::create(Func, Ty, BaseVar, OffsetR, ShiftKind, 5901 OffsetRegShamt); 5902 } 5903 5904 return OperandARM32Mem::create( 5905 Func, Ty, BaseVar, 5906 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(OffsetImm))); 5907 } 5908 5909 void TargetARM32::doAddressOptLoad() { 5910 Inst *Instr = iteratorToInst(Context.getCur()); 5911 assert(llvm::isa<InstLoad>(Instr)); 5912 Variable *Dest = Instr->getDest(); 5913 Operand *Addr = Instr->getSrc(0); 5914 if (OperandARM32Mem *Mem = 5915 formAddressingMode(Dest->getType(), Func, Instr, Addr)) { 5916 Instr->setDeleted(); 5917 Context.insert<InstLoad>(Dest, Mem); 5918 } 5919 } 5920 5921 void TargetARM32::randomlyInsertNop(float Probability, 5922 RandomNumberGenerator &RNG) { 5923 RandomNumberGeneratorWrapper RNGW(RNG); 5924 if (RNGW.getTrueWithProbability(Probability)) { 5925 _nop(); 5926 } 5927 } 5928 5929 void TargetARM32::lowerPhi(const InstPhi * /*Instr*/) { 5930 Func->setError("Phi found in regular instruction list"); 5931 } 5932 5933 void TargetARM32::lowerRet(const InstRet *Instr) { 5934 Variable *Reg = nullptr; 5935 if (Instr->hasRetValue()) { 5936 Operand *Src0 = Instr->getRetValue(); 5937 Type Ty = Src0->getType(); 5938 if (Ty == IceType_i64) { 5939 Src0 = legalizeUndef(Src0); 5940 Variable *R0 = legalizeToReg(loOperand(Src0), RegARM32::Reg_r0); 5941 Variable *R1 = legalizeToReg(hiOperand(Src0), RegARM32::Reg_r1); 5942 Reg = R0; 5943 Context.insert<InstFakeUse>(R1); 5944 } else if (Ty == IceType_f32) { 5945 Variable *S0 = legalizeToReg(Src0, RegARM32::Reg_s0); 5946 Reg = S0; 5947 } else if (Ty == IceType_f64) { 5948 Variable *D0 = legalizeToReg(Src0, RegARM32::Reg_d0); 5949 Reg = D0; 5950 } else if (isVectorType(Src0->getType())) { 5951 Variable *Q0 = legalizeToReg(Src0, RegARM32::Reg_q0); 5952 Reg = Q0; 5953 } else { 5954 Operand *Src0F = legalize(Src0, Legal_Reg | Legal_Flex); 5955 Reg = makeReg(Src0F->getType(), RegARM32::Reg_r0); 5956 _mov(Reg, Src0F, CondARM32::AL); 5957 } 5958 } 5959 // Add a ret instruction even if sandboxing is enabled, because addEpilog 5960 // 
explicitly looks for a ret instruction as a marker for where to insert the 5961 // frame removal instructions. addEpilog is responsible for restoring the 5962 // "lr" register as needed prior to this ret instruction. 5963 _ret(getPhysicalRegister(RegARM32::Reg_lr), Reg); 5964 5965 // Add a fake use of sp to make sure sp stays alive for the entire function. 5966 // Otherwise post-call sp adjustments get dead-code eliminated. 5967 // TODO: Are there more places where the fake use should be inserted? E.g. 5968 // "void f(int n){while(1) g(n);}" may not have a ret instruction. 5969 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); 5970 Context.insert<InstFakeUse>(SP); 5971 } 5972 5973 void TargetARM32::lowerShuffleVector(const InstShuffleVector *Instr) { 5974 auto *Dest = Instr->getDest(); 5975 const Type DestTy = Dest->getType(); 5976 5977 auto *T = makeReg(DestTy); 5978 auto *Src0 = Instr->getSrc(0); 5979 auto *Src1 = Instr->getSrc(1); 5980 const SizeT NumElements = typeNumElements(DestTy); 5981 const Type ElementType = typeElementType(DestTy); 5982 5983 bool Replicate = true; 5984 for (SizeT I = 1; Replicate && I < Instr->getNumIndexes(); ++I) { 5985 if (Instr->getIndexValue(I) != Instr->getIndexValue(0)) { 5986 Replicate = false; 5987 } 5988 } 5989 5990 if (Replicate) { 5991 Variable *Src0Var = legalizeToReg(Src0); 5992 _vdup(T, Src0Var, Instr->getIndexValue(0)); 5993 _mov(Dest, T); 5994 return; 5995 } 5996 5997 switch (DestTy) { 5998 case IceType_v8i1: 5999 case IceType_v8i16: { 6000 static constexpr SizeT ExpectedNumElements = 8; 6001 assert(ExpectedNumElements == Instr->getNumIndexes()); 6002 (void)ExpectedNumElements; 6003 6004 if (Instr->indexesAre(0, 0, 1, 1, 2, 2, 3, 3)) { 6005 Variable *Src0R = legalizeToReg(Src0); 6006 _vzip(T, Src0R, Src0R); 6007 _mov(Dest, T); 6008 return; 6009 } 6010 6011 if (Instr->indexesAre(0, 8, 1, 9, 2, 10, 3, 11)) { 6012 Variable *Src0R = legalizeToReg(Src0); 6013 Variable *Src1R = legalizeToReg(Src1); 6014 _vzip(T, Src0R, Src1R); 6015 _mov(Dest, T); 6016 return; 6017 } 6018 6019 if (Instr->indexesAre(0, 2, 4, 6, 0, 2, 4, 6)) { 6020 Variable *Src0R = legalizeToReg(Src0); 6021 _vqmovn2(T, Src0R, Src0R, false, false); 6022 _mov(Dest, T); 6023 return; 6024 } 6025 } break; 6026 case IceType_v16i1: 6027 case IceType_v16i8: { 6028 static constexpr SizeT ExpectedNumElements = 16; 6029 assert(ExpectedNumElements == Instr->getNumIndexes()); 6030 (void)ExpectedNumElements; 6031 6032 if (Instr->indexesAre(0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7)) { 6033 Variable *Src0R = legalizeToReg(Src0); 6034 _vzip(T, Src0R, Src0R); 6035 _mov(Dest, T); 6036 return; 6037 } 6038 6039 if (Instr->indexesAre(0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 6040 23)) { 6041 Variable *Src0R = legalizeToReg(Src0); 6042 Variable *Src1R = legalizeToReg(Src1); 6043 _vzip(T, Src0R, Src1R); 6044 _mov(Dest, T); 6045 return; 6046 } 6047 } break; 6048 case IceType_v4i1: 6049 case IceType_v4i32: 6050 case IceType_v4f32: { 6051 static constexpr SizeT ExpectedNumElements = 4; 6052 assert(ExpectedNumElements == Instr->getNumIndexes()); 6053 (void)ExpectedNumElements; 6054 6055 if (Instr->indexesAre(0, 0, 1, 1)) { 6056 Variable *Src0R = legalizeToReg(Src0); 6057 _vzip(T, Src0R, Src0R); 6058 _mov(Dest, T); 6059 return; 6060 } 6061 6062 if (Instr->indexesAre(0, 4, 1, 5)) { 6063 Variable *Src0R = legalizeToReg(Src0); 6064 Variable *Src1R = legalizeToReg(Src1); 6065 _vzip(T, Src0R, Src1R); 6066 _mov(Dest, T); 6067 return; 6068 } 6069 6070 if (Instr->indexesAre(0, 1, 4, 5)) { 6071 Variable 
*Src0R = legalizeToReg(Src0); 6072 Variable *Src1R = legalizeToReg(Src1); 6073 _vmovlh(T, Src0R, Src1R); 6074 _mov(Dest, T); 6075 return; 6076 } 6077 6078 if (Instr->indexesAre(2, 3, 2, 3)) { 6079 Variable *Src0R = legalizeToReg(Src0); 6080 _vmovhl(T, Src0R, Src0R); 6081 _mov(Dest, T); 6082 return; 6083 } 6084 6085 if (Instr->indexesAre(2, 3, 6, 7)) { 6086 Variable *Src0R = legalizeToReg(Src0); 6087 Variable *Src1R = legalizeToReg(Src1); 6088 _vmovhl(T, Src1R, Src0R); 6089 _mov(Dest, T); 6090 return; 6091 } 6092 } break; 6093 default: 6094 break; 6095 // TODO(jpp): figure out how to properly lower this without scalarization. 6096 } 6097 6098 // Unoptimized shuffle. Perform a series of inserts and extracts. 6099 Context.insert<InstFakeDef>(T); 6100 for (SizeT I = 0; I < Instr->getNumIndexes(); ++I) { 6101 auto *Index = Instr->getIndex(I); 6102 const SizeT Elem = Index->getValue(); 6103 auto *ExtElmt = makeReg(ElementType); 6104 if (Elem < NumElements) { 6105 lowerExtractElement( 6106 InstExtractElement::create(Func, ExtElmt, Src0, Index)); 6107 } else { 6108 lowerExtractElement(InstExtractElement::create( 6109 Func, ExtElmt, Src1, 6110 Ctx->getConstantInt32(Index->getValue() - NumElements))); 6111 } 6112 auto *NewT = makeReg(DestTy); 6113 lowerInsertElement(InstInsertElement::create(Func, NewT, T, ExtElmt, 6114 Ctx->getConstantInt32(I))); 6115 T = NewT; 6116 } 6117 _mov(Dest, T); 6118 } 6119 6120 void TargetARM32::lowerSelect(const InstSelect *Instr) { 6121 Variable *Dest = Instr->getDest(); 6122 Type DestTy = Dest->getType(); 6123 Operand *SrcT = Instr->getTrueOperand(); 6124 Operand *SrcF = Instr->getFalseOperand(); 6125 Operand *Condition = Instr->getCondition(); 6126 6127 if (!isVectorType(DestTy)) { 6128 lowerInt1ForSelect(Dest, Condition, legalizeUndef(SrcT), 6129 legalizeUndef(SrcF)); 6130 return; 6131 } 6132 6133 Type TType = DestTy; 6134 switch (DestTy) { 6135 default: 6136 llvm::report_fatal_error("Unexpected type for vector select."); 6137 case IceType_v4i1: 6138 TType = IceType_v4i32; 6139 break; 6140 case IceType_v8i1: 6141 TType = IceType_v8i16; 6142 break; 6143 case IceType_v16i1: 6144 TType = IceType_v16i8; 6145 break; 6146 case IceType_v4f32: 6147 TType = IceType_v4i32; 6148 break; 6149 case IceType_v4i32: 6150 case IceType_v8i16: 6151 case IceType_v16i8: 6152 break; 6153 } 6154 auto *T = makeReg(TType); 6155 lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition)); 6156 auto *SrcTR = legalizeToReg(SrcT); 6157 auto *SrcFR = legalizeToReg(SrcF); 6158 _vbsl(T, SrcTR, SrcFR)->setDestRedefined(); 6159 _mov(Dest, T); 6160 } 6161 6162 void TargetARM32::lowerStore(const InstStore *Instr) { 6163 Operand *Value = Instr->getData(); 6164 Operand *Addr = Instr->getAddr(); 6165 OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType()); 6166 Type Ty = NewAddr->getType(); 6167 6168 if (Ty == IceType_i64) { 6169 Value = legalizeUndef(Value); 6170 Variable *ValueHi = legalizeToReg(hiOperand(Value)); 6171 Variable *ValueLo = legalizeToReg(loOperand(Value)); 6172 _str(ValueHi, llvm::cast<OperandARM32Mem>(hiOperand(NewAddr))); 6173 _str(ValueLo, llvm::cast<OperandARM32Mem>(loOperand(NewAddr))); 6174 } else { 6175 Variable *ValueR = legalizeToReg(Value); 6176 _str(ValueR, NewAddr); 6177 } 6178 } 6179 6180 void TargetARM32::doAddressOptStore() { 6181 Inst *Instr = iteratorToInst(Context.getCur()); 6182 assert(llvm::isa<InstStore>(Instr)); 6183 Operand *Src = Instr->getSrc(0); 6184 Operand *Addr = Instr->getSrc(1); 6185 if (OperandARM32Mem *Mem = 6186 
formAddressingMode(Src->getType(), Func, Instr, Addr)) { 6187 Instr->setDeleted(); 6188 Context.insert<InstStore>(Src, Mem); 6189 } 6190 } 6191 6192 void TargetARM32::lowerSwitch(const InstSwitch *Instr) { 6193 // This implements the most naive possible lowering. 6194 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default 6195 Operand *Src0 = Instr->getComparison(); 6196 SizeT NumCases = Instr->getNumCases(); 6197 if (Src0->getType() == IceType_i64) { 6198 Src0 = legalizeUndef(Src0); 6199 Variable *Src0Lo = legalizeToReg(loOperand(Src0)); 6200 Variable *Src0Hi = legalizeToReg(hiOperand(Src0)); 6201 for (SizeT I = 0; I < NumCases; ++I) { 6202 Operand *ValueLo = Ctx->getConstantInt32(Instr->getValue(I)); 6203 Operand *ValueHi = Ctx->getConstantInt32(Instr->getValue(I) >> 32); 6204 ValueLo = legalize(ValueLo, Legal_Reg | Legal_Flex); 6205 ValueHi = legalize(ValueHi, Legal_Reg | Legal_Flex); 6206 _cmp(Src0Lo, ValueLo); 6207 _cmp(Src0Hi, ValueHi, CondARM32::EQ); 6208 _br(Instr->getLabel(I), CondARM32::EQ); 6209 } 6210 _br(Instr->getLabelDefault()); 6211 return; 6212 } 6213 6214 Variable *Src0Var = legalizeToReg(Src0); 6215 // If Src0 is not an i32, we left shift it -- see the icmp lowering for the 6216 // reason. 6217 assert(Src0Var->mustHaveReg()); 6218 const size_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType()); 6219 assert(ShiftAmt < 32); 6220 if (ShiftAmt > 0) { 6221 Operand *ShAmtImm = shAmtImm(ShiftAmt); 6222 Variable *T = makeReg(IceType_i32); 6223 _lsl(T, Src0Var, ShAmtImm); 6224 Src0Var = T; 6225 } 6226 6227 for (SizeT I = 0; I < NumCases; ++I) { 6228 Operand *Value = Ctx->getConstantInt32(Instr->getValue(I) << ShiftAmt); 6229 Value = legalize(Value, Legal_Reg | Legal_Flex); 6230 _cmp(Src0Var, Value); 6231 _br(Instr->getLabel(I), CondARM32::EQ); 6232 } 6233 _br(Instr->getLabelDefault()); 6234 } 6235 6236 void TargetARM32::lowerBreakpoint(const InstBreakpoint *Instr) { 6237 UnimplementedLoweringError(this, Instr); 6238 } 6239 6240 void TargetARM32::lowerUnreachable(const InstUnreachable * /*Instr*/) { 6241 _trap(); 6242 } 6243 6244 namespace { 6245 // Returns whether Opnd needs the GOT address. Currently, ConstantRelocatables, 6246 // and fp constants will need access to the GOT address. 6247 bool operandNeedsGot(const Operand *Opnd) { 6248 if (llvm::isa<ConstantRelocatable>(Opnd)) { 6249 return true; 6250 } 6251 6252 if (llvm::isa<ConstantFloat>(Opnd)) { 6253 uint32_t _; 6254 return !OperandARM32FlexFpImm::canHoldImm(Opnd, &_); 6255 } 6256 6257 const auto *F64 = llvm::dyn_cast<ConstantDouble>(Opnd); 6258 if (F64 != nullptr) { 6259 uint32_t _; 6260 return !OperandARM32FlexFpImm::canHoldImm(Opnd, &_) && 6261 !isFloatingPointZero(F64); 6262 } 6263 6264 return false; 6265 } 6266 6267 // Returns whether Phi needs the GOT address (which it does if any of its 6268 // operands needs the GOT address.) 6269 bool phiNeedsGot(const InstPhi *Phi) { 6270 if (Phi->isDeleted()) { 6271 return false; 6272 } 6273 6274 for (SizeT I = 0; I < Phi->getSrcSize(); ++I) { 6275 if (operandNeedsGot(Phi->getSrc(I))) { 6276 return true; 6277 } 6278 } 6279 6280 return false; 6281 } 6282 6283 // Returns whether **any** phi in Node needs the GOT address. 
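// prelowerPhis() below uses this to decide whether to insert a fake use of
// the GOT pointer at the top of the node, keeping the GOT initialization
// alive until the lowered phi assignments that need it are emitted.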
6284 bool anyPhiInNodeNeedsGot(CfgNode *Node) { 6285 for (auto &Inst : Node->getPhis()) { 6286 if (phiNeedsGot(llvm::cast<InstPhi>(&Inst))) { 6287 return true; 6288 } 6289 } 6290 return false; 6291 } 6292 6293 } // end of anonymous namespace 6294 6295 void TargetARM32::prelowerPhis() { 6296 CfgNode *Node = Context.getNode(); 6297 6298 if (SandboxingType == ST_Nonsfi) { 6299 assert(GotPtr != nullptr); 6300 if (anyPhiInNodeNeedsGot(Node)) { 6301 // If any phi instruction needs the GOT address, we place a 6302 // fake-use GotPtr 6303 // in Node to prevent the GotPtr's initialization from being dead code 6304 // eliminated. 6305 Node->getInsts().push_front(InstFakeUse::create(Func, GotPtr)); 6306 } 6307 } 6308 6309 PhiLowering::prelowerPhis32Bit(this, Node, Func); 6310 } 6311 6312 Variable *TargetARM32::makeVectorOfZeros(Type Ty, RegNumT RegNum) { 6313 Variable *Reg = makeReg(Ty, RegNum); 6314 Context.insert<InstFakeDef>(Reg); 6315 assert(isVectorType(Ty)); 6316 _veor(Reg, Reg, Reg); 6317 return Reg; 6318 } 6319 6320 // Helper for legalize() to emit the right code to lower an operand to a 6321 // register of the appropriate type. 6322 Variable *TargetARM32::copyToReg(Operand *Src, RegNumT RegNum) { 6323 Type Ty = Src->getType(); 6324 Variable *Reg = makeReg(Ty, RegNum); 6325 if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Src)) { 6326 _ldr(Reg, Mem); 6327 } else { 6328 _mov(Reg, Src); 6329 } 6330 return Reg; 6331 } 6332 6333 // TODO(jpp): remove unneeded else clauses in legalize. 6334 Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed, 6335 RegNumT RegNum) { 6336 Type Ty = From->getType(); 6337 // Assert that a physical register is allowed. To date, all calls to 6338 // legalize() allow a physical register. Legal_Flex converts registers to the 6339 // right type OperandARM32FlexReg as needed. 6340 assert(Allowed & Legal_Reg); 6341 6342 // Copied ipsis literis from TargetX86Base<Machine>. 6343 if (RegNum.hasNoValue()) { 6344 if (Variable *Subst = getContext().availabilityGet(From)) { 6345 // At this point we know there is a potential substitution available. 6346 if (!Subst->isRematerializable() && Subst->mustHaveReg() && 6347 !Subst->hasReg()) { 6348 // At this point we know the substitution will have a register. 6349 if (From->getType() == Subst->getType()) { 6350 // At this point we know the substitution's register is compatible. 6351 return Subst; 6352 } 6353 } 6354 } 6355 } 6356 6357 // Go through the various types of operands: OperandARM32Mem, 6358 // OperandARM32Flex, Constant, and Variable. Given the above assertion, if 6359 // type of operand is not legal (e.g., OperandARM32Mem and !Legal_Mem), we 6360 // can always copy to a register. 6361 if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(From)) { 6362 // Before doing anything with a Mem operand, we need to ensure that the 6363 // Base and Index components are in physical registers. 
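// (Note that the base may stay rematerializable here -- e.g., a variable that
// is really the frame or stack pointer plus a fixed offset -- since such
// variables are substituted with a concrete register and offset later on.)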
6364 Variable *Base = Mem->getBase(); 6365 Variable *Index = Mem->getIndex(); 6366 ConstantInteger32 *Offset = Mem->getOffset(); 6367 assert(Index == nullptr || Offset == nullptr); 6368 Variable *RegBase = nullptr; 6369 Variable *RegIndex = nullptr; 6370 assert(Base); 6371 RegBase = llvm::cast<Variable>( 6372 legalize(Base, Legal_Reg | Legal_Rematerializable)); 6373 assert(Ty < MemTraitsSize); 6374 if (Index) { 6375 assert(Offset == nullptr); 6376 assert(MemTraits[Ty].CanHaveIndex); 6377 RegIndex = legalizeToReg(Index); 6378 } 6379 if (Offset && Offset->getValue() != 0) { 6380 assert(Index == nullptr); 6381 static constexpr bool ZeroExt = false; 6382 assert(MemTraits[Ty].CanHaveImm); 6383 if (!OperandARM32Mem::canHoldOffset(Ty, ZeroExt, Offset->getValue())) { 6384 llvm::report_fatal_error("Invalid memory offset."); 6385 } 6386 } 6387 6388 // Create a new operand if there was a change. 6389 if (Base != RegBase || Index != RegIndex) { 6390 // There is only a reg +/- reg or reg + imm form. 6391 // Figure out which to re-create. 6392 if (RegIndex) { 6393 Mem = OperandARM32Mem::create(Func, Ty, RegBase, RegIndex, 6394 Mem->getShiftOp(), Mem->getShiftAmt(), 6395 Mem->getAddrMode()); 6396 } else { 6397 Mem = OperandARM32Mem::create(Func, Ty, RegBase, Offset, 6398 Mem->getAddrMode()); 6399 } 6400 } 6401 if (Allowed & Legal_Mem) { 6402 From = Mem; 6403 } else { 6404 Variable *Reg = makeReg(Ty, RegNum); 6405 _ldr(Reg, Mem); 6406 From = Reg; 6407 } 6408 return From; 6409 } 6410 6411 if (auto *Flex = llvm::dyn_cast<OperandARM32Flex>(From)) { 6412 if (!(Allowed & Legal_Flex)) { 6413 if (auto *FlexReg = llvm::dyn_cast<OperandARM32FlexReg>(Flex)) { 6414 if (FlexReg->getShiftOp() == OperandARM32::kNoShift) { 6415 From = FlexReg->getReg(); 6416 // Fall through and let From be checked as a Variable below, where it 6417 // may or may not need a register. 6418 } else { 6419 return copyToReg(Flex, RegNum); 6420 } 6421 } else { 6422 return copyToReg(Flex, RegNum); 6423 } 6424 } else { 6425 return From; 6426 } 6427 } 6428 6429 if (llvm::isa<Constant>(From)) { 6430 if (llvm::isa<ConstantUndef>(From)) { 6431 From = legalizeUndef(From, RegNum); 6432 if (isVectorType(Ty)) 6433 return From; 6434 } 6435 // There should be no constants of vector type (other than undef). 6436 assert(!isVectorType(Ty)); 6437 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) { 6438 uint32_t RotateAmt; 6439 uint32_t Immed_8; 6440 uint32_t Value = static_cast<uint32_t>(C32->getValue()); 6441 if (OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) { 6442 // The immediate can be encoded as a Flex immediate. We may return the 6443 // Flex operand if the caller has Allow'ed it. 6444 auto *OpF = OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt); 6445 const bool CanBeFlex = Allowed & Legal_Flex; 6446 if (CanBeFlex) 6447 return OpF; 6448 return copyToReg(OpF, RegNum); 6449 } else if (OperandARM32FlexImm::canHoldImm(~Value, &RotateAmt, 6450 &Immed_8)) { 6451 // Even though the immediate can't be encoded as a Flex operand, its 6452 // inverted bit pattern can, thus we use ARM's mvn to load the 32-bit 6453 // constant with a single instruction. 6454 auto *InvOpF = 6455 OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt); 6456 Variable *Reg = makeReg(Ty, RegNum); 6457 _mvn(Reg, InvOpF); 6458 return Reg; 6459 } else { 6460 // Do a movw/movt to a register. 6461 Variable *Reg = makeReg(Ty, RegNum); 6462 uint32_t UpperBits = (Value >> 16) & 0xFFFF; 6463 _movw(Reg, 6464 UpperBits != 0 ? 
Ctx->getConstantInt32(Value & 0xFFFF) : C32); 6465 if (UpperBits != 0) { 6466 _movt(Reg, Ctx->getConstantInt32(UpperBits)); 6467 } 6468 return Reg; 6469 } 6470 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) { 6471 Variable *Reg = makeReg(Ty, RegNum); 6472 if (SandboxingType != ST_Nonsfi) { 6473 _movw(Reg, C); 6474 _movt(Reg, C); 6475 } else { 6476 auto *GotAddr = legalizeToReg(GotPtr); 6477 GlobalString CGotoffName = createGotoffRelocation(C); 6478 loadNamedConstantRelocatablePIC( 6479 CGotoffName, Reg, [this, Reg](Variable *PC) { 6480 _ldr(Reg, OperandARM32Mem::create(Func, IceType_i32, PC, Reg)); 6481 }); 6482 _add(Reg, GotAddr, Reg); 6483 } 6484 return Reg; 6485 } else { 6486 assert(isScalarFloatingType(Ty)); 6487 uint32_t ModifiedImm; 6488 if (OperandARM32FlexFpImm::canHoldImm(From, &ModifiedImm)) { 6489 Variable *T = makeReg(Ty, RegNum); 6490 _mov(T, 6491 OperandARM32FlexFpImm::create(Func, From->getType(), ModifiedImm)); 6492 return T; 6493 } 6494 6495 if (Ty == IceType_f64 && isFloatingPointZero(From)) { 6496 // Use T = T ^ T to load a 64-bit fp zero. This does not work for f32 6497 // because ARM does not have a veor instruction with S registers. 6498 Variable *T = makeReg(IceType_f64, RegNum); 6499 Context.insert<InstFakeDef>(T); 6500 _veor(T, T, T); 6501 return T; 6502 } 6503 6504 // Load floats/doubles from literal pool. 6505 auto *CFrom = llvm::cast<Constant>(From); 6506 assert(CFrom->getShouldBePooled()); 6507 Constant *Offset = Ctx->getConstantSym(0, CFrom->getLabelName()); 6508 Variable *BaseReg = nullptr; 6509 if (SandboxingType == ST_Nonsfi) { 6510 // vldr does not support the [base, index] addressing mode, so we need 6511 // to legalize Offset to a register. Otherwise, we could simply 6512 // vldr dest, [got, reg(Offset)] 6513 BaseReg = legalizeToReg(Offset); 6514 } else { 6515 BaseReg = makeReg(getPointerType()); 6516 _movw(BaseReg, Offset); 6517 _movt(BaseReg, Offset); 6518 } 6519 From = formMemoryOperand(BaseReg, Ty); 6520 return copyToReg(From, RegNum); 6521 } 6522 } 6523 6524 if (auto *Var = llvm::dyn_cast<Variable>(From)) { 6525 if (Var->isRematerializable()) { 6526 if (Allowed & Legal_Rematerializable) { 6527 return From; 6528 } 6529 6530 Variable *T = makeReg(Var->getType(), RegNum); 6531 _mov(T, Var); 6532 return T; 6533 } 6534 // Check if the variable is guaranteed a physical register. This can happen 6535 // either when the variable is pre-colored or when it is assigned infinite 6536 // weight. 6537 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg()); 6538 // We need a new physical register for the operand if: 6539 // Mem is not allowed and Var isn't guaranteed a physical 6540 // register, or 6541 // RegNum is required and Var->getRegNum() doesn't match. 6542 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) || 6543 (RegNum.hasValue() && (RegNum != Var->getRegNum()))) { 6544 From = copyToReg(From, RegNum); 6545 } 6546 return From; 6547 } 6548 llvm::report_fatal_error("Unhandled operand kind in legalize()"); 6549 6550 return From; 6551 } 6552 6553 /// Provide a trivial wrapper to legalize() for this common usage. 6554 Variable *TargetARM32::legalizeToReg(Operand *From, RegNumT RegNum) { 6555 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum)); 6556 } 6557 6558 /// Legalize undef values to concrete values. 6559 Operand *TargetARM32::legalizeUndef(Operand *From, RegNumT RegNum) { 6560 Type Ty = From->getType(); 6561 if (llvm::isa<ConstantUndef>(From)) { 6562 // Lower undefs to zero. 
Another option is to lower undefs to an 6563 // uninitialized register; however, using an uninitialized register results 6564 // in less predictable code. 6565 // 6566 // If in the future the implementation is changed to lower undef values to 6567 // uninitialized registers, a FakeDef will be needed: 6568 // Context.insert(InstFakeDef::create(Func, Reg)); This is in order to 6569 // ensure that the live range of Reg is not overestimated. If the constant 6570 // being lowered is a 64 bit value, then the result should be split and the 6571 // lo and hi components will need to go in uninitialized registers. 6572 if (isVectorType(Ty)) 6573 return makeVectorOfZeros(Ty, RegNum); 6574 return Ctx->getConstantZero(Ty); 6575 } 6576 return From; 6577 } 6578 6579 OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) { 6580 auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand); 6581 // It may be the case that address mode optimization already creates an 6582 // OperandARM32Mem, so in that case it wouldn't need another level of 6583 // transformation. 6584 if (Mem) { 6585 return llvm::cast<OperandARM32Mem>(legalize(Mem)); 6586 } 6587 // If we didn't do address mode optimization, then we only have a 6588 // base/offset to work with. ARM always requires a base register, so 6589 // just use that to hold the operand. 6590 auto *Base = llvm::cast<Variable>( 6591 legalize(Operand, Legal_Reg | Legal_Rematerializable)); 6592 return OperandARM32Mem::create( 6593 Func, Ty, Base, 6594 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32))); 6595 } 6596 6597 Variable64On32 *TargetARM32::makeI64RegPair() { 6598 Variable64On32 *Reg = 6599 llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); 6600 Reg->setMustHaveReg(); 6601 Reg->initHiLo(Func); 6602 Reg->getLo()->setMustNotHaveReg(); 6603 Reg->getHi()->setMustNotHaveReg(); 6604 return Reg; 6605 } 6606 6607 Variable *TargetARM32::makeReg(Type Type, RegNumT RegNum) { 6608 // There aren't any 64-bit integer registers for ARM32. 6609 assert(Type != IceType_i64); 6610 assert(AllowTemporaryWithNoReg || RegNum.hasValue()); 6611 Variable *Reg = Func->makeVariable(Type); 6612 if (RegNum.hasValue()) 6613 Reg->setRegNum(RegNum); 6614 else 6615 Reg->setMustHaveReg(); 6616 return Reg; 6617 } 6618 6619 void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align, 6620 RegNumT TmpRegNum) { 6621 assert(llvm::isPowerOf2_32(Align)); 6622 uint32_t RotateAmt; 6623 uint32_t Immed_8; 6624 Operand *Mask; 6625 // Use AND or BIC to mask off the bits, depending on which immediate fits (if 6626 // it fits at all). Assume Align is usually small, in which case BIC works 6627 // better. Thus, this rounds down to the alignment. 
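// For example, aligning down to 16 bytes normally becomes roughly
//   bic reg, reg, #15
// and only when Align-1 cannot be encoded as a rotated immediate do we fall
// back to
//   and reg, reg, #-Align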
6628 if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) { 6629 Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex, 6630 TmpRegNum); 6631 _bic(Reg, Reg, Mask); 6632 } else { 6633 Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex, 6634 TmpRegNum); 6635 _and(Reg, Reg, Mask); 6636 } 6637 } 6638 6639 void TargetARM32::postLower() { 6640 if (Func->getOptLevel() == Opt_m1) 6641 return; 6642 markRedefinitions(); 6643 Context.availabilityUpdate(); 6644 } 6645 6646 void TargetARM32::makeRandomRegisterPermutation( 6647 llvm::SmallVectorImpl<RegNumT> &Permutation, 6648 const SmallBitVector &ExcludeRegisters, uint64_t Salt) const { 6649 (void)Permutation; 6650 (void)ExcludeRegisters; 6651 (void)Salt; 6652 UnimplementedError(getFlags()); 6653 } 6654 6655 void TargetARM32::emit(const ConstantInteger32 *C) const { 6656 if (!BuildDefs::dump()) 6657 return; 6658 Ostream &Str = Ctx->getStrEmit(); 6659 Str << "#" << C->getValue(); 6660 } 6661 6662 void TargetARM32::emit(const ConstantInteger64 *) const { 6663 llvm::report_fatal_error("Not expecting to emit 64-bit integers"); 6664 } 6665 6666 void TargetARM32::emit(const ConstantFloat *C) const { 6667 (void)C; 6668 UnimplementedError(getFlags()); 6669 } 6670 6671 void TargetARM32::emit(const ConstantDouble *C) const { 6672 (void)C; 6673 UnimplementedError(getFlags()); 6674 } 6675 6676 void TargetARM32::emit(const ConstantUndef *) const { 6677 llvm::report_fatal_error("undef value encountered by emitter."); 6678 } 6679 6680 void TargetARM32::emit(const ConstantRelocatable *C) const { 6681 if (!BuildDefs::dump()) 6682 return; 6683 Ostream &Str = Ctx->getStrEmit(); 6684 Str << "#"; 6685 emitWithoutPrefix(C); 6686 } 6687 6688 void TargetARM32::lowerInt1ForSelect(Variable *Dest, Operand *Boolean, 6689 Operand *TrueValue, Operand *FalseValue) { 6690 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex); 6691 6692 assert(Boolean->getType() == IceType_i1); 6693 6694 bool NeedsAnd1 = false; 6695 if (TrueValue->getType() == IceType_i1) { 6696 assert(FalseValue->getType() == IceType_i1); 6697 6698 Variable *TrueValueV = Func->makeVariable(IceType_i1); 6699 SafeBoolChain Src0Safe = lowerInt1(TrueValueV, TrueValue); 6700 TrueValue = TrueValueV; 6701 6702 Variable *FalseValueV = Func->makeVariable(IceType_i1); 6703 SafeBoolChain Src1Safe = lowerInt1(FalseValueV, FalseValue); 6704 FalseValue = FalseValueV; 6705 6706 NeedsAnd1 = Src0Safe == SBC_No || Src1Safe == SBC_No; 6707 } 6708 6709 Variable *DestLo = (Dest->getType() == IceType_i64) 6710 ? llvm::cast<Variable>(loOperand(Dest)) 6711 : Dest; 6712 Variable *DestHi = (Dest->getType() == IceType_i64) 6713 ? llvm::cast<Variable>(hiOperand(Dest)) 6714 : nullptr; 6715 Operand *FalseValueLo = (FalseValue->getType() == IceType_i64) 6716 ? loOperand(FalseValue) 6717 : FalseValue; 6718 Operand *FalseValueHi = 6719 (FalseValue->getType() == IceType_i64) ? hiOperand(FalseValue) : nullptr; 6720 6721 Operand *TrueValueLo = 6722 (TrueValue->getType() == IceType_i64) ? loOperand(TrueValue) : TrueValue; 6723 Operand *TrueValueHi = 6724 (TrueValue->getType() == IceType_i64) ? hiOperand(TrueValue) : nullptr; 6725 6726 Variable *T_Lo = makeReg(DestLo->getType()); 6727 Variable *T_Hi = (DestHi == nullptr) ? 
nullptr : makeReg(DestHi->getType()); 6728 6729 _mov(T_Lo, legalize(FalseValueLo, Legal_Reg | Legal_Flex)); 6730 if (DestHi) { 6731 _mov(T_Hi, legalize(FalseValueHi, Legal_Reg | Legal_Flex)); 6732 } 6733 6734 CondWhenTrue Cond(CondARM32::kNone); 6735 // FlagsWereSet is used to determine wether Boolean was folded or not. If not, 6736 // add an explicit _tst instruction below. 6737 bool FlagsWereSet = false; 6738 if (const Inst *Producer = Computations.getProducerOf(Boolean)) { 6739 switch (Producer->getKind()) { 6740 default: 6741 llvm::report_fatal_error("Unexpected producer."); 6742 case Inst::Icmp: { 6743 Cond = lowerIcmpCond(llvm::cast<InstIcmp>(Producer)); 6744 FlagsWereSet = true; 6745 } break; 6746 case Inst::Fcmp: { 6747 Cond = lowerFcmpCond(llvm::cast<InstFcmp>(Producer)); 6748 FlagsWereSet = true; 6749 } break; 6750 case Inst::Cast: { 6751 const auto *CastProducer = llvm::cast<InstCast>(Producer); 6752 assert(CastProducer->getCastKind() == InstCast::Trunc); 6753 Boolean = CastProducer->getSrc(0); 6754 // No flags were set, so a _tst(Src, 1) will be emitted below. Don't 6755 // bother legalizing Src to a Reg because it will be legalized before 6756 // emitting the tst instruction. 6757 FlagsWereSet = false; 6758 } break; 6759 case Inst::Arithmetic: { 6760 // This is a special case: we eagerly assumed Producer could be folded, 6761 // but in reality, it can't. No reason to panic: we just lower it using 6762 // the regular lowerArithmetic helper. 6763 const auto *ArithProducer = llvm::cast<InstArithmetic>(Producer); 6764 lowerArithmetic(ArithProducer); 6765 Boolean = ArithProducer->getDest(); 6766 // No flags were set, so a _tst(Dest, 1) will be emitted below. Don't 6767 // bother legalizing Dest to a Reg because it will be legalized before 6768 // emitting the tst instruction. 6769 FlagsWereSet = false; 6770 } break; 6771 } 6772 } 6773 6774 if (!FlagsWereSet) { 6775 // No flags have been set, so emit a tst Boolean, 1. 6776 Variable *Src = legalizeToReg(Boolean); 6777 _tst(Src, _1); 6778 Cond = CondWhenTrue(CondARM32::NE); // i.e., CondARM32::NotZero. 6779 } 6780 6781 if (Cond.WhenTrue0 == CondARM32::kNone) { 6782 assert(Cond.WhenTrue1 == CondARM32::kNone); 6783 } else { 6784 _mov_redefined(T_Lo, legalize(TrueValueLo, Legal_Reg | Legal_Flex), 6785 Cond.WhenTrue0); 6786 if (DestHi) { 6787 _mov_redefined(T_Hi, legalize(TrueValueHi, Legal_Reg | Legal_Flex), 6788 Cond.WhenTrue0); 6789 } 6790 } 6791 6792 if (Cond.WhenTrue1 != CondARM32::kNone) { 6793 _mov_redefined(T_Lo, legalize(TrueValueLo, Legal_Reg | Legal_Flex), 6794 Cond.WhenTrue1); 6795 if (DestHi) { 6796 _mov_redefined(T_Hi, legalize(TrueValueHi, Legal_Reg | Legal_Flex), 6797 Cond.WhenTrue1); 6798 } 6799 } 6800 6801 if (NeedsAnd1) { 6802 // We lowered something that is unsafe (i.e., can't provably be zero or 6803 // one). Truncate the result. 
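// (For example, a trunc-to-i1 producer may leave arbitrary data in the upper
// bits, so only bit 0 is kept.)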
    _and(T_Lo, T_Lo, _1);
  }

  _mov(DestLo, T_Lo);
  if (DestHi) {
    _mov(DestHi, T_Hi);
  }
}

TargetARM32::SafeBoolChain TargetARM32::lowerInt1(Variable *Dest,
                                                  Operand *Boolean) {
  assert(Boolean->getType() == IceType_i1);
  Variable *T = makeReg(IceType_i1);
  Operand *_0 =
      legalize(Ctx->getConstantZero(IceType_i1), Legal_Reg | Legal_Flex);
  Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);

  SafeBoolChain Safe = SBC_Yes;
  if (const Inst *Producer = Computations.getProducerOf(Boolean)) {
    switch (Producer->getKind()) {
    default:
      llvm::report_fatal_error("Unexpected producer.");
    case Inst::Icmp: {
      _mov(T, _0);
      CondWhenTrue Cond = lowerIcmpCond(llvm::cast<InstIcmp>(Producer));
      assert(Cond.WhenTrue0 != CondARM32::AL);
      assert(Cond.WhenTrue0 != CondARM32::kNone);
      assert(Cond.WhenTrue1 == CondARM32::kNone);
      _mov_redefined(T, _1, Cond.WhenTrue0);
    } break;
    case Inst::Fcmp: {
      _mov(T, _0);
      Inst *MovZero = Context.getLastInserted();
      CondWhenTrue Cond = lowerFcmpCond(llvm::cast<InstFcmp>(Producer));
      if (Cond.WhenTrue0 == CondARM32::AL) {
        assert(Cond.WhenTrue1 == CondARM32::kNone);
        MovZero->setDeleted();
        _mov(T, _1);
      } else if (Cond.WhenTrue0 != CondARM32::kNone) {
        _mov_redefined(T, _1, Cond.WhenTrue0);
      }
      if (Cond.WhenTrue1 != CondARM32::kNone) {
        assert(Cond.WhenTrue0 != CondARM32::kNone);
        assert(Cond.WhenTrue0 != CondARM32::AL);
        _mov_redefined(T, _1, Cond.WhenTrue1);
      }
    } break;
    case Inst::Cast: {
      const auto *CastProducer = llvm::cast<InstCast>(Producer);
      assert(CastProducer->getCastKind() == InstCast::Trunc);
      Operand *Src = CastProducer->getSrc(0);
      if (Src->getType() == IceType_i64)
        Src = loOperand(Src);
      _mov(T, legalize(Src, Legal_Reg | Legal_Flex));
      Safe = SBC_No;
    } break;
    case Inst::Arithmetic: {
      const auto *ArithProducer = llvm::cast<InstArithmetic>(Producer);
      Safe = lowerInt1Arithmetic(ArithProducer);
      _mov(T, ArithProducer->getDest());
    } break;
    }
  } else {
    _mov(T, legalize(Boolean, Legal_Reg | Legal_Flex));
  }

  _mov(Dest, T);
  return Safe;
}

namespace {
namespace BoolFolding {
bool shouldTrackProducer(const Inst &Instr) {
  switch (Instr.getKind()) {
  default:
    return false;
  case Inst::Icmp:
  case Inst::Fcmp:
    return true;
  case Inst::Cast: {
    switch (llvm::cast<InstCast>(&Instr)->getCastKind()) {
    default:
      return false;
    case InstCast::Trunc:
      return true;
    }
  }
  case Inst::Arithmetic: {
    switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
    default:
      return false;
    case InstArithmetic::And:
    case InstArithmetic::Or:
      return true;
    }
  }
  }
}

bool isValidConsumer(const Inst &Instr) {
  switch (Instr.getKind()) {
  default:
    return false;
  case Inst::Br:
    return true;
  case Inst::Select:
    return !isVectorType(Instr.getDest()->getType());
  case Inst::Cast: {
    switch (llvm::cast<InstCast>(&Instr)->getCastKind()) {
    default:
      return false;
    case InstCast::Sext:
      return !isVectorType(Instr.getDest()->getType());
    case InstCast::Zext:
      return !isVectorType(Instr.getDest()->getType());
    }
  }
  case Inst::Arithmetic: {
    switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
    default:
      return false;
    case InstArithmetic::And:
      return !isVectorType(Instr.getDest()->getType());
    case InstArithmetic::Or:
      return !isVectorType(Instr.getDest()->getType());
    }
  }
  }
}
} // end of namespace BoolFolding

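// FpFolding and IntFolding mirror BoolFolding: they track a floating-point or
// integer multiply whose only use is an immediately following add/sub in the
// same block, so that the producer/consumer pair can be combined into a single
// multiply-accumulate style instruction during lowering (e.g., mla/mls or
// vmla/vmls).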
namespace FpFolding {
bool shouldTrackProducer(const Inst &Instr) {
  switch (Instr.getKind()) {
  default:
    return false;
  case Inst::Arithmetic: {
    switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
    default:
      return false;
    case InstArithmetic::Fmul:
      return true;
    }
  }
  }
}

bool isValidConsumer(const Inst &Instr) {
  switch (Instr.getKind()) {
  default:
    return false;
  case Inst::Arithmetic: {
    switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
    default:
      return false;
    case InstArithmetic::Fadd:
    case InstArithmetic::Fsub:
      return true;
    }
  }
  }
}
} // end of namespace FpFolding

namespace IntFolding {
bool shouldTrackProducer(const Inst &Instr) {
  switch (Instr.getKind()) {
  default:
    return false;
  case Inst::Arithmetic: {
    switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
    default:
      return false;
    case InstArithmetic::Mul:
      return true;
    }
  }
  }
}

bool isValidConsumer(const Inst &Instr) {
  switch (Instr.getKind()) {
  default:
    return false;
  case Inst::Arithmetic: {
    switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
    default:
      return false;
    case InstArithmetic::Add:
    case InstArithmetic::Sub:
      return true;
    }
  }
  }
}
} // end of namespace IntFolding
} // end of anonymous namespace

void TargetARM32::ComputationTracker::recordProducers(CfgNode *Node) {
  for (Inst &Instr : Node->getInsts()) {
    // Check whether Instr is a valid producer.
    Variable *Dest = Instr.getDest();
    if (!Instr.isDeleted() // only consider non-deleted instructions; and
        && Dest            // only instructions with an actual dest var; and
        && Dest->getType() == IceType_i1 // only bool-type dest vars; and
        && BoolFolding::shouldTrackProducer(Instr)) { // white-listed instr.
      KnownComputations.emplace(Dest->getIndex(),
                                ComputationEntry(&Instr, IceType_i1));
    }
    if (!Instr.isDeleted() // only consider non-deleted instructions; and
        && Dest            // only instructions with an actual dest var; and
        && isScalarFloatingType(Dest->getType()) // fp-type only dest vars; and
        && FpFolding::shouldTrackProducer(Instr)) { // white-listed instr.
      KnownComputations.emplace(Dest->getIndex(),
                                ComputationEntry(&Instr, Dest->getType()));
    }
    if (!Instr.isDeleted() // only consider non-deleted instructions; and
        && Dest            // only instructions with an actual dest var; and
        && Dest->getType() == IceType_i32 // i32 only dest vars; and
        && IntFolding::shouldTrackProducer(Instr)) { // white-listed instr.
      KnownComputations.emplace(Dest->getIndex(),
                                ComputationEntry(&Instr, IceType_i32));
    }
    // Check each src variable against the map.
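    // A producer stays in the map only while every use found here is a
    // whitelisted consumer (and, for the i32 and fp producers, only when the
    // producer feeds the consumer's second operand); any other use evicts it.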
    FOREACH_VAR_IN_INST(Var, Instr) {
      SizeT VarNum = Var->getIndex();
      auto ComputationIter = KnownComputations.find(VarNum);
      if (ComputationIter == KnownComputations.end()) {
        continue;
      }

      ++ComputationIter->second.NumUses;
      switch (ComputationIter->second.ComputationType) {
      default:
        KnownComputations.erase(VarNum);
        continue;
      case IceType_i1:
        if (!BoolFolding::isValidConsumer(Instr)) {
          KnownComputations.erase(VarNum);
          continue;
        }
        break;
      case IceType_i32:
        if (IndexOfVarInInst(Var) != 1 || !IntFolding::isValidConsumer(Instr)) {
          KnownComputations.erase(VarNum);
          continue;
        }
        break;
      case IceType_f32:
      case IceType_f64:
        if (IndexOfVarInInst(Var) != 1 || !FpFolding::isValidConsumer(Instr)) {
          KnownComputations.erase(VarNum);
          continue;
        }
        break;
      }

      if (Instr.isLastUse(Var)) {
        ComputationIter->second.IsLiveOut = false;
      }
    }
  }

  for (auto Iter = KnownComputations.begin(), End = KnownComputations.end();
       Iter != End;) {
    // Disable the folding if its dest may be live beyond this block.
    if (Iter->second.IsLiveOut || Iter->second.NumUses > 1) {
      Iter = KnownComputations.erase(Iter);
      continue;
    }

    // Mark as "dead" rather than outright deleting. This is so that other
    // peephole style optimizations during or before lowering have access to
    // this instruction in undeleted form. See for example
    // tryOptimizedCmpxchgCmpBr().
    Iter->second.Instr->setDead();
    ++Iter;
  }
}

TargetARM32::Sandboxer::Sandboxer(TargetARM32 *Target,
                                  InstBundleLock::Option BundleOption)
    : Target(Target), BundleOption(BundleOption) {}

TargetARM32::Sandboxer::~Sandboxer() {}

namespace {
OperandARM32FlexImm *indirectBranchBicMask(Cfg *Func) {
  constexpr uint32_t Imm8 = 0xFC; // 0xC000000F
  constexpr uint32_t RotateAmt = 2;
  return OperandARM32FlexImm::create(Func, IceType_i32, Imm8, RotateAmt);
}

OperandARM32FlexImm *memOpBicMask(Cfg *Func) {
  constexpr uint32_t Imm8 = 0x0C; // 0xC0000000
  constexpr uint32_t RotateAmt = 2;
  return OperandARM32FlexImm::create(Func, IceType_i32, Imm8, RotateAmt);
}

static bool baseNeedsBic(Variable *Base) {
  return Base->getRegNum() != RegARM32::Reg_r9 &&
         Base->getRegNum() != RegARM32::Reg_sp;
}
} // end of anonymous namespace

void TargetARM32::Sandboxer::createAutoBundle() {
  Bundler = makeUnique<AutoBundle>(Target, BundleOption);
}

void TargetARM32::Sandboxer::add_sp(Operand *AddAmount) {
  Variable *SP = Target->getPhysicalRegister(RegARM32::Reg_sp);
  if (!Target->NeedSandboxing) {
    Target->_add(SP, SP, AddAmount);
    return;
  }
  createAutoBundle();
  Target->_add(SP, SP, AddAmount);
  Target->_bic(SP, SP, memOpBicMask(Target->Func));
}

void TargetARM32::Sandboxer::align_sp(size_t Alignment) {
  Variable *SP = Target->getPhysicalRegister(RegARM32::Reg_sp);
  if (!Target->NeedSandboxing) {
    Target->alignRegisterPow2(SP, Alignment);
    return;
  }
  createAutoBundle();
  Target->alignRegisterPow2(SP, Alignment);
  Target->_bic(SP, SP, memOpBicMask(Target->Func));
}

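// A sandboxed indirect call masks the call target and keeps the mask and the
// branch in one bundle. The emitted sequence is roughly the following (the
// mnemonics and directives here are illustrative, not a verbatim dump of the
// emitter output):
//
//   .bundle_lock
//   bic r<n>, r<n>, #0xC000000F  @ clear the 4 low bits (bundle alignment)
//                                @ and the 2 high bits (sandbox address range)
//   blx r<n>
//   .bundle_unlock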
InstARM32Call *TargetARM32::Sandboxer::bl(Variable *ReturnReg,
                                          Operand *CallTarget) {
  if (Target->NeedSandboxing) {
    createAutoBundle();
    if (auto *CallTargetR = llvm::dyn_cast<Variable>(CallTarget)) {
      Target->_bic(CallTargetR, CallTargetR,
                   indirectBranchBicMask(Target->Func));
    }
  }
  return Target->Context.insert<InstARM32Call>(ReturnReg, CallTarget);
}

void TargetARM32::Sandboxer::ldr(Variable *Dest, OperandARM32Mem *Mem,
                                 CondARM32::Cond Pred) {
  Variable *MemBase = Mem->getBase();
  if (Target->NeedSandboxing && baseNeedsBic(MemBase)) {
    createAutoBundle();
    assert(!Mem->isRegReg());
    Target->_bic(MemBase, MemBase, memOpBicMask(Target->Func), Pred);
  }
  Target->_ldr(Dest, Mem, Pred);
}

void TargetARM32::Sandboxer::ldrex(Variable *Dest, OperandARM32Mem *Mem,
                                   CondARM32::Cond Pred) {
  Variable *MemBase = Mem->getBase();
  if (Target->NeedSandboxing && baseNeedsBic(MemBase)) {
    createAutoBundle();
    assert(!Mem->isRegReg());
    Target->_bic(MemBase, MemBase, memOpBicMask(Target->Func), Pred);
  }
  Target->_ldrex(Dest, Mem, Pred);
}

void TargetARM32::Sandboxer::reset_sp(Variable *Src) {
  Variable *SP = Target->getPhysicalRegister(RegARM32::Reg_sp);
  if (!Target->NeedSandboxing) {
    Target->_mov_redefined(SP, Src);
    return;
  }
  createAutoBundle();
  Target->_mov_redefined(SP, Src);
  Target->_bic(SP, SP, memOpBicMask(Target->Func));
}

void TargetARM32::Sandboxer::ret(Variable *RetAddr, Variable *RetValue) {
  if (Target->NeedSandboxing) {
    createAutoBundle();
    Target->_bic(RetAddr, RetAddr, indirectBranchBicMask(Target->Func));
  }
  Target->_ret(RetAddr, RetValue);
}

void TargetARM32::Sandboxer::str(Variable *Src, OperandARM32Mem *Mem,
                                 CondARM32::Cond Pred) {
  Variable *MemBase = Mem->getBase();
  if (Target->NeedSandboxing && baseNeedsBic(MemBase)) {
    createAutoBundle();
    assert(!Mem->isRegReg());
    Target->_bic(MemBase, MemBase, memOpBicMask(Target->Func), Pred);
  }
  Target->_str(Src, Mem, Pred);
}

void TargetARM32::Sandboxer::strex(Variable *Dest, Variable *Src,
                                   OperandARM32Mem *Mem, CondARM32::Cond Pred) {
  Variable *MemBase = Mem->getBase();
  if (Target->NeedSandboxing && baseNeedsBic(MemBase)) {
    createAutoBundle();
    assert(!Mem->isRegReg());
    Target->_bic(MemBase, MemBase, memOpBicMask(Target->Func), Pred);
  }
  Target->_strex(Dest, Src, Mem, Pred);
}

void TargetARM32::Sandboxer::sub_sp(Operand *SubAmount) {
  Variable *SP = Target->getPhysicalRegister(RegARM32::Reg_sp);
  if (!Target->NeedSandboxing) {
    Target->_sub(SP, SP, SubAmount);
    return;
  }
  createAutoBundle();
  Target->_sub(SP, SP, SubAmount);
  Target->_bic(SP, SP, memOpBicMask(Target->Func));
}

TargetDataARM32::TargetDataARM32(GlobalContext *Ctx)
    : TargetDataLowering(Ctx) {}

void TargetDataARM32::lowerGlobals(const VariableDeclarationList &Vars,
                                   const std::string &SectionSuffix) {
  const bool IsPIC = getFlags().getUseNonsfi();
  switch (getFlags().getOutFileType()) {
  case FT_Elf: {
    ELFObjectWriter *Writer = Ctx->getObjectWriter();
    Writer->writeDataSection(Vars, llvm::ELF::R_ARM_ABS32, SectionSuffix,
                             IsPIC);
  } break;
  case FT_Asm:
  case FT_Iasm: {
    OstreamLocker _(Ctx);
    for (const VariableDeclaration *Var : Vars) {
      if (getFlags().matchTranslateOnly(Var->getName(), 0)) {
        emitGlobal(*Var, SectionSuffix);
      }
    }
  } break;
  }
}

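// The helpers below emit the f32/f64 constant pools as text. As a worked
// example (the label text is whatever Constant::getLabelName() returns, shown
// here as <label>), emitConstant<float> applied to 1.0f prints:
//
//   <label>:
//           .long   0x3f800000      /* f32 1 */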
namespace {
template <typename T> struct ConstantPoolEmitterTraits;

static_assert(sizeof(uint64_t) == 8,
              "uint64_t is supposed to be 8 bytes wide.");

// TODO(jpp): implement the following when implementing constant randomization:
// * template <> struct ConstantPoolEmitterTraits<uint8_t>
// * template <> struct ConstantPoolEmitterTraits<uint16_t>
// * template <> struct ConstantPoolEmitterTraits<uint32_t>
template <> struct ConstantPoolEmitterTraits<float> {
  using ConstantType = ConstantFloat;
  static constexpr Type IceType = IceType_f32;
  // AsmTag and TypeName can't be constexpr because llvm::StringRef is unhappy
  // about them being constexpr.
  static const char AsmTag[];
  static const char TypeName[];
  static uint64_t bitcastToUint64(float Value) {
    static_assert(sizeof(Value) == sizeof(uint32_t),
                  "Float should be 4 bytes.");
    const uint32_t IntValue = Utils::bitCopy<uint32_t>(Value);
    return static_cast<uint64_t>(IntValue);
  }
};
const char ConstantPoolEmitterTraits<float>::AsmTag[] = ".long";
const char ConstantPoolEmitterTraits<float>::TypeName[] = "f32";

template <> struct ConstantPoolEmitterTraits<double> {
  using ConstantType = ConstantDouble;
  static constexpr Type IceType = IceType_f64;
  static const char AsmTag[];
  static const char TypeName[];
  static uint64_t bitcastToUint64(double Value) {
    static_assert(sizeof(double) == sizeof(uint64_t),
                  "Double should be 8 bytes.");
    return Utils::bitCopy<uint64_t>(Value);
  }
};
const char ConstantPoolEmitterTraits<double>::AsmTag[] = ".quad";
const char ConstantPoolEmitterTraits<double>::TypeName[] = "f64";

template <typename T>
void emitConstant(
    Ostream &Str,
    const typename ConstantPoolEmitterTraits<T>::ConstantType *Const) {
  using Traits = ConstantPoolEmitterTraits<T>;
  Str << Const->getLabelName();
  Str << ":\n\t" << Traits::AsmTag << "\t0x";
  T Value = Const->getValue();
  Str.write_hex(Traits::bitcastToUint64(Value));
  Str << "\t/* " << Traits::TypeName << " " << Value << " */\n";
}

template <typename T> void emitConstantPool(GlobalContext *Ctx) {
  if (!BuildDefs::dump()) {
    return;
  }

  using Traits = ConstantPoolEmitterTraits<T>;
  static constexpr size_t MinimumAlignment = 4;
  SizeT Align = std::max(MinimumAlignment, typeAlignInBytes(Traits::IceType));
  assert((Align % 4) == 0 && "Constants should be aligned");
  Ostream &Str = Ctx->getStrEmit();
  ConstantList Pool = Ctx->getConstantPool(Traits::IceType);

  Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",%progbits," << Align
      << "\n"
      << "\t.align\t" << Align << "\n";
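  // In the .section directive above, "aM" marks the section as allocatable
  // and mergeable, %progbits declares it as data, and the trailing operand is
  // the entry size the linker uses when merging identical constants (4 for
  // f32, 8 for f64, since Align equals the type's natural alignment here).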

  if (getFlags().getReorderPooledConstants()) {
    // TODO(jpp): add constant pooling.
    UnimplementedError(getFlags());
  }

  for (Constant *C : Pool) {
    if (!C->getShouldBePooled()) {
      continue;
    }

    emitConstant<T>(Str, llvm::dyn_cast<typename Traits::ConstantType>(C));
  }
}
} // end of anonymous namespace

void TargetDataARM32::lowerConstants() {
  if (getFlags().getDisableTranslation())
    return;
  switch (getFlags().getOutFileType()) {
  case FT_Elf: {
    ELFObjectWriter *Writer = Ctx->getObjectWriter();
    Writer->writeConstantPool<ConstantFloat>(IceType_f32);
    Writer->writeConstantPool<ConstantDouble>(IceType_f64);
  } break;
  case FT_Asm:
  case FT_Iasm: {
    OstreamLocker _(Ctx);
    emitConstantPool<float>(Ctx);
    emitConstantPool<double>(Ctx);
    break;
  }
  }
}

void TargetDataARM32::lowerJumpTables() {
  if (getFlags().getDisableTranslation())
    return;
  switch (getFlags().getOutFileType()) {
  case FT_Elf:
    if (!Ctx->getJumpTables().empty()) {
      llvm::report_fatal_error("ARM32 does not support jump tables yet.");
    }
    break;
  case FT_Asm:
    // Already emitted from Cfg.
    break;
  case FT_Iasm: {
    // TODO(kschimpf): Fill this in when we get more information.
    break;
  }
  }
}

TargetHeaderARM32::TargetHeaderARM32(GlobalContext *Ctx)
    : TargetHeaderLowering(Ctx), CPUFeatures(getFlags()) {}

void TargetHeaderARM32::lower() {
  OstreamLocker _(Ctx);
  Ostream &Str = Ctx->getStrEmit();
  Str << ".syntax unified\n";
  // Emit build attributes in format: .eabi_attribute TAG, VALUE. See Sec. 2 of
  // "Addenda to, and Errata in the ABI for the ARM architecture"
  // http://infocenter.arm.com
  // /help/topic/com.arm.doc.ihi0045d/IHI0045D_ABI_addenda.pdf
  //
  // Tag_conformance should be emitted first in a file-scope sub-subsection
  // of the first public subsection of the attributes.
  Str << ".eabi_attribute 67, \"2.09\" @ Tag_conformance\n";
  // Chromebooks are at least A15, but use A9 for wider compatibility. For some
  // reason, the LLVM ARM asm parser has the .cpu directive override the mattr
  // specified on the command line. So to test hwdiv, we need to set the .cpu
  // directive higher (can't just rely on --mattr=...).
  if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
    Str << ".cpu cortex-a15\n";
  } else {
    Str << ".cpu cortex-a9\n";
  }
  Str << ".eabi_attribute 6, 10 @ Tag_CPU_arch: ARMv7\n"
      << ".eabi_attribute 7, 65 @ Tag_CPU_arch_profile: App profile\n";
  Str << ".eabi_attribute 8, 1 @ Tag_ARM_ISA_use: Yes\n"
      << ".eabi_attribute 9, 2 @ Tag_THUMB_ISA_use: Thumb-2\n";
  Str << ".fpu neon\n"
      << ".eabi_attribute 17, 1 @ Tag_ABI_PCS_GOT_use: permit directly\n"
      << ".eabi_attribute 20, 1 @ Tag_ABI_FP_denormal\n"
      << ".eabi_attribute 21, 1 @ Tag_ABI_FP_exceptions\n"
      << ".eabi_attribute 23, 3 @ Tag_ABI_FP_number_model: IEEE 754\n"
      << ".eabi_attribute 34, 1 @ Tag_CPU_unaligned_access\n"
      << ".eabi_attribute 24, 1 @ Tag_ABI_align_needed: 8-byte\n"
      << ".eabi_attribute 25, 1 @ Tag_ABI_align_preserved: 8-byte\n"
      << ".eabi_attribute 28, 1 @ Tag_ABI_VFP_args\n"
      << ".eabi_attribute 36, 1 @ Tag_FP_HP_extension\n"
      << ".eabi_attribute 38, 1 @ Tag_ABI_FP_16bit_format\n"
      << ".eabi_attribute 42, 1 @ Tag_MPextension_use\n"
      << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n";
  if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
    Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n";
  }
  // Technically R9 is used for TLS with Sandboxing, and we reserve it.
  // However, for compatibility with current NaCl LLVM, don't claim that.
  Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
}

SmallBitVector TargetARM32::TypeToRegisterSet[RegARM32::RCARM32_NUM];
SmallBitVector TargetARM32::TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM];
SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM];

} // end of namespace ARM32
} // end of namespace Ice