//===----------------------------------------------------------------------===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Implements the TargetLoweringMIPS32 class, which consists almost
/// entirely of the lowering sequence for each high-level instruction.
///
//===----------------------------------------------------------------------===//

#include "IceTargetLoweringMIPS32.h"

#include "IceCfg.h"
#include "IceCfgNode.h"
#include "IceClFlags.h"
#include "IceDefs.h"
#include "IceELFObjectWriter.h"
#include "IceGlobalInits.h"
#include "IceInstMIPS32.h"
#include "IceInstVarIter.h"
#include "IceLiveness.h"
#include "IceOperand.h"
#include "IcePhiLoweringImpl.h"
#include "IceRegistersMIPS32.h"
#include "IceTargetLoweringMIPS32.def"
#include "IceUtils.h"
#include "llvm/Support/MathExtras.h"

namespace MIPS32 {
std::unique_ptr<::Ice::TargetLowering> createTargetLowering(::Ice::Cfg *Func) {
  return ::Ice::MIPS32::TargetMIPS32::create(Func);
}

std::unique_ptr<::Ice::TargetDataLowering>
createTargetDataLowering(::Ice::GlobalContext *Ctx) {
  return ::Ice::MIPS32::TargetDataMIPS32::create(Ctx);
}

std::unique_ptr<::Ice::TargetHeaderLowering>
createTargetHeaderLowering(::Ice::GlobalContext *Ctx) {
  return ::Ice::MIPS32::TargetHeaderMIPS32::create(Ctx);
}

void staticInit(::Ice::GlobalContext *Ctx) {
  ::Ice::MIPS32::TargetMIPS32::staticInit(Ctx);
}

bool shouldBePooled(const ::Ice::Constant *C) {
  return ::Ice::MIPS32::TargetMIPS32::shouldBePooled(C);
}

::Ice::Type getPointerType() {
  return ::Ice::MIPS32::TargetMIPS32::getPointerType();
}

} // end of namespace MIPS32

namespace Ice {
namespace MIPS32 {

using llvm::isInt;

namespace {

// The maximum number of arguments to pass in GPR registers.
constexpr uint32_t MIPS32_MAX_GPR_ARG = 4;

std::array<RegNumT, MIPS32_MAX_GPR_ARG> GPRArgInitializer;
std::array<RegNumT, MIPS32_MAX_GPR_ARG / 2> I64ArgInitializer;

constexpr uint32_t MIPS32_MAX_FP_ARG = 2;

std::array<RegNumT, MIPS32_MAX_FP_ARG> FP32ArgInitializer;
std::array<RegNumT, MIPS32_MAX_FP_ARG> FP64ArgInitializer;

const char *getRegClassName(RegClass C) {
  auto ClassNum = static_cast<RegClassMIPS32>(C);
  assert(ClassNum < RCMIPS32_NUM);
  switch (ClassNum) {
  default:
    assert(C < RC_Target);
    return regClassString(C);
    // Add handling of new register classes below.
  }
}

// Stack alignment
constexpr uint32_t MIPS32_STACK_ALIGNMENT_BYTES = 16;

// Value is in bytes. Return Value adjusted to the next highest multiple of
// the stack alignment required for the given type.
uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) {
  size_t typeAlignInBytes = typeWidthInBytes(Ty);
  // Vectors are stored on the stack with the same alignment as a 64-bit
  // integer.
  if (isVectorType(Ty))
    typeAlignInBytes = typeWidthInBytes(IceType_i64);
  return Utils::applyAlignment(Value, typeAlignInBytes);
}
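// A quick worked example (illustration only): i64 and vector values occupy
// 8-byte-aligned stack slots, so applyStackAlignmentTy(20, IceType_i64)
// rounds 20 up to 24, while applyStackAlignmentTy(20, IceType_i32) leaves it
// at 20 because 20 is already 4-byte aligned.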
// Value is in bytes. Return Value adjusted to the next highest multiple of
// the stack alignment.
uint32_t applyStackAlignment(uint32_t Value) {
  return Utils::applyAlignment(Value, MIPS32_STACK_ALIGNMENT_BYTES);
}

} // end of anonymous namespace

TargetMIPS32::TargetMIPS32(Cfg *Func)
    : TargetLowering(Func), NeedSandboxing(SandboxingType == ST_NaCl) {}

void TargetMIPS32::assignVarStackSlots(VarList &SortedSpilledVariables,
                                       size_t SpillAreaPaddingBytes,
                                       size_t SpillAreaSizeBytes,
                                       size_t GlobalsAndSubsequentPaddingSize) {
  const VariablesMetadata *VMetadata = Func->getVMetadata();
  size_t GlobalsSpaceUsed = SpillAreaPaddingBytes;
  size_t NextStackOffset = SpillAreaPaddingBytes;
  CfgVector<size_t> LocalsSize(Func->getNumNodes());
  const bool SimpleCoalescing = !callsReturnsTwice();
  for (Variable *Var : SortedSpilledVariables) {
    size_t Increment = typeWidthInBytesOnStack(Var->getType());
    if (SimpleCoalescing && VMetadata->isTracked(Var)) {
      if (VMetadata->isMultiBlock(Var)) {
        GlobalsSpaceUsed += Increment;
        NextStackOffset = GlobalsSpaceUsed;
      } else {
        SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex();
        LocalsSize[NodeIndex] += Increment;
        NextStackOffset = SpillAreaPaddingBytes +
                          GlobalsAndSubsequentPaddingSize +
                          LocalsSize[NodeIndex];
      }
    } else {
      NextStackOffset += Increment;
    }
    Var->setStackOffset(SpillAreaSizeBytes - NextStackOffset);
  }
}
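// Sketch of the coalescing above (hypothetical numbers): two single-block i32
// spills that live in *different* basic blocks both end up at offset
// SpillAreaPaddingBytes + GlobalsAndSubsequentPaddingSize + 4, i.e. they
// share a stack slot, because LocalsSize is tracked per node. A multi-block
// variable instead bumps GlobalsSpaceUsed and receives a unique offset.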
void TargetMIPS32::staticInit(GlobalContext *Ctx) {
  (void)Ctx;
  RegNumT::setLimit(RegMIPS32::Reg_NUM);
  SmallBitVector IntegerRegisters(RegMIPS32::Reg_NUM);
  SmallBitVector I64PairRegisters(RegMIPS32::Reg_NUM);
  SmallBitVector Float32Registers(RegMIPS32::Reg_NUM);
  SmallBitVector Float64Registers(RegMIPS32::Reg_NUM);
  SmallBitVector VectorRegisters(RegMIPS32::Reg_NUM);
  SmallBitVector InvalidRegisters(RegMIPS32::Reg_NUM);
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,   \
          isI64Pair, isFP32, isFP64, isVec128, alias_init)                    \
  IntegerRegisters[RegMIPS32::val] = isInt;                                   \
  I64PairRegisters[RegMIPS32::val] = isI64Pair;                               \
  Float32Registers[RegMIPS32::val] = isFP32;                                  \
  Float64Registers[RegMIPS32::val] = isFP64;                                  \
  VectorRegisters[RegMIPS32::val] = isVec128;                                 \
  RegisterAliases[RegMIPS32::val].resize(RegMIPS32::Reg_NUM);                 \
  for (SizeT RegAlias : alias_init) {                                         \
    assert(!RegisterAliases[RegMIPS32::val][RegAlias] &&                      \
           "Duplicate alias for " #val);                                      \
    RegisterAliases[RegMIPS32::val].set(RegAlias);                            \
  }                                                                           \
  RegisterAliases[RegMIPS32::val].resize(RegMIPS32::Reg_NUM);                 \
  assert(RegisterAliases[RegMIPS32::val][RegMIPS32::val]);
  REGMIPS32_TABLE;
#undef X

  // TODO(mohit.bhakkad): Change these inits once we provide argument-related
  // fields in the register tables.
  for (size_t i = 0; i < MIPS32_MAX_GPR_ARG; i++)
    GPRArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_A0 + i);

  for (size_t i = 0; i < MIPS32_MAX_GPR_ARG / 2; i++)
    I64ArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_A0A1 + i);

  for (size_t i = 0; i < MIPS32_MAX_FP_ARG; i++) {
    FP32ArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_F12 + i * 2);
    FP64ArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_F12F13 + i);
  }

  TypeToRegisterSet[IceType_void] = InvalidRegisters;
  TypeToRegisterSet[IceType_i1] = IntegerRegisters;
  TypeToRegisterSet[IceType_i8] = IntegerRegisters;
  TypeToRegisterSet[IceType_i16] = IntegerRegisters;
  TypeToRegisterSet[IceType_i32] = IntegerRegisters;
  TypeToRegisterSet[IceType_i64] = IntegerRegisters;
  TypeToRegisterSet[IceType_f32] = Float32Registers;
  TypeToRegisterSet[IceType_f64] = Float64Registers;
  TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
  TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
  TypeToRegisterSet[IceType_v4f32] = VectorRegisters;

  for (size_t i = 0; i < llvm::array_lengthof(TypeToRegisterSet); ++i)
    TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i];

  filterTypeToRegisterSet(Ctx, RegMIPS32::Reg_NUM, TypeToRegisterSet,
                          llvm::array_lengthof(TypeToRegisterSet),
                          RegMIPS32::getRegName, getRegClassName);
}

void TargetMIPS32::unsetIfNonLeafFunc() {
  for (CfgNode *Node : Func->getNodes()) {
    for (Inst &Instr : Node->getInsts()) {
      if (llvm::isa<InstCall>(&Instr)) {
        // Unset MaybeLeafFunc if a call instruction exists.
        MaybeLeafFunc = false;
        return;
      }
    }
  }
}

uint32_t TargetMIPS32::getStackAlignment() const {
  return MIPS32_STACK_ALIGNMENT_BYTES;
}

uint32_t TargetMIPS32::getCallStackArgumentsSizeBytes(const InstCall *Call) {
  TargetMIPS32::CallingConv CC;
  RegNumT DummyReg;
  size_t OutArgsSizeBytes = 0;
  Variable *Dest = Call->getDest();
  bool PartialOnStack = false;
  if (Dest != nullptr && isVectorFloatingType(Dest->getType())) {
    CC.discardReg(RegMIPS32::Reg_A0);
    // The next vector is partially on the stack.
    PartialOnStack = true;
  }
  for (SizeT i = 0, NumArgs = Call->getNumArgs(); i < NumArgs; ++i) {
    Operand *Arg = legalizeUndef(Call->getArg(i));
    const Type Ty = Arg->getType();
    RegNumT RegNum;
    if (CC.argInReg(Ty, i, &RegNum)) {
      // If PartialOnStack is true and this is a vector type, then its last
      // two elements are passed on the stack.
      if (PartialOnStack && isVectorType(Ty)) {
        OutArgsSizeBytes = applyStackAlignmentTy(OutArgsSizeBytes, IceType_i64);
        OutArgsSizeBytes += typeWidthInBytesOnStack(IceType_i32) * 2;
      }
      continue;
    }
    OutArgsSizeBytes = applyStackAlignmentTy(OutArgsSizeBytes, Ty);
    OutArgsSizeBytes += typeWidthInBytesOnStack(Ty);
  }
  // Add the size of the argument save area.
  constexpr int BytesPerStackArg = 4;
  OutArgsSizeBytes += MIPS32_MAX_GPR_ARG * BytesPerStackArg;
  return applyStackAlignment(OutArgsSizeBytes);
}

namespace {
inline uint64_t getConstantMemoryOrder(Operand *Opnd) {
  if (auto *Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
    return Integer->getValue();
  return Intrinsics::MemoryOrderInvalid;
}
} // end of anonymous namespace
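// Worked example (hypothetical call site): for a call such as
//   call void @f(i32 %a, i32 %b)
// both arguments travel in $4/$5, so the loop above adds nothing, but the
// 16-byte argument save area (MIPS32_MAX_GPR_ARG * BytesPerStackArg) is
// still reserved, and applyStackAlignment(16) keeps the out-args area at 16.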
void TargetMIPS32::genTargetHelperCallFor(Inst *Instr) {
  constexpr bool NoTailCall = false;
  constexpr bool IsTargetHelperCall = true;
  Variable *Dest = Instr->getDest();
  const Type DestTy = Dest ? Dest->getType() : IceType_void;

  switch (Instr->getKind()) {
  default:
    return;
  case Inst::Select: {
    if (isVectorType(DestTy)) {
      Operand *SrcT = llvm::cast<InstSelect>(Instr)->getTrueOperand();
      Operand *SrcF = llvm::cast<InstSelect>(Instr)->getFalseOperand();
      Operand *Cond = llvm::cast<InstSelect>(Instr)->getCondition();
      Variable *T = Func->makeVariable(DestTy);
      auto *Undef = ConstantUndef::create(Ctx, DestTy);
      Context.insert<InstAssign>(T, Undef);
      auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
      VarVecOn32->initVecElement(Func);
      for (SizeT I = 0; I < typeNumElements(DestTy); ++I) {
        auto *Index = Ctx->getConstantInt32(I);
        auto *OpC = Func->makeVariable(typeElementType(Cond->getType()));
        Context.insert<InstExtractElement>(OpC, Cond, Index);
        auto *OpT = Func->makeVariable(typeElementType(DestTy));
        Context.insert<InstExtractElement>(OpT, SrcT, Index);
        auto *OpF = Func->makeVariable(typeElementType(DestTy));
        Context.insert<InstExtractElement>(OpF, SrcF, Index);
        auto *Dst = Func->makeVariable(typeElementType(DestTy));
        Variable *DestT = Func->makeVariable(DestTy);
        Context.insert<InstSelect>(Dst, OpC, OpT, OpF);
        Context.insert<InstInsertElement>(DestT, T, Dst, Index);
        T = DestT;
      }
      Context.insert<InstAssign>(Dest, T);
      Instr->setDeleted();
    }
    return;
  }
  case Inst::Fcmp: {
    if (isVectorType(DestTy)) {
      InstFcmp::FCond Cond = llvm::cast<InstFcmp>(Instr)->getCondition();
      Operand *Src0 = Instr->getSrc(0);
      Operand *Src1 = Instr->getSrc(1);
      Variable *T = Func->makeVariable(IceType_v4f32);
      auto *Undef = ConstantUndef::create(Ctx, IceType_v4f32);
      Context.insert<InstAssign>(T, Undef);
      auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
      VarVecOn32->initVecElement(Func);
      for (SizeT I = 0; I < typeNumElements(IceType_v4f32); ++I) {
        auto *Index = Ctx->getConstantInt32(I);
        auto *Op0 = Func->makeVariable(IceType_f32);
        Context.insert<InstExtractElement>(Op0, Src0, Index);
        auto *Op1 = Func->makeVariable(IceType_f32);
        Context.insert<InstExtractElement>(Op1, Src1, Index);
        auto *Dst = Func->makeVariable(IceType_f32);
        Variable *DestT = Func->makeVariable(IceType_v4f32);
        Context.insert<InstFcmp>(Cond, Dst, Op0, Op1);
        Context.insert<InstInsertElement>(DestT, T, Dst, Index);
        T = DestT;
      }
      Context.insert<InstAssign>(Dest, T);
      Instr->setDeleted();
    }
    return;
  }
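  // The Select/Fcmp cases above (and Icmp/Cast below) all follow the same
  // scalarization pattern: this target lowers vector operations lane by lane,
  // so each lane is extracted, operated on as a scalar, and re-inserted.
  // Roughly (a sketch, not actual emitted IR):
  //   %c0 = extractelement %cond, 0 ; %t0 = extractelement %srcT, 0 ; ...
  //   %d0 = select %c0, %t0, %f0    ; %v1 = insertelement %v0, %d0, 0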
  case Inst::Icmp: {
    if (isVectorType(DestTy)) {
      InstIcmp::ICond Cond = llvm::cast<InstIcmp>(Instr)->getCondition();
      Operand *Src0 = Instr->getSrc(0);
      Operand *Src1 = Instr->getSrc(1);
      const Type SrcType = Src0->getType();
      Variable *T = Func->makeVariable(DestTy);
      auto *Undef = ConstantUndef::create(Ctx, DestTy);
      Context.insert<InstAssign>(T, Undef);
      auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
      VarVecOn32->initVecElement(Func);
      for (SizeT I = 0; I < typeNumElements(SrcType); ++I) {
        auto *Index = Ctx->getConstantInt32(I);
        auto *Op0 = Func->makeVariable(typeElementType(SrcType));
        Context.insert<InstExtractElement>(Op0, Src0, Index);
        auto *Op1 = Func->makeVariable(typeElementType(SrcType));
        Context.insert<InstExtractElement>(Op1, Src1, Index);
        auto *Dst = Func->makeVariable(typeElementType(DestTy));
        Variable *DestT = Func->makeVariable(DestTy);
        Context.insert<InstIcmp>(Cond, Dst, Op0, Op1);
        Context.insert<InstInsertElement>(DestT, T, Dst, Index);
        T = DestT;
      }
      Context.insert<InstAssign>(Dest, T);
      Instr->setDeleted();
    }
    return;
  }
  case Inst::Arithmetic: {
    const InstArithmetic::OpKind Op =
        llvm::cast<InstArithmetic>(Instr)->getOp();
    if (isVectorType(DestTy)) {
      scalarizeArithmetic(Op, Dest, Instr->getSrc(0), Instr->getSrc(1));
      Instr->setDeleted();
      return;
    }
    switch (DestTy) {
    default:
      return;
    case IceType_i64: {
      RuntimeHelper HelperID = RuntimeHelper::H_Num;
      switch (Op) {
      default:
        return;
      case InstArithmetic::Udiv:
        HelperID = RuntimeHelper::H_udiv_i64;
        break;
      case InstArithmetic::Sdiv:
        HelperID = RuntimeHelper::H_sdiv_i64;
        break;
      case InstArithmetic::Urem:
        HelperID = RuntimeHelper::H_urem_i64;
        break;
      case InstArithmetic::Srem:
        HelperID = RuntimeHelper::H_srem_i64;
        break;
      }

      if (HelperID == RuntimeHelper::H_Num) {
        return;
      }

      Operand *TargetHelper = Ctx->getRuntimeHelperFunc(HelperID);
      constexpr SizeT MaxArgs = 2;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Instr->getSrc(0));
      Call->addArg(Instr->getSrc(1));
      Instr->setDeleted();
      return;
    }
    case IceType_f32:
    case IceType_f64: {
      if (Op != InstArithmetic::Frem) {
        return;
      }
      constexpr SizeT MaxArgs = 2;
      Operand *TargetHelper = Ctx->getRuntimeHelperFunc(
          DestTy == IceType_f32 ? RuntimeHelper::H_frem_f32
                                : RuntimeHelper::H_frem_f64);
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Instr->getSrc(0));
      Call->addArg(Instr->getSrc(1));
      Instr->setDeleted();
      return;
    }
    }
    llvm::report_fatal_error("Control flow should never have reached here.");
  }
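  // Note on the Arithmetic case: 64-bit division/remainder and floating-point
  // remainder have no single MIPS32 instruction, so the high-level op is
  // replaced by a call to a runtime helper resolved via getRuntimeHelperFunc.
  // Conceptually (helper symbol name is illustrative):
  //   %q = sdiv i64 %a, %b   ==>   %q = call i64 sdiv_i64_helper(%a, %b)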
  case Inst::Cast: {
    Operand *Src0 = Instr->getSrc(0);
    const Type SrcTy = Src0->getType();
    auto *CastInstr = llvm::cast<InstCast>(Instr);
    const InstCast::OpKind CastKind = CastInstr->getCastKind();

    if (isVectorType(DestTy)) {
      Variable *T = Func->makeVariable(DestTy);
      auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
      VarVecOn32->initVecElement(Func);
      auto *Undef = ConstantUndef::create(Ctx, DestTy);
      Context.insert<InstAssign>(T, Undef);
      for (SizeT I = 0; I < typeNumElements(DestTy); ++I) {
        auto *Index = Ctx->getConstantInt32(I);
        auto *Op = Func->makeVariable(typeElementType(SrcTy));
        Context.insert<InstExtractElement>(Op, Src0, Index);
        auto *Dst = Func->makeVariable(typeElementType(DestTy));
        Variable *DestT = Func->makeVariable(DestTy);
        Context.insert<InstCast>(CastKind, Dst, Op);
        Context.insert<InstInsertElement>(DestT, T, Dst, Index);
        T = DestT;
      }
      Context.insert<InstAssign>(Dest, T);
      Instr->setDeleted();
      return;
    }

    switch (CastKind) {
    default:
      return;
    case InstCast::Fptosi:
    case InstCast::Fptoui: {
      if ((DestTy != IceType_i32) && (DestTy != IceType_i64)) {
        return;
      }
      const bool DestIs32 = DestTy == IceType_i32;
      const bool DestIsSigned = CastKind == InstCast::Fptosi;
      const bool Src0IsF32 = isFloat32Asserting32Or64(SrcTy);
      RuntimeHelper RTHFunc = RuntimeHelper::H_Num;
      if (DestIsSigned) {
        if (DestIs32) {
          return;
        }
        RTHFunc = Src0IsF32 ? RuntimeHelper::H_fptosi_f32_i64
                            : RuntimeHelper::H_fptosi_f64_i64;
      } else {
        RTHFunc = Src0IsF32 ? (DestIs32 ? RuntimeHelper::H_fptoui_f32_i32
                                        : RuntimeHelper::H_fptoui_f32_i64)
                            : (DestIs32 ? RuntimeHelper::H_fptoui_f64_i32
                                        : RuntimeHelper::H_fptoui_f64_i64);
      }
      Operand *TargetHelper = Ctx->getRuntimeHelperFunc(RTHFunc);
      static constexpr SizeT MaxArgs = 1;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Src0);
      Instr->setDeleted();
      return;
    }
    case InstCast::Sitofp:
    case InstCast::Uitofp: {
      if ((SrcTy != IceType_i32) && (SrcTy != IceType_i64)) {
        return;
      }
      const bool SourceIs32 = SrcTy == IceType_i32;
      const bool SourceIsSigned = CastKind == InstCast::Sitofp;
      const bool DestIsF32 = isFloat32Asserting32Or64(DestTy);
      RuntimeHelper RTHFunc = RuntimeHelper::H_Num;
      if (SourceIsSigned) {
        if (SourceIs32) {
          return;
        }
        RTHFunc = DestIsF32 ? RuntimeHelper::H_sitofp_i64_f32
                            : RuntimeHelper::H_sitofp_i64_f64;
      } else {
        RTHFunc = DestIsF32 ? (SourceIs32 ? RuntimeHelper::H_uitofp_i32_f32
                                          : RuntimeHelper::H_uitofp_i64_f32)
                            : (SourceIs32 ? RuntimeHelper::H_uitofp_i32_f64
                                          : RuntimeHelper::H_uitofp_i64_f64);
      }
      Operand *TargetHelper = Ctx->getRuntimeHelperFunc(RTHFunc);
      static constexpr SizeT MaxArgs = 1;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Src0);
      Instr->setDeleted();
      return;
    }
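    // The pattern in Fptosi/Fptoui/Sitofp/Uitofp above: only conversions the
    // lowering can't handle directly (anything involving i64, and all
    // unsigned conversions) are routed to runtime helpers; the signed 32-bit
    // conversions return early here and are lowered inline later.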
    case InstCast::Bitcast: {
      if (DestTy == SrcTy) {
        return;
      }
      Variable *CallDest = Dest;
      RuntimeHelper HelperID = RuntimeHelper::H_Num;
      switch (DestTy) {
      default:
        return;
      case IceType_i8:
        assert(SrcTy == IceType_v8i1);
        HelperID = RuntimeHelper::H_bitcast_8xi1_i8;
        CallDest = Func->makeVariable(IceType_i32);
        break;
      case IceType_i16:
        assert(SrcTy == IceType_v16i1);
        HelperID = RuntimeHelper::H_bitcast_16xi1_i16;
        CallDest = Func->makeVariable(IceType_i32);
        break;
      case IceType_v8i1: {
        assert(SrcTy == IceType_i8);
        HelperID = RuntimeHelper::H_bitcast_i8_8xi1;
        Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
        // Arguments to functions are required to be at least 32 bits wide.
        Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
        Src0 = Src0AsI32;
      } break;
      case IceType_v16i1: {
        assert(SrcTy == IceType_i16);
        HelperID = RuntimeHelper::H_bitcast_i16_16xi1;
        Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
        // Arguments to functions are required to be at least 32 bits wide.
        Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
        Src0 = Src0AsI32;
      } break;
      }
      constexpr SizeT MaxSrcs = 1;
      InstCall *Call = makeHelperCall(HelperID, CallDest, MaxSrcs);
      Call->addArg(Src0);
      Context.insert(Call);
      // The PNaCl ABI disallows i8/i16 return types, so truncate the helper
      // call result to the appropriate type as necessary.
      if (CallDest->getType() != DestTy)
        Context.insert<InstCast>(InstCast::Trunc, Dest, CallDest);
      Instr->setDeleted();
      return;
    }
    case InstCast::Trunc: {
      if (DestTy == SrcTy) {
        return;
      }
      if (!isVectorType(SrcTy)) {
        return;
      }
      assert(typeNumElements(DestTy) == typeNumElements(SrcTy));
      assert(typeElementType(DestTy) == IceType_i1);
      assert(isVectorIntegerType(SrcTy));
      return;
    }
    case InstCast::Sext:
    case InstCast::Zext: {
      if (DestTy == SrcTy) {
        return;
      }
      if (!isVectorType(DestTy)) {
        return;
      }
      assert(typeNumElements(DestTy) == typeNumElements(SrcTy));
      assert(typeElementType(SrcTy) == IceType_i1);
      assert(isVectorIntegerType(DestTy));
      return;
    }
    }
    llvm::report_fatal_error("Control flow should never have reached here.");
  }
  case Inst::IntrinsicCall: {
    auto *IntrinsicCall = llvm::cast<InstIntrinsicCall>(Instr);
    Intrinsics::IntrinsicID ID = IntrinsicCall->getIntrinsicInfo().ID;
    if (isVectorType(DestTy) && ID == Intrinsics::Fabs) {
      Operand *Src0 = IntrinsicCall->getArg(0);
      GlobalString FabsFloat = Ctx->getGlobalString("llvm.fabs.f32");
      Operand *CallTarget = Ctx->getConstantExternSym(FabsFloat);
      GlobalString FabsVec = Ctx->getGlobalString("llvm.fabs.v4f32");
      bool BadIntrinsic = false;
      const Intrinsics::FullIntrinsicInfo *FullInfo =
          Ctx->getIntrinsicsInfo().find(FabsVec, BadIntrinsic);
      Intrinsics::IntrinsicInfo Info = FullInfo->Info;

      Variable *T = Func->makeVariable(IceType_v4f32);
      auto *Undef = ConstantUndef::create(Ctx, IceType_v4f32);
      Context.insert<InstAssign>(T, Undef);
      auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
      VarVecOn32->initVecElement(Func);

      for (SizeT i = 0; i < typeNumElements(IceType_v4f32); ++i) {
        auto *Index = Ctx->getConstantInt32(i);
        auto *Op = Func->makeVariable(IceType_f32);
        Context.insert<InstExtractElement>(Op, Src0, Index);
        auto *Res = Func->makeVariable(IceType_f32);
        Variable *DestT = Func->makeVariable(IceType_v4f32);
        auto *Call =
            Context.insert<InstIntrinsicCall>(1, Res, CallTarget, Info);
        Call->addArg(Op);
        Context.insert<InstInsertElement>(DestT, T, Res, Index);
        T = DestT;
      }

      Context.insert<InstAssign>(Dest, T);

      Instr->setDeleted();
      return;
    }
    switch (ID) {
    default:
      return;
    case Intrinsics::AtomicLoad: {
      if (DestTy != IceType_i64)
        return;
      if (!Intrinsics::isMemoryOrderValid(
              ID, getConstantMemoryOrder(IntrinsicCall->getArg(1)))) {
        Func->setError("Unexpected memory ordering for AtomicLoad");
        return;
      }
      Operand *Addr = IntrinsicCall->getArg(0);
      Operand *TargetHelper = Ctx->getConstantExternSym(
          Ctx->getGlobalString("__sync_val_compare_and_swap_8"));
      static constexpr SizeT MaxArgs = 3;
      auto *_0 = Ctx->getConstantZero(IceType_i64);
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Addr);
      Call->addArg(_0);
      Call->addArg(_0);
      Context.insert<InstMIPS32Sync>();
      Instr->setDeleted();
      return;
    }
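    // The AtomicLoad trick above: __sync_val_compare_and_swap_8(Addr, 0, 0)
    // compares *Addr against 0 and "swaps" in 0 only on a match, so memory is
    // unchanged either way and the return value is an atomic snapshot of the
    // 64-bit location. The trailing sync acts as the ordering barrier.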
    case Intrinsics::AtomicStore: {
      Operand *Val = IntrinsicCall->getArg(0);
      if (Val->getType() != IceType_i64)
        return;
      if (!Intrinsics::isMemoryOrderValid(
              ID, getConstantMemoryOrder(IntrinsicCall->getArg(2)))) {
        Func->setError("Unexpected memory ordering for AtomicStore");
        return;
      }
      Operand *Addr = IntrinsicCall->getArg(1);
      Variable *NoDest = nullptr;
      Operand *TargetHelper = Ctx->getConstantExternSym(
          Ctx->getGlobalString("__sync_lock_test_and_set_8"));
      Context.insert<InstMIPS32Sync>();
      static constexpr SizeT MaxArgs = 2;
      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Addr);
      Call->addArg(Val);
      Context.insert<InstMIPS32Sync>();
      Instr->setDeleted();
      return;
    }
    case Intrinsics::AtomicCmpxchg: {
      if (DestTy != IceType_i64)
        return;
      if (!Intrinsics::isMemoryOrderValid(
              ID, getConstantMemoryOrder(IntrinsicCall->getArg(3)),
              getConstantMemoryOrder(IntrinsicCall->getArg(4)))) {
        Func->setError("Unexpected memory ordering for AtomicCmpxchg");
        return;
      }
      Operand *Addr = IntrinsicCall->getArg(0);
      Operand *Oldval = IntrinsicCall->getArg(1);
      Operand *Newval = IntrinsicCall->getArg(2);
      Operand *TargetHelper = Ctx->getConstantExternSym(
          Ctx->getGlobalString("__sync_val_compare_and_swap_8"));
      Context.insert<InstMIPS32Sync>();
      static constexpr SizeT MaxArgs = 3;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Addr);
      Call->addArg(Oldval);
      Call->addArg(Newval);
      Context.insert<InstMIPS32Sync>();
      Instr->setDeleted();
      return;
    }
    case Intrinsics::AtomicRMW: {
      if (DestTy != IceType_i64)
        return;
      if (!Intrinsics::isMemoryOrderValid(
              ID, getConstantMemoryOrder(IntrinsicCall->getArg(3)))) {
        Func->setError("Unexpected memory ordering for AtomicRMW");
        return;
      }
      auto Operation = static_cast<Intrinsics::AtomicRMWOperation>(
          llvm::cast<ConstantInteger32>(IntrinsicCall->getArg(0))->getValue());
      auto *Addr = IntrinsicCall->getArg(1);
      auto *Newval = IntrinsicCall->getArg(2);
      Operand *TargetHelper;
      switch (Operation) {
      case Intrinsics::AtomicAdd:
        TargetHelper = Ctx->getConstantExternSym(
            Ctx->getGlobalString("__sync_fetch_and_add_8"));
        break;
      case Intrinsics::AtomicSub:
        TargetHelper = Ctx->getConstantExternSym(
            Ctx->getGlobalString("__sync_fetch_and_sub_8"));
        break;
      case Intrinsics::AtomicOr:
        TargetHelper = Ctx->getConstantExternSym(
            Ctx->getGlobalString("__sync_fetch_and_or_8"));
        break;
      case Intrinsics::AtomicAnd:
        TargetHelper = Ctx->getConstantExternSym(
            Ctx->getGlobalString("__sync_fetch_and_and_8"));
        break;
      case Intrinsics::AtomicXor:
        TargetHelper = Ctx->getConstantExternSym(
            Ctx->getGlobalString("__sync_fetch_and_xor_8"));
        break;
      case Intrinsics::AtomicExchange:
        TargetHelper = Ctx->getConstantExternSym(
            Ctx->getGlobalString("__sync_lock_test_and_set_8"));
        break;
      default:
        llvm::report_fatal_error("Unknown AtomicRMW operation");
        return;
      }
      Context.insert<InstMIPS32Sync>();
      static constexpr SizeT MaxArgs = 2;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Addr);
      Call->addArg(Newval);
      Context.insert<InstMIPS32Sync>();
      Instr->setDeleted();
      return;
    }
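    // All of the 64-bit atomic lowerings above share one shape: bracket a
    // __sync_* libcall with MIPS32 sync instructions so ordering does not
    // depend on the libcall providing its own fences. E.g. an AtomicRMW add
    // becomes, roughly:
    //   sync; call __sync_fetch_and_add_8(Addr, Newval); sync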
    case Intrinsics::Ctpop: {
      Operand *Src0 = IntrinsicCall->getArg(0);
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(isInt32Asserting32Or64(Src0->getType())
                                        ? RuntimeHelper::H_call_ctpop_i32
                                        : RuntimeHelper::H_call_ctpop_i64);
      static constexpr SizeT MaxArgs = 1;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Src0);
      Instr->setDeleted();
      return;
    }
    case Intrinsics::Longjmp: {
      static constexpr SizeT MaxArgs = 2;
      static constexpr Variable *NoDest = nullptr;
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_longjmp);
      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(IntrinsicCall->getArg(0));
      Call->addArg(IntrinsicCall->getArg(1));
      Instr->setDeleted();
      return;
    }
    case Intrinsics::Memcpy: {
      static constexpr SizeT MaxArgs = 3;
      static constexpr Variable *NoDest = nullptr;
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memcpy);
      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(IntrinsicCall->getArg(0));
      Call->addArg(IntrinsicCall->getArg(1));
      Call->addArg(IntrinsicCall->getArg(2));
      Instr->setDeleted();
      return;
    }
    case Intrinsics::Memmove: {
      static constexpr SizeT MaxArgs = 3;
      static constexpr Variable *NoDest = nullptr;
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memmove);
      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(IntrinsicCall->getArg(0));
      Call->addArg(IntrinsicCall->getArg(1));
      Call->addArg(IntrinsicCall->getArg(2));
      Instr->setDeleted();
      return;
    }
    case Intrinsics::Memset: {
      Operand *ValOp = IntrinsicCall->getArg(1);
      assert(ValOp->getType() == IceType_i8);
      Variable *ValExt = Func->makeVariable(stackSlotType());
      Context.insert<InstCast>(InstCast::Zext, ValExt, ValOp);

      static constexpr SizeT MaxArgs = 3;
      static constexpr Variable *NoDest = nullptr;
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memset);
      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(IntrinsicCall->getArg(0));
      Call->addArg(ValExt);
      Call->addArg(IntrinsicCall->getArg(2));
      Instr->setDeleted();
      return;
    }
    case Intrinsics::NaClReadTP: {
      if (SandboxingType == ST_NaCl) {
        return;
      }
      static constexpr SizeT MaxArgs = 0;
      assert(SandboxingType != ST_Nonsfi);
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_read_tp);
      Context.insert<InstCall>(MaxArgs, Dest, TargetHelper, NoTailCall,
                               IsTargetHelperCall);
      Instr->setDeleted();
      return;
    }
    case Intrinsics::Setjmp: {
      static constexpr SizeT MaxArgs = 1;
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_setjmp);
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(IntrinsicCall->getArg(0));
      Instr->setDeleted();
      return;
    }
    }
    llvm::report_fatal_error("Control flow should never have reached here.");
  }
  }
}
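// Note on ordering: genTargetHelperCalls() runs before
// findMaxStackOutArgsSize() (see translateO2()/translateOm1() below), so the
// out-args area reserved in the prolog also covers the InstCalls
// materialized by the expansion above.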
void TargetMIPS32::findMaxStackOutArgsSize() {
  // MinNeededOutArgsBytes should be updated if the Target ever creates a
  // high-level InstCall that requires more stack bytes.
  size_t MinNeededOutArgsBytes = 0;
  if (!MaybeLeafFunc)
    MinNeededOutArgsBytes = MIPS32_MAX_GPR_ARG * 4;
  MaxOutArgsSizeBytes = MinNeededOutArgsBytes;
  for (CfgNode *Node : Func->getNodes()) {
    Context.init(Node);
    while (!Context.atEnd()) {
      PostIncrLoweringContext PostIncrement(Context);
      Inst *CurInstr = iteratorToInst(Context.getCur());
      if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) {
        SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call);
        MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes);
      }
    }
  }
  CurrentAllocaOffset = MaxOutArgsSizeBytes;
}

void TargetMIPS32::translateO2() {
  TimerMarker T(TimerStack::TT_O2, Func);

  // TODO(stichnot): share passes with X86?
  // https://code.google.com/p/nativeclient/issues/detail?id=4094
  genTargetHelperCalls();

  unsetIfNonLeafFunc();

  findMaxStackOutArgsSize();

  // Merge Alloca instructions, and lay out the stack.
  static constexpr bool SortAndCombineAllocas = true;
  Func->processAllocas(SortAndCombineAllocas);
  Func->dump("After Alloca processing");

  if (!getFlags().getEnablePhiEdgeSplit()) {
    // Lower Phi instructions.
    Func->placePhiLoads();
    if (Func->hasError())
      return;
    Func->placePhiStores();
    if (Func->hasError())
      return;
    Func->deletePhis();
    if (Func->hasError())
      return;
    Func->dump("After Phi lowering");
  }

  // Address mode optimization.
  Func->getVMetadata()->init(VMK_SingleDefs);
  Func->doAddressOpt();

  // Argument lowering
  Func->doArgLowering();

  // Target lowering. This requires liveness analysis for some parts of the
  // lowering decisions, such as compare/branch fusing. If non-lightweight
  // liveness analysis is used, the instructions need to be renumbered first.
  // TODO: This renumbering should only be necessary if we're actually
  // calculating live intervals, which we only do for register allocation.
  Func->renumberInstructions();
  if (Func->hasError())
    return;

  // TODO: It should be sufficient to use the fastest liveness calculation,
  // i.e. livenessLightweight(). However, for some reason that slows down the
  // rest of the translation. Investigate.
  Func->liveness(Liveness_Basic);
  if (Func->hasError())
    return;
  Func->dump("After MIPS32 address mode opt");

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After MIPS32 codegen");

  // Register allocation. This requires instruction renumbering and full
  // liveness analysis.
  Func->renumberInstructions();
  if (Func->hasError())
    return;
  Func->liveness(Liveness_Intervals);
  if (Func->hasError())
    return;
  // The post-codegen dump is done here, after liveness analysis and
  // associated cleanup, to make the dump cleaner and more useful.
  Func->dump("After initial MIPS32 codegen");
  // Validate the live range computations. The expensive validation call is
  // deliberately only made when assertions are enabled.
  assert(Func->validateLiveness());
  Func->getVMetadata()->init(VMK_All);
  regAlloc(RAK_Global);
  if (Func->hasError())
    return;
  Func->dump("After linear scan regalloc");

  if (getFlags().getEnablePhiEdgeSplit()) {
    Func->advancedPhiLowering();
    Func->dump("After advanced Phi lowering");
  }

  // Stack frame mapping.
  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  postLowerLegalization();
  if (Func->hasError())
    return;
  Func->dump("After postLowerLegalization");

  Func->contractEmptyNodes();
  Func->reorderNodes();

  // Branch optimization. This needs to be done just before code emission. In
  // particular, no transformations that insert or reorder CfgNodes should be
  // done after branch optimization. We go ahead and do it before nop
  // insertion to reduce the amount of work needed for searching for
  // opportunities.
  Func->doBranchOpt();
  Func->dump("After branch optimization");

  // Nop insertion
  if (getFlags().getShouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}

void TargetMIPS32::translateOm1() {
  TimerMarker T(TimerStack::TT_Om1, Func);

  // TODO: share passes with X86?
  genTargetHelperCalls();

  unsetIfNonLeafFunc();

  findMaxStackOutArgsSize();

  // Do not merge Alloca instructions, and lay out the stack.
  static constexpr bool SortAndCombineAllocas = false;
  Func->processAllocas(SortAndCombineAllocas);
  Func->dump("After Alloca processing");

  Func->placePhiLoads();
  if (Func->hasError())
    return;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  Func->deletePhis();
  if (Func->hasError())
    return;
  Func->dump("After Phi lowering");

  Func->doArgLowering();

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After initial MIPS32 codegen");

  regAlloc(RAK_InfOnly);
  if (Func->hasError())
    return;
  Func->dump("After regalloc of infinite-weight variables");

  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  postLowerLegalization();
  if (Func->hasError())
    return;
  Func->dump("After postLowerLegalization");

  // Nop insertion
  if (getFlags().getShouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}

bool TargetMIPS32::doBranchOpt(Inst *Instr, const CfgNode *NextNode) {
  if (auto *Br = llvm::dyn_cast<InstMIPS32Br>(Instr)) {
    return Br->optimizeBranch(NextNode);
  }
  return false;
}

namespace {

const char *RegNames[RegMIPS32::Reg_NUM] = {
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,   \
          isI64Pair, isFP32, isFP64, isVec128, alias_init)                    \
  name,
    REGMIPS32_TABLE
#undef X
};

} // end of anonymous namespace

const char *RegMIPS32::getRegName(RegNumT RegNum) {
  RegNum.assertIsValid();
  return RegNames[RegNum];
}

const char *TargetMIPS32::getRegName(RegNumT RegNum, Type Ty) const {
  (void)Ty;
  return RegMIPS32::getRegName(RegNum);
}

Variable *TargetMIPS32::getPhysicalRegister(RegNumT RegNum, Type Ty) {
  if (Ty == IceType_void)
    Ty = IceType_i32;
  if (PhysicalRegisters[Ty].empty())
    PhysicalRegisters[Ty].resize(RegMIPS32::Reg_NUM);
  RegNum.assertIsValid();
  Variable *Reg = PhysicalRegisters[Ty][RegNum];
  if (Reg == nullptr) {
    Reg = Func->makeVariable(Ty);
    Reg->setRegNum(RegNum);
    PhysicalRegisters[Ty][RegNum] = Reg;
    // Specially mark a named physical register as an "argument" so that it is
    // considered live upon function entry.
    // Otherwise it's possible to get liveness validation errors for saving
    // callee-save registers.
    Func->addImplicitArg(Reg);
    // Don't bother tracking the live range of a named physical register.
    Reg->setIgnoreLiveness();
  }
  return Reg;
}

void TargetMIPS32::emitJumpTable(const Cfg *Func,
                                 const InstJumpTable *JumpTable) const {
  (void)Func;
  (void)JumpTable;
  UnimplementedError(getFlags());
}

/// Provide a trivial wrapper to legalize() for this common usage.
Variable *TargetMIPS32::legalizeToReg(Operand *From, RegNumT RegNum) {
  return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
}

/// Legalize undef values to concrete values.
Operand *TargetMIPS32::legalizeUndef(Operand *From, RegNumT RegNum) {
  (void)RegNum;
  Type Ty = From->getType();
  if (llvm::isa<ConstantUndef>(From)) {
    // Lower undefs to zero. Another option is to lower undefs to an
    // uninitialized register; however, using an uninitialized register
    // results in less predictable code.
    //
    // If in the future the implementation is changed to lower undef values to
    // uninitialized registers, a FakeDef will be needed:
    //   Context.insert(InstFakeDef::create(Func, Reg));
    // This is in order to ensure that the live range of Reg is not
    // overestimated. If the constant being lowered is a 64 bit value, then
    // the result should be split and the lo and hi components will need to go
    // in uninitialized registers.
    if (isVectorType(Ty)) {
      Variable *Var = makeReg(Ty, RegNum);
      auto *Reg = llvm::cast<VariableVecOn32>(Var);
      Reg->initVecElement(Func);
      auto *Zero = getZero();
      for (Variable *Var : Reg->getContainers()) {
        _mov(Var, Zero);
      }
      return Reg;
    }
    return Ctx->getConstantZero(Ty);
  }
  return From;
}

Variable *TargetMIPS32::makeReg(Type Type, RegNumT RegNum) {
  // There aren't any 64-bit integer registers for Mips32.
  assert(Type != IceType_i64);
  Variable *Reg = Func->makeVariable(Type);
  if (RegNum.hasValue())
    Reg->setRegNum(RegNum);
  else
    Reg->setMustHaveReg();
  return Reg;
}
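// Note: because makeReg() above rejects IceType_i64, 64-bit values in this
// file are modeled as Variable64On32 lo/hi pairs (see lowerArguments() and
// finishArgumentLowering() below), each half living in its own 32-bit
// register.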
OperandMIPS32Mem *TargetMIPS32::formMemoryOperand(Operand *Operand, Type Ty) {
  // It may be the case that address mode optimization already creates an
  // OperandMIPS32Mem, so in that case it wouldn't need another level of
  // transformation.
  if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) {
    return llvm::cast<OperandMIPS32Mem>(legalize(Mem));
  }

  // If we didn't do address mode optimization, then we only have a
  // base/offset to work with. MIPS always requires a base register, so just
  // use that to hold the operand.
  auto *Base = llvm::cast<Variable>(
      legalize(Operand, Legal_Reg | Legal_Rematerializable));
  const int32_t Offset = Base->hasStackOffset() ? Base->getStackOffset() : 0;
  return OperandMIPS32Mem::create(
      Func, Ty, Base,
      llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(Offset)));
}

void TargetMIPS32::emitVariable(const Variable *Var) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  const Type FrameSPTy = IceType_i32;
  if (Var->hasReg()) {
    Str << '$' << getRegName(Var->getRegNum(), Var->getType());
    return;
  }
  if (Var->mustHaveReg()) {
    llvm::report_fatal_error("Infinite-weight Variable (" + Var->getName() +
                             ") has no register assigned - function " +
                             Func->getFunctionName());
  }
  const int32_t Offset = Var->getStackOffset();
  Str << Offset;
  Str << "($" << getRegName(getFrameOrStackReg(), FrameSPTy);
  Str << ")";
}

TargetMIPS32::CallingConv::CallingConv()
    : GPRegsUsed(RegMIPS32::Reg_NUM),
      GPRArgs(GPRArgInitializer.rbegin(), GPRArgInitializer.rend()),
      I64Args(I64ArgInitializer.rbegin(), I64ArgInitializer.rend()),
      VFPRegsUsed(RegMIPS32::Reg_NUM),
      FP32Args(FP32ArgInitializer.rbegin(), FP32ArgInitializer.rend()),
      FP64Args(FP64ArgInitializer.rbegin(), FP64ArgInitializer.rend()) {}

// In the MIPS O32 ABI, FP argument registers can be used only if the first
// argument is of type float/double. The UseFPRegs flag takes care of that.
// Also, FP argument registers can be used only for the first two arguments,
// so we require the argument number to make register allocation decisions.
bool TargetMIPS32::CallingConv::argInReg(Type Ty, uint32_t ArgNo,
                                         RegNumT *Reg) {
  if (isScalarIntegerType(Ty) || isVectorType(Ty))
    return argInGPR(Ty, Reg);
  if (isScalarFloatingType(Ty)) {
    if (ArgNo == 0) {
      UseFPRegs = true;
      return argInVFP(Ty, Reg);
    }
    if (UseFPRegs && ArgNo == 1) {
      UseFPRegs = false;
      return argInVFP(Ty, Reg);
    }
    return argInGPR(Ty, Reg);
  }
  llvm::report_fatal_error("argInReg: Invalid type.");
  return false;
}
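// A few illustrative assignments under these rules (O32, as modeled by
// argInReg/argInGPR/argInVFP):
//   f(f32, f64) -> $f12, $f14:$f15  (both args FP, ArgNo 0 and 1)
//   f(f32, i32) -> $f12, $5         (the f32 also burns $4; see argInVFP)
//   f(i32, f32) -> $4, $5           (integer first, so FP registers are off)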
bool TargetMIPS32::CallingConv::argInGPR(Type Ty, RegNumT *Reg) {
  CfgVector<RegNumT> *Source;

  switch (Ty) {
  default: {
    llvm::report_fatal_error("argInGPR: Invalid type.");
    return false;
  } break;
  case IceType_v4i1:
  case IceType_v8i1:
  case IceType_v16i1:
  case IceType_v16i8:
  case IceType_v8i16:
  case IceType_v4i32:
  case IceType_v4f32:
  case IceType_i32:
  case IceType_f32: {
    Source = &GPRArgs;
  } break;
  case IceType_i64:
  case IceType_f64: {
    Source = &I64Args;
  } break;
  }

  discardUnavailableGPRsAndTheirAliases(Source);

  // If $4 is used for any scalar type (or for returning v4f32) then the next
  // vector argument is passed in $6:$7:stack:stack.
  if (isVectorType(Ty)) {
    alignGPR(Source);
  }

  if (Source->empty()) {
    GPRegsUsed.set();
    return false;
  }

  *Reg = Source->back();
  // Note that we don't Source->pop_back() here. This is intentional: we mark
  // all of Reg's aliases as used, so for the next argument Source->back() is
  // unavailable and is thus implicitly popped.
  GPRegsUsed |= RegisterAliases[*Reg];

  // All vector arguments, irrespective of their base type, are passed in GP
  // registers. The first vector argument is passed in $4:$5:$6:$7 and the
  // second in $6:$7:stack:stack. If this is the first vector argument,
  // discard $4:$5:$6:$7; otherwise discard $6:$7 only.
  if (isVectorType(Ty)) {
    if (((unsigned)*Reg) == RegMIPS32::Reg_A0) {
      GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A1];
      GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A2];
      GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A3];
    } else {
      GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A3];
    }
  }

  return true;
}

inline void TargetMIPS32::CallingConv::discardNextGPRAndItsAliases(
    CfgVector<RegNumT> *Regs) {
  GPRegsUsed |= RegisterAliases[Regs->back()];
  Regs->pop_back();
}

inline void TargetMIPS32::CallingConv::alignGPR(CfgVector<RegNumT> *Regs) {
  if (Regs->back() == RegMIPS32::Reg_A1 || Regs->back() == RegMIPS32::Reg_A3)
    discardNextGPRAndItsAliases(Regs);
}

// GPRs are not packed when passing parameters. Thus, a function foo(i32, i64,
// i32) will have the first argument in a0, the second in a2-a3, and the third
// on the stack. To model this behavior, whenever we pop a register from Regs,
// we remove all of its aliases from the pool of available GPRs. This has the
// effect of computing the "closure" of the GPR registers.
void TargetMIPS32::CallingConv::discardUnavailableGPRsAndTheirAliases(
    CfgVector<RegNumT> *Regs) {
  while (!Regs->empty() && GPRegsUsed[Regs->back()]) {
    discardNextGPRAndItsAliases(Regs);
  }
}

bool TargetMIPS32::CallingConv::argInVFP(Type Ty, RegNumT *Reg) {
  CfgVector<RegNumT> *Source;

  switch (Ty) {
  default: {
    llvm::report_fatal_error("argInVFP: Invalid type.");
    return false;
  } break;
  case IceType_f32: {
    Source = &FP32Args;
  } break;
  case IceType_f64: {
    Source = &FP64Args;
  } break;
  }

  discardUnavailableVFPRegsAndTheirAliases(Source);

  if (Source->empty()) {
    VFPRegsUsed.set();
    return false;
  }

  *Reg = Source->back();
  VFPRegsUsed |= RegisterAliases[*Reg];

  // In the MIPS O32 ABI, if the function arguments are (f32, i32) then one
  // cannot use $a0 for the second argument even though it is free: the f32
  // argument goes in $f12 and the i32 argument goes in $a1. Similarly, if the
  // arguments are (f64, i32), the second argument goes in $a3 and $a0/$a1 are
  // left unused.
  Source = &GPRArgs;
  // Discard one GPR for f32 (4 bytes), two for f64 (4 + 4 bytes).
  if (Ty == IceType_f64) {
    // In the MIPS O32 ABI, when a GPR argument pair holds an f64 value, the
    // pair must start at an even register. Likewise, when GPRs in the first
    // 16 bytes of arguments are skipped because values went to FPRs, the
    // alignment must be maintained. For example, if the function arguments
    // are (f32, f64, f32): the first f32 discards a0; the f64, which will go
    // in F14:F15, must first align the GPR pool to an even register by
    // discarding a1, and then discard the pair a2/a3. The last f32 argument
    // then goes on the stack.
    alignGPR(Source);
    discardNextGPRAndItsAliases(Source);
  }
  discardNextGPRAndItsAliases(Source);
  return true;
}

void TargetMIPS32::CallingConv::discardUnavailableVFPRegsAndTheirAliases(
    CfgVector<RegNumT> *Regs) {
  while (!Regs->empty() && VFPRegsUsed[Regs->back()]) {
    Regs->pop_back();
  }
}

void TargetMIPS32::lowerArguments() {
  VarList &Args = Func->getArgs();
  TargetMIPS32::CallingConv CC;

  // For each register argument, replace Arg in the argument list with the
  // home register. Then generate an instruction in the prolog to copy the
  // home register to the assigned location of Arg.
  Context.init(Func->getEntryNode());
  Context.setInsertPoint(Context.getCur());

  // v4f32 is returned through the stack: $4 is set up by the caller and
  // passed as the first argument implicitly. The callee then copies the
  // return vector to the address in $4.
  Variable *ImplicitRetVec = nullptr;
  if (isVectorFloatingType(Func->getReturnType())) {
    ImplicitRetVec = Func->makeVariable(IceType_i32);
    ImplicitRetVec->setName(Func, "ImplicitRet_v4f32");
    ImplicitRetVec->setIsArg();
    Args.insert(Args.begin(), ImplicitRetVec);
    setImplicitRet(ImplicitRetVec);
  }

  for (SizeT i = 0, E = Args.size(); i < E; ++i) {
    Variable *Arg = Args[i];
    Type Ty = Arg->getType();
    RegNumT RegNum;
    if (!CC.argInReg(Ty, i, &RegNum)) {
      continue;
    }
    Variable *RegisterArg = Func->makeVariable(Ty);
    if (BuildDefs::dump()) {
      RegisterArg->setName(Func, "home_reg:" + Arg->getName());
    }
    RegisterArg->setIsArg();
    Arg->setIsArg(false);
    Args[i] = RegisterArg;

    if (isVectorType(Ty)) {
      auto *RegisterArgVec = llvm::cast<VariableVecOn32>(RegisterArg);
      RegisterArgVec->initVecElement(Func);
      RegisterArgVec->getContainers()[0]->setRegNum(
          RegNumT::fixme((unsigned)RegNum + 0));
      RegisterArgVec->getContainers()[1]->setRegNum(
          RegNumT::fixme((unsigned)RegNum + 1));
      // The first two elements of the second vector argument are passed in
      // $6:$7 and the remaining two on the stack. Do not assign registers to
      // the last two elements if this is the second vector argument.
      if (i == 0) {
        RegisterArgVec->getContainers()[2]->setRegNum(
            RegNumT::fixme((unsigned)RegNum + 2));
        RegisterArgVec->getContainers()[3]->setRegNum(
            RegNumT::fixme((unsigned)RegNum + 3));
      } else {
        RegisterArgVec->getContainers()[2]->setRegNum(
            RegNumT::fixme(RegNumT()));
        RegisterArgVec->getContainers()[3]->setRegNum(
            RegNumT::fixme(RegNumT()));
      }
    } else {
      switch (Ty) {
      default: { RegisterArg->setRegNum(RegNum); } break;
      case IceType_i64: {
        auto *RegisterArg64 = llvm::cast<Variable64On32>(RegisterArg);
        RegisterArg64->initHiLo(Func);
        RegisterArg64->getLo()->setRegNum(
            RegNumT::fixme(RegMIPS32::get64PairFirstRegNum(RegNum)));
        RegisterArg64->getHi()->setRegNum(
            RegNumT::fixme(RegMIPS32::get64PairSecondRegNum(RegNum)));
      } break;
      }
    }
    Context.insert<InstAssign>(Arg, RegisterArg);
  }

  // Insert a fake use of ImplicitRet_v4f32 to keep it live.
  if (ImplicitRetVec) {
    for (CfgNode *Node : Func->getNodes()) {
      for (Inst &Instr : Node->getInsts()) {
        if (llvm::isa<InstRet>(&Instr)) {
          Context.setInsertPoint(instToIterator(&Instr));
          Context.insert<InstFakeUse>(ImplicitRetVec);
          break;
        }
      }
    }
  }
}

Type TargetMIPS32::stackSlotType() { return IceType_i32; }

// Helper function for addProlog().
//
// This assumes Arg is an argument passed on the stack. This sets the frame
// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an
// I64 arg that has been split into Lo and Hi components, it calls itself
// recursively on the components, taking care to handle Lo first because of
// the little-endian architecture. Lastly, this function generates an
// instruction to copy Arg into its assigned register if applicable.
void TargetMIPS32::finishArgumentLowering(Variable *Arg, bool PartialOnStack,
                                          Variable *FramePtr,
                                          size_t BasicFrameOffset,
                                          size_t *InArgsSizeBytes) {
  const Type Ty = Arg->getType();
  *InArgsSizeBytes = applyStackAlignmentTy(*InArgsSizeBytes, Ty);

  // If $4 is used for any scalar type (or for returning v4f32) then the next
  // vector argument is passed in $6:$7:stack:stack. Load the 3rd and 4th
  // elements from the argument stack area.
  if (auto *ArgVecOn32 = llvm::dyn_cast<VariableVecOn32>(Arg)) {
    if (PartialOnStack == false) {
      auto *Elem0 = ArgVecOn32->getContainers()[0];
      auto *Elem1 = ArgVecOn32->getContainers()[1];
      finishArgumentLowering(Elem0, PartialOnStack, FramePtr, BasicFrameOffset,
                             InArgsSizeBytes);
      finishArgumentLowering(Elem1, PartialOnStack, FramePtr, BasicFrameOffset,
                             InArgsSizeBytes);
    }
    auto *Elem2 = ArgVecOn32->getContainers()[2];
    auto *Elem3 = ArgVecOn32->getContainers()[3];
    finishArgumentLowering(Elem2, PartialOnStack, FramePtr, BasicFrameOffset,
                           InArgsSizeBytes);
    finishArgumentLowering(Elem3, PartialOnStack, FramePtr, BasicFrameOffset,
                           InArgsSizeBytes);
    return;
  }

  if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) {
    Variable *const Lo = Arg64On32->getLo();
    Variable *const Hi = Arg64On32->getHi();
    finishArgumentLowering(Lo, PartialOnStack, FramePtr, BasicFrameOffset,
                           InArgsSizeBytes);
    finishArgumentLowering(Hi, PartialOnStack, FramePtr, BasicFrameOffset,
                           InArgsSizeBytes);
    return;
  }

  assert(Ty != IceType_i64);
  assert(!isVectorType(Ty));

  const int32_t ArgStackOffset = BasicFrameOffset + *InArgsSizeBytes;
  *InArgsSizeBytes += typeWidthInBytesOnStack(Ty);

  if (!Arg->hasReg()) {
    Arg->setStackOffset(ArgStackOffset);
    return;
  }

  // If the argument variable has been assigned a register, we need to copy
  // the value from the stack slot.
  Variable *Parameter = Func->makeVariable(Ty);
  Parameter->setMustNotHaveReg();
  Parameter->setStackOffset(ArgStackOffset);
  _mov(Arg, Parameter);
}

void TargetMIPS32::addProlog(CfgNode *Node) {
  // Stack frame layout:
  //
  // +------------------------+
  // | 1. preserved registers |
  // +------------------------+
  // | 2. padding             |
  // +------------------------+
  // | 3. global spill area   |
  // +------------------------+
  // | 4. padding             |
  // +------------------------+
  // | 5. local spill area    |
  // +------------------------+
  // | 6. padding             |
  // +------------------------+
  // | 7. allocas             |
  // +------------------------+
  // | 8. padding             |
  // +------------------------+
  // | 9. out args            |
  // +------------------------+ <--- StackPointer
  //
  // The following variables record the size in bytes of the given areas:
  //  * PreservedRegsSizeBytes: area 1
  //  * SpillAreaPaddingBytes: area 2
  //  * GlobalsSize: area 3
  //  * GlobalsAndSubsequentPaddingSize: areas 3 - 4
  //  * LocalsSpillAreaSize: area 5
  //  * SpillAreaSizeBytes: areas 2 - 9
  //  * maxOutArgsSizeBytes(): area 9

  Context.init(Node);
  Context.setInsertPoint(Context.getCur());

  SmallBitVector CalleeSaves = getRegisterSet(RegSet_CalleeSave, RegSet_None);
  RegsUsed = SmallBitVector(CalleeSaves.size());

  VarList SortedSpilledVariables;

  size_t GlobalsSize = 0;
  // If there is a separate locals area, this represents that area. Otherwise
  // it counts any variable not counted by GlobalsSize.
  SpillAreaSizeBytes = 0;
  // If there is a separate locals area, this specifies the alignment for it.
  uint32_t LocalsSlotsAlignmentBytes = 0;
  // The entire spill locations area gets aligned to the largest natural
  // alignment of the variables that have a spill slot.
  uint32_t SpillAreaAlignmentBytes = 0;
  // For now, we don't have target-specific variables that need special
  // treatment (no stack-slot-linked SpillVariable type).
  std::function<bool(Variable *)> TargetVarHook = [](Variable *Var) {
    static constexpr bool AssignStackSlot = false;
    static constexpr bool DontAssignStackSlot = !AssignStackSlot;
    if (llvm::isa<Variable64On32>(Var)) {
      return DontAssignStackSlot;
    }
    return AssignStackSlot;
  };

  // Compute the list of spilled variables and bounds for GlobalsSize, etc.
  getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
                        &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
                        &LocalsSlotsAlignmentBytes, TargetVarHook);
  uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
  SpillAreaSizeBytes += GlobalsSize;

  PreservedGPRs.reserve(CalleeSaves.size());

  // Consider FP and RA as callee-save / used as needed.
  if (UsesFramePointer) {
    if (RegsUsed[RegMIPS32::Reg_FP]) {
      llvm::report_fatal_error("Frame pointer has been used.");
    }
    CalleeSaves[RegMIPS32::Reg_FP] = true;
    RegsUsed[RegMIPS32::Reg_FP] = true;
  }
  if (!MaybeLeafFunc) {
    CalleeSaves[RegMIPS32::Reg_RA] = true;
    RegsUsed[RegMIPS32::Reg_RA] = true;
  }

  // Make two passes over the used registers. The first pass records all the
  // used registers -- and their aliases. Then, we figure out which GPR
  // registers should be saved.
  SmallBitVector ToPreserve(RegMIPS32::Reg_NUM);
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    if (CalleeSaves[i] && RegsUsed[i]) {
      ToPreserve |= RegisterAliases[i];
    }
  }

  uint32_t NumCallee = 0;

  // RegClass is a tuple of
  //
  //   <First Register in Class, Last Register in Class, Vector of Save
  //    Registers>
  //
  // We use this tuple to figure out which registers to save/restore during
  // prolog/epilog.
  using RegClassType = std::tuple<uint32_t, uint32_t, VarList *>;
  const RegClassType RegClass = RegClassType(
      RegMIPS32::Reg_GPR_First, RegMIPS32::Reg_FPR_Last, &PreservedGPRs);
  const uint32_t FirstRegInClass = std::get<0>(RegClass);
  const uint32_t LastRegInClass = std::get<1>(RegClass);
  VarList *const PreservedRegsInClass = std::get<2>(RegClass);
  for (uint32_t Reg = LastRegInClass; Reg > FirstRegInClass; Reg--) {
    if (!ToPreserve[Reg]) {
      continue;
    }
    ++NumCallee;
    Variable *PhysicalRegister = getPhysicalRegister(RegNumT::fromInt(Reg));
    PreservedRegsSizeBytes +=
        typeWidthInBytesOnStack(PhysicalRegister->getType());
    PreservedRegsInClass->push_back(PhysicalRegister);
  }

  Ctx->statsUpdateRegistersSaved(NumCallee);

  // Align the variables area. SpillAreaPaddingBytes is the size of the region
  // after the preserved registers and before the spill areas.
  // LocalsSlotsPaddingBytes is the amount of padding between the globals and
  // locals area if they are separate.
1620 assert(SpillAreaAlignmentBytes <= MIPS32_STACK_ALIGNMENT_BYTES);
1621 (void)MIPS32_STACK_ALIGNMENT_BYTES;
1622 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
1623 uint32_t SpillAreaPaddingBytes = 0;
1624 uint32_t LocalsSlotsPaddingBytes = 0;
1625 alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
1626 GlobalsSize, LocalsSlotsAlignmentBytes,
1627 &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
1628 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
1629 uint32_t GlobalsAndSubsequentPaddingSize =
1630 GlobalsSize + LocalsSlotsPaddingBytes;
1631
1632 // Add the out args space to the stack, and align SP if necessary.
1633 if (!NeedsStackAlignment) {
1634 SpillAreaSizeBytes += MaxOutArgsSizeBytes * (VariableAllocaUsed ? 0 : 1);
1635 } else {
1636 SpillAreaSizeBytes = applyStackAlignment(
1637 SpillAreaSizeBytes +
1638 (VariableAllocaUsed ? VariableAllocaAlignBytes : MaxOutArgsSizeBytes));
1639 }
1640
1641 // Combine fixed alloca with SpillAreaSize.
1642 SpillAreaSizeBytes += FixedAllocaSizeBytes;
1643
1644 TotalStackSizeBytes =
1645 applyStackAlignment(PreservedRegsSizeBytes + SpillAreaSizeBytes);
1646
1647 // Generate "addiu sp, sp, -TotalStackSizeBytes"
1648 if (TotalStackSizeBytes) {
1649 // Use the scratch register if needed to legalize the immediate.
1650 Sandboxer(this).addiu_sp(-TotalStackSizeBytes);
1651 }
1652
1653 Ctx->statsUpdateFrameBytes(TotalStackSizeBytes);
1654
1655 if (!PreservedGPRs.empty()) {
1656 uint32_t StackOffset = TotalStackSizeBytes;
1657 for (Variable *Var : *PreservedRegsInClass) {
1658 Type RegType;
1659 if (RegMIPS32::isFPRReg(Var->getRegNum()))
1660 RegType = IceType_f32;
1661 else
1662 RegType = IceType_i32;
1663 auto *PhysicalRegister = makeReg(RegType, Var->getRegNum());
1664 StackOffset -= typeWidthInBytesOnStack(RegType);
1665 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
1666 OperandMIPS32Mem *MemoryLocation = OperandMIPS32Mem::create(
1667 Func, RegType, SP,
1668 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackOffset)));
1669 Sandboxer(this).sw(PhysicalRegister, MemoryLocation);
1670 }
1671 }
1672
1673 Variable *FP = getPhysicalRegister(RegMIPS32::Reg_FP);
1674
1675 // Generate "mov FP, SP" if needed.
1676 if (UsesFramePointer) {
1677 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
1678 _mov(FP, SP);
1679 // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).
1680 Context.insert<InstFakeUse>(FP);
1681 }
1682
1683 // Fill in stack offsets for stack args, and copy args into registers for
1684 // those that were register-allocated. Args are pushed right to left, so
1685 // Arg[0] is closest to the stack/frame pointer.
1686 const VarList &Args = Func->getArgs();
1687 size_t InArgsSizeBytes = MIPS32_MAX_GPR_ARG * 4;
1688 TargetMIPS32::CallingConv CC;
1689 uint32_t ArgNo = 0;
1690
1691 for (Variable *Arg : Args) {
1692 RegNumT DummyReg;
1693 const Type Ty = Arg->getType();
1694 bool PartialOnStack;
1695 // Skip arguments passed in registers.
1696 if (CC.argInReg(Ty, ArgNo, &DummyReg)) {
1697 // Load argument from stack:
1698 // 1. If this is the first vector argument and the return type is v4f32:
1699 // $4 is used to pass the stack address implicitly, so the 3rd and 4th
1700 // elements of the vector argument are passed on the stack.
1701 // 2. If this is the second (or a later) vector argument.
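// In either case only the elements that actually arrived on the stack are
// loaded here (PartialOnStack is set to true); the register-resident
// elements were already claimed by CC.argInReg() above.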
1702 if (ArgNo != 0 && isVectorType(Ty)) { 1703 PartialOnStack = true; 1704 finishArgumentLowering(Arg, PartialOnStack, FP, TotalStackSizeBytes, 1705 &InArgsSizeBytes); 1706 } 1707 } else { 1708 PartialOnStack = false; 1709 finishArgumentLowering(Arg, PartialOnStack, FP, TotalStackSizeBytes, 1710 &InArgsSizeBytes); 1711 } 1712 ++ArgNo; 1713 } 1714 1715 // Fill in stack offsets for locals. 1716 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes, 1717 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize); 1718 this->HasComputedFrame = true; 1719 1720 if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) { 1721 OstreamLocker _(Func->getContext()); 1722 Ostream &Str = Func->getContext()->getStrDump(); 1723 1724 Str << "Stack layout:\n"; 1725 uint32_t SPAdjustmentPaddingSize = 1726 SpillAreaSizeBytes - LocalsSpillAreaSize - 1727 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes - 1728 MaxOutArgsSizeBytes; 1729 Str << " in-args = " << InArgsSizeBytes << " bytes\n" 1730 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n" 1731 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n" 1732 << " globals spill area = " << GlobalsSize << " bytes\n" 1733 << " globals-locals spill areas intermediate padding = " 1734 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n" 1735 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n" 1736 << " SP alignment padding = " << SPAdjustmentPaddingSize << " bytes\n"; 1737 1738 Str << "Stack details:\n" 1739 << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n" 1740 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n" 1741 << " outgoing args size = " << MaxOutArgsSizeBytes << " bytes\n" 1742 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes 1743 << " bytes\n" 1744 << " is FP based = " << 1 << "\n"; 1745 } 1746 return; 1747 } 1748 1749 void TargetMIPS32::addEpilog(CfgNode *Node) { 1750 InstList &Insts = Node->getInsts(); 1751 InstList::reverse_iterator RI, E; 1752 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) { 1753 if (llvm::isa<InstMIPS32Ret>(*RI)) 1754 break; 1755 } 1756 if (RI == E) 1757 return; 1758 1759 // Convert the reverse_iterator position into its corresponding (forward) 1760 // iterator position. 1761 InstList::iterator InsertPoint = reverseToForwardIterator(RI); 1762 --InsertPoint; 1763 Context.init(Node); 1764 Context.setInsertPoint(InsertPoint); 1765 1766 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP); 1767 if (UsesFramePointer) { 1768 Variable *FP = getPhysicalRegister(RegMIPS32::Reg_FP); 1769 // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake 1770 // use of SP before the assignment of SP=FP keeps previous SP adjustments 1771 // from being dead-code eliminated. 
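// Without this fake use, no instruction would appear to read SP between the
// prolog's SP adjustment and the SP=FP assignment below, so that adjustment
// could be eliminated as dead code.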
1772 Context.insert<InstFakeUse>(SP); 1773 Sandboxer(this).reset_sp(FP); 1774 } 1775 1776 VarList::reverse_iterator RIter, END; 1777 1778 if (!PreservedGPRs.empty()) { 1779 uint32_t StackOffset = TotalStackSizeBytes - PreservedRegsSizeBytes; 1780 for (RIter = PreservedGPRs.rbegin(), END = PreservedGPRs.rend(); 1781 RIter != END; ++RIter) { 1782 Type RegType; 1783 if (RegMIPS32::isFPRReg((*RIter)->getRegNum())) 1784 RegType = IceType_f32; 1785 else 1786 RegType = IceType_i32; 1787 auto *PhysicalRegister = makeReg(RegType, (*RIter)->getRegNum()); 1788 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP); 1789 OperandMIPS32Mem *MemoryLocation = OperandMIPS32Mem::create( 1790 Func, RegType, SP, 1791 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackOffset))); 1792 _lw(PhysicalRegister, MemoryLocation); 1793 StackOffset += typeWidthInBytesOnStack(PhysicalRegister->getType()); 1794 } 1795 } 1796 1797 if (TotalStackSizeBytes) { 1798 Sandboxer(this).addiu_sp(TotalStackSizeBytes); 1799 } 1800 if (!getFlags().getUseSandboxing()) 1801 return; 1802 1803 Variable *RA = getPhysicalRegister(RegMIPS32::Reg_RA); 1804 Variable *RetValue = nullptr; 1805 if (RI->getSrcSize()) 1806 RetValue = llvm::cast<Variable>(RI->getSrc(0)); 1807 1808 Sandboxer(this).ret(RA, RetValue); 1809 1810 RI->setDeleted(); 1811 } 1812 1813 Variable *TargetMIPS32::PostLoweringLegalizer::newBaseRegister( 1814 Variable *Base, int32_t Offset, RegNumT ScratchRegNum) { 1815 // Legalize will likely need a lui/ori combination, but if the top bits are 1816 // all 0 from negating the offset and subtracting, we could use that instead. 1817 const bool ShouldSub = Offset != 0 && (-Offset & 0xFFFF0000) == 0; 1818 Variable *ScratchReg = Target->makeReg(IceType_i32, ScratchRegNum); 1819 if (ShouldSub) { 1820 Target->_addi(ScratchReg, Base, -Offset); 1821 } else { 1822 constexpr bool SignExt = true; 1823 if (!OperandMIPS32Mem::canHoldOffset(Base->getType(), SignExt, Offset)) { 1824 const uint32_t UpperBits = (Offset >> 16) & 0xFFFF; 1825 const uint32_t LowerBits = Offset & 0xFFFF; 1826 Target->_lui(ScratchReg, Target->Ctx->getConstantInt32(UpperBits)); 1827 if (LowerBits) 1828 Target->_ori(ScratchReg, ScratchReg, LowerBits); 1829 Target->_addu(ScratchReg, ScratchReg, Base); 1830 } else { 1831 Target->_addiu(ScratchReg, Base, Offset); 1832 } 1833 } 1834 1835 return ScratchReg; 1836 } 1837 1838 void TargetMIPS32::PostLoweringLegalizer::legalizeMovFp( 1839 InstMIPS32MovFP64ToI64 *MovInstr) { 1840 Variable *Dest = MovInstr->getDest(); 1841 Operand *Src = MovInstr->getSrc(0); 1842 const Type SrcTy = Src->getType(); 1843 1844 if (Dest != nullptr && SrcTy == IceType_f64) { 1845 int32_t Offset = Dest->getStackOffset(); 1846 auto *Base = Target->getPhysicalRegister(Target->getFrameOrStackReg()); 1847 OperandMIPS32Mem *TAddr = OperandMIPS32Mem::create( 1848 Target->Func, IceType_f32, Base, 1849 llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset))); 1850 OperandMIPS32Mem *Addr = legalizeMemOperand(TAddr); 1851 auto *SrcV = llvm::cast<Variable>(Src); 1852 Variable *SrcR; 1853 if (MovInstr->getInt64Part() == Int64_Lo) { 1854 SrcR = Target->makeReg( 1855 IceType_f32, RegMIPS32::get64PairFirstRegNum(SrcV->getRegNum())); 1856 } else { 1857 SrcR = Target->makeReg( 1858 IceType_f32, RegMIPS32::get64PairSecondRegNum(SrcV->getRegNum())); 1859 } 1860 Sandboxer(Target).sw(SrcR, Addr); 1861 if (MovInstr->isDestRedefined()) { 1862 Target->_set_dest_redefined(); 1863 } 1864 MovInstr->setDeleted(); 1865 return; 1866 } 1867 1868 
llvm::report_fatal_error("legalizeMovFp: Invalid operands"); 1869 } 1870 1871 void TargetMIPS32::PostLoweringLegalizer::legalizeMov(InstMIPS32Mov *MovInstr) { 1872 Variable *Dest = MovInstr->getDest(); 1873 assert(Dest != nullptr); 1874 const Type DestTy = Dest->getType(); 1875 assert(DestTy != IceType_i64); 1876 1877 Operand *Src = MovInstr->getSrc(0); 1878 const Type SrcTy = Src->getType(); 1879 (void)SrcTy; 1880 assert(SrcTy != IceType_i64); 1881 1882 bool Legalized = false; 1883 auto *SrcR = llvm::cast<Variable>(Src); 1884 if (Dest->hasReg() && SrcR->hasReg()) { 1885 // This might be a GP to/from FP move generated due to argument passing. 1886 // Use mtc1/mfc1 instead of mov.[s/d] if src and dst registers are of 1887 // different types. 1888 const bool IsDstGPR = RegMIPS32::isGPRReg(Dest->getRegNum()); 1889 const bool IsSrcGPR = RegMIPS32::isGPRReg(SrcR->getRegNum()); 1890 const RegNumT SRegNum = SrcR->getRegNum(); 1891 const RegNumT DRegNum = Dest->getRegNum(); 1892 if (IsDstGPR != IsSrcGPR) { 1893 if (IsDstGPR) { 1894 // Dest is GPR and SrcR is FPR. Use mfc1. 1895 int32_t TypeWidth = typeWidthInBytes(DestTy); 1896 if (MovInstr->getDestHi() != nullptr) 1897 TypeWidth += typeWidthInBytes(MovInstr->getDestHi()->getType()); 1898 if (TypeWidth == 8) { 1899 // Split it into two mfc1 instructions 1900 Variable *SrcGPRHi = Target->makeReg( 1901 IceType_f32, RegMIPS32::get64PairFirstRegNum(SRegNum)); 1902 Variable *SrcGPRLo = Target->makeReg( 1903 IceType_f32, RegMIPS32::get64PairSecondRegNum(SRegNum)); 1904 Variable *DstFPRHi, *DstFPRLo; 1905 if (MovInstr->getDestHi() != nullptr && Dest != nullptr) { 1906 DstFPRHi = Target->makeReg(IceType_i32, 1907 MovInstr->getDestHi()->getRegNum()); 1908 DstFPRLo = Target->makeReg(IceType_i32, Dest->getRegNum()); 1909 } else { 1910 DstFPRHi = Target->makeReg( 1911 IceType_i32, RegMIPS32::get64PairFirstRegNum(DRegNum)); 1912 DstFPRLo = Target->makeReg( 1913 IceType_i32, RegMIPS32::get64PairSecondRegNum(DRegNum)); 1914 } 1915 Target->_mov(DstFPRHi, SrcGPRHi); 1916 Target->_mov(DstFPRLo, SrcGPRLo); 1917 Legalized = true; 1918 } else { 1919 Variable *SrcGPR = Target->makeReg(IceType_f32, SRegNum); 1920 Variable *DstFPR = Target->makeReg(IceType_i32, DRegNum); 1921 Target->_mov(DstFPR, SrcGPR); 1922 Legalized = true; 1923 } 1924 } else { 1925 // Dest is FPR and SrcR is GPR. Use mtc1. 
1926 if (typeWidthInBytes(Dest->getType()) == 8) {
1927 Variable *SrcGPRHi, *SrcGPRLo;
1928 // SrcR could be $zero, which is i32.
1929 if (SRegNum == RegMIPS32::Reg_ZERO) {
1930 SrcGPRHi = Target->makeReg(IceType_i32, SRegNum);
1931 SrcGPRLo = SrcGPRHi;
1932 } else {
1933 // Split it into two mtc1 instructions.
1934 if (MovInstr->getSrcSize() == 2) {
1935 const auto FirstReg =
1936 (llvm::cast<Variable>(MovInstr->getSrc(0)))->getRegNum();
1937 const auto SecondReg =
1938 (llvm::cast<Variable>(MovInstr->getSrc(1)))->getRegNum();
1939 SrcGPRHi = Target->makeReg(IceType_i32, FirstReg);
1940 SrcGPRLo = Target->makeReg(IceType_i32, SecondReg);
1941 } else {
1942 SrcGPRLo = Target->makeReg(
1943 IceType_i32, RegMIPS32::get64PairFirstRegNum(SRegNum));
1944 SrcGPRHi = Target->makeReg(
1945 IceType_i32, RegMIPS32::get64PairSecondRegNum(SRegNum));
1946 }
1947 }
1948 Variable *DstFPRHi = Target->makeReg(
1949 IceType_f32, RegMIPS32::get64PairFirstRegNum(DRegNum));
1950 Variable *DstFPRLo = Target->makeReg(
1951 IceType_f32, RegMIPS32::get64PairSecondRegNum(DRegNum));
1952 Target->_mov(DstFPRHi, SrcGPRLo);
1953 Target->_mov(DstFPRLo, SrcGPRHi);
1954 Legalized = true;
1955 } else {
1956 Variable *SrcGPR = Target->makeReg(IceType_i32, SRegNum);
1957 Variable *DstFPR = Target->makeReg(IceType_f32, DRegNum);
1958 Target->_mov(DstFPR, SrcGPR);
1959 Legalized = true;
1960 }
1961 }
1962 }
1963 if (Legalized) {
1964 if (MovInstr->isDestRedefined()) {
1965 Target->_set_dest_redefined();
1966 }
1967 MovInstr->setDeleted();
1968 return;
1969 }
1970 }
1971
1972 if (!Dest->hasReg()) {
1973 auto *SrcR = llvm::cast<Variable>(Src);
1974 assert(SrcR->hasReg());
1975 assert(!SrcR->isRematerializable());
1976 int32_t Offset = Dest->getStackOffset();
1977
1978 // This is a _mov(Mem(), Variable), i.e., a store.
1979 auto *Base = Target->getPhysicalRegister(Target->getFrameOrStackReg());
1980
1981 OperandMIPS32Mem *TAddr = OperandMIPS32Mem::create(
1982 Target->Func, DestTy, Base,
1983 llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset)));
1984 OperandMIPS32Mem *TAddrHi = OperandMIPS32Mem::create(
1985 Target->Func, DestTy, Base,
1986 llvm::cast<ConstantInteger32>(
1987 Target->Ctx->getConstantInt32(Offset + 4)));
1988 OperandMIPS32Mem *Addr = legalizeMemOperand(TAddr);
1989
1990 // FP arguments are passed in GP registers if the first argument is in a
1991 // GP register. In that case the type of SrcR is still FP, so we must
1992 // explicitly generate sw instead of swc1.
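// E.g. an f32 value that arrived in $6 is stored with
//   sw $6, OFF($fp)
// rather than swc1, which would require the value to be in an FPU register
// (register names and offsets here are purely illustrative).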
1993 const RegNumT RegNum = SrcR->getRegNum(); 1994 const bool IsSrcGPReg = RegMIPS32::isGPRReg(SrcR->getRegNum()); 1995 if (SrcTy == IceType_f32 && IsSrcGPReg) { 1996 Variable *SrcGPR = Target->makeReg(IceType_i32, RegNum); 1997 Sandboxer(Target).sw(SrcGPR, Addr); 1998 } else if (SrcTy == IceType_f64 && IsSrcGPReg) { 1999 Variable *SrcGPRHi = 2000 Target->makeReg(IceType_i32, RegMIPS32::get64PairFirstRegNum(RegNum)); 2001 Variable *SrcGPRLo = Target->makeReg( 2002 IceType_i32, RegMIPS32::get64PairSecondRegNum(RegNum)); 2003 Sandboxer(Target).sw(SrcGPRHi, Addr); 2004 OperandMIPS32Mem *AddrHi = legalizeMemOperand(TAddrHi); 2005 Sandboxer(Target).sw(SrcGPRLo, AddrHi); 2006 } else if (DestTy == IceType_f64 && IsSrcGPReg) { 2007 const auto FirstReg = 2008 (llvm::cast<Variable>(MovInstr->getSrc(0)))->getRegNum(); 2009 const auto SecondReg = 2010 (llvm::cast<Variable>(MovInstr->getSrc(1)))->getRegNum(); 2011 Variable *SrcGPRHi = Target->makeReg(IceType_i32, FirstReg); 2012 Variable *SrcGPRLo = Target->makeReg(IceType_i32, SecondReg); 2013 Sandboxer(Target).sw(SrcGPRLo, Addr); 2014 OperandMIPS32Mem *AddrHi = legalizeMemOperand(TAddrHi); 2015 Sandboxer(Target).sw(SrcGPRHi, AddrHi); 2016 } else { 2017 Sandboxer(Target).sw(SrcR, Addr); 2018 } 2019 2020 Target->Context.insert<InstFakeDef>(Dest); 2021 Legalized = true; 2022 } else if (auto *Var = llvm::dyn_cast<Variable>(Src)) { 2023 if (Var->isRematerializable()) { 2024 // This is equivalent to an x86 _lea(RematOffset(%esp/%ebp), Variable). 2025 2026 // ExtraOffset is only needed for stack-pointer based frames as we have 2027 // to account for spill storage. 2028 const int32_t ExtraOffset = 2029 (Var->getRegNum() == Target->getFrameOrStackReg()) 2030 ? Target->getFrameFixedAllocaOffset() 2031 : 0; 2032 2033 const int32_t Offset = Var->getStackOffset() + ExtraOffset; 2034 Variable *Base = Target->getPhysicalRegister(Var->getRegNum()); 2035 Variable *T = newBaseRegister(Base, Offset, Dest->getRegNum()); 2036 Target->_mov(Dest, T); 2037 Legalized = true; 2038 } else { 2039 if (!Var->hasReg()) { 2040 // This is a _mov(Variable, Mem()), i.e., a load. 2041 const int32_t Offset = Var->getStackOffset(); 2042 auto *Base = Target->getPhysicalRegister(Target->getFrameOrStackReg()); 2043 const RegNumT RegNum = Dest->getRegNum(); 2044 const bool IsDstGPReg = RegMIPS32::isGPRReg(Dest->getRegNum()); 2045 // If we are moving i64 to a double using stack then the address may 2046 // not be aligned to 8-byte boundary as we split i64 into Hi-Lo parts 2047 // and store them individually with 4-byte alignment. Load the Hi-Lo 2048 // parts in TmpReg and move them to the dest using mtc1. 
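// Roughly, the sequence is (registers and offsets are illustrative):
//   lw   $at, OFF($fp)      # low word
//   mtc1 $at, $f0
//   lw   $at, OFF+4($fp)    # high word
//   mtc1 $at, $f1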
2049 if (DestTy == IceType_f64 && !Utils::IsAligned(Offset, 8) &&
2050 !IsDstGPReg) {
2051 auto *Reg = Target->makeReg(IceType_i32, Target->getReservedTmpReg());
2052 const RegNumT RegNum = Dest->getRegNum();
2053 Variable *DestLo = Target->makeReg(
2054 IceType_f32, RegMIPS32::get64PairFirstRegNum(RegNum));
2055 Variable *DestHi = Target->makeReg(
2056 IceType_f32, RegMIPS32::get64PairSecondRegNum(RegNum));
2057 OperandMIPS32Mem *AddrLo = OperandMIPS32Mem::create(
2058 Target->Func, IceType_i32, Base,
2059 llvm::cast<ConstantInteger32>(
2060 Target->Ctx->getConstantInt32(Offset)));
2061 OperandMIPS32Mem *AddrHi = OperandMIPS32Mem::create(
2062 Target->Func, IceType_i32, Base,
2063 llvm::cast<ConstantInteger32>(
2064 Target->Ctx->getConstantInt32(Offset + 4)));
2065 Sandboxer(Target).lw(Reg, AddrLo);
2066 Target->_mov(DestLo, Reg);
2067 Sandboxer(Target).lw(Reg, AddrHi);
2068 Target->_mov(DestHi, Reg);
2069 } else {
2070 OperandMIPS32Mem *TAddr = OperandMIPS32Mem::create(
2071 Target->Func, DestTy, Base,
2072 llvm::cast<ConstantInteger32>(
2073 Target->Ctx->getConstantInt32(Offset)));
2074 OperandMIPS32Mem *Addr = legalizeMemOperand(TAddr);
2075 OperandMIPS32Mem *TAddrHi = OperandMIPS32Mem::create(
2076 Target->Func, DestTy, Base,
2077 llvm::cast<ConstantInteger32>(
2078 Target->Ctx->getConstantInt32(Offset + 4)));
2079 // FP arguments are passed in GP registers if the first argument is in a
2080 // GP register. In that case the type of Dest is still FP, so we must
2081 // explicitly generate lw instead of lwc1.
2082 if (DestTy == IceType_f32 && IsDstGPReg) {
2083 Variable *DstGPR = Target->makeReg(IceType_i32, RegNum);
2084 Sandboxer(Target).lw(DstGPR, Addr);
2085 } else if (DestTy == IceType_f64 && IsDstGPReg) {
2086 Variable *DstGPRHi = Target->makeReg(
2087 IceType_i32, RegMIPS32::get64PairFirstRegNum(RegNum));
2088 Variable *DstGPRLo = Target->makeReg(
2089 IceType_i32, RegMIPS32::get64PairSecondRegNum(RegNum));
2090 Sandboxer(Target).lw(DstGPRHi, Addr);
2091 OperandMIPS32Mem *AddrHi = legalizeMemOperand(TAddrHi);
2092 Sandboxer(Target).lw(DstGPRLo, AddrHi);
2103 } else {
2104 Sandboxer(Target).lw(Dest, Addr);
2105 }
2106 }
2107 Legalized = true;
2108 }
2109 }
2110 }
2111
2112 if (Legalized) {
2113 if (MovInstr->isDestRedefined()) {
2114 Target->_set_dest_redefined();
2115 }
2116 MovInstr->setDeleted();
2117 }
2118 }
2119
2120 OperandMIPS32Mem *
2121 TargetMIPS32::PostLoweringLegalizer::legalizeMemOperand(OperandMIPS32Mem *Mem) {
2122 if (llvm::isa<ConstantRelocatable>(Mem->getOffset())) {
2123 return nullptr;
2124 }
2125 Variable *Base = Mem->getBase();
2126 auto *Ci32 = llvm::cast<ConstantInteger32>(Mem->getOffset());
2127 int32_t Offset = Ci32->getValue();
2128
2129 if (Base->isRematerializable()) {
2130 const int32_t ExtraOffset =
2131 (Base->getRegNum() == Target->getFrameOrStackReg())
2132 ?
Target->getFrameFixedAllocaOffset() 2133 : 0; 2134 Offset += Base->getStackOffset() + ExtraOffset; 2135 Base = Target->getPhysicalRegister(Base->getRegNum()); 2136 } 2137 2138 constexpr bool SignExt = true; 2139 if (!OperandMIPS32Mem::canHoldOffset(Mem->getType(), SignExt, Offset)) { 2140 Base = newBaseRegister(Base, Offset, Target->getReservedTmpReg()); 2141 Offset = 0; 2142 } 2143 2144 return OperandMIPS32Mem::create( 2145 Target->Func, Mem->getType(), Base, 2146 llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset))); 2147 } 2148 2149 Variable *TargetMIPS32::PostLoweringLegalizer::legalizeImmediate(int32_t Imm) { 2150 Variable *Reg = nullptr; 2151 if (!((std::numeric_limits<int16_t>::min() <= Imm) && 2152 (Imm <= std::numeric_limits<int16_t>::max()))) { 2153 const uint32_t UpperBits = (Imm >> 16) & 0xFFFF; 2154 const uint32_t LowerBits = Imm & 0xFFFF; 2155 Variable *TReg = Target->makeReg(IceType_i32, Target->getReservedTmpReg()); 2156 Reg = Target->makeReg(IceType_i32, Target->getReservedTmpReg()); 2157 if (LowerBits) { 2158 Target->_lui(TReg, Target->Ctx->getConstantInt32(UpperBits)); 2159 Target->_ori(Reg, TReg, LowerBits); 2160 } else { 2161 Target->_lui(Reg, Target->Ctx->getConstantInt32(UpperBits)); 2162 } 2163 } 2164 return Reg; 2165 } 2166 2167 void TargetMIPS32::postLowerLegalization() { 2168 Func->dump("Before postLowerLegalization"); 2169 assert(hasComputedFrame()); 2170 for (CfgNode *Node : Func->getNodes()) { 2171 Context.init(Node); 2172 PostLoweringLegalizer Legalizer(this); 2173 while (!Context.atEnd()) { 2174 PostIncrLoweringContext PostIncrement(Context); 2175 Inst *CurInstr = iteratorToInst(Context.getCur()); 2176 const SizeT NumSrcs = CurInstr->getSrcSize(); 2177 Operand *Src0 = NumSrcs < 1 ? nullptr : CurInstr->getSrc(0); 2178 Operand *Src1 = NumSrcs < 2 ? 
nullptr : CurInstr->getSrc(1); 2179 auto *Src0V = llvm::dyn_cast_or_null<Variable>(Src0); 2180 auto *Src0M = llvm::dyn_cast_or_null<OperandMIPS32Mem>(Src0); 2181 auto *Src1M = llvm::dyn_cast_or_null<OperandMIPS32Mem>(Src1); 2182 Variable *Dst = CurInstr->getDest(); 2183 if (auto *MovInstr = llvm::dyn_cast<InstMIPS32Mov>(CurInstr)) { 2184 Legalizer.legalizeMov(MovInstr); 2185 continue; 2186 } 2187 if (auto *MovInstr = llvm::dyn_cast<InstMIPS32MovFP64ToI64>(CurInstr)) { 2188 Legalizer.legalizeMovFp(MovInstr); 2189 continue; 2190 } 2191 if (llvm::isa<InstMIPS32Sw>(CurInstr)) { 2192 if (auto *LegalMem = Legalizer.legalizeMemOperand(Src1M)) { 2193 Sandboxer(this).sw(Src0V, LegalMem); 2194 CurInstr->setDeleted(); 2195 } 2196 continue; 2197 } 2198 if (llvm::isa<InstMIPS32Swc1>(CurInstr)) { 2199 if (auto *LegalMem = Legalizer.legalizeMemOperand(Src1M)) { 2200 _swc1(Src0V, LegalMem); 2201 CurInstr->setDeleted(); 2202 } 2203 continue; 2204 } 2205 if (llvm::isa<InstMIPS32Sdc1>(CurInstr)) { 2206 if (auto *LegalMem = Legalizer.legalizeMemOperand(Src1M)) { 2207 _sdc1(Src0V, LegalMem); 2208 CurInstr->setDeleted(); 2209 } 2210 continue; 2211 } 2212 if (llvm::isa<InstMIPS32Lw>(CurInstr)) { 2213 if (auto *LegalMem = Legalizer.legalizeMemOperand(Src0M)) { 2214 Sandboxer(this).lw(Dst, LegalMem); 2215 CurInstr->setDeleted(); 2216 } 2217 continue; 2218 } 2219 if (llvm::isa<InstMIPS32Lwc1>(CurInstr)) { 2220 if (auto *LegalMem = Legalizer.legalizeMemOperand(Src0M)) { 2221 _lwc1(Dst, LegalMem); 2222 CurInstr->setDeleted(); 2223 } 2224 continue; 2225 } 2226 if (llvm::isa<InstMIPS32Ldc1>(CurInstr)) { 2227 if (auto *LegalMem = Legalizer.legalizeMemOperand(Src0M)) { 2228 _ldc1(Dst, LegalMem); 2229 CurInstr->setDeleted(); 2230 } 2231 continue; 2232 } 2233 if (auto *AddiuInstr = llvm::dyn_cast<InstMIPS32Addiu>(CurInstr)) { 2234 if (auto *LegalImm = Legalizer.legalizeImmediate( 2235 static_cast<int32_t>(AddiuInstr->getImmediateValue()))) { 2236 _addu(Dst, Src0V, LegalImm); 2237 CurInstr->setDeleted(); 2238 } 2239 continue; 2240 } 2241 } 2242 } 2243 } 2244 2245 Operand *TargetMIPS32::loOperand(Operand *Operand) { 2246 assert(Operand->getType() == IceType_i64); 2247 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand)) 2248 return Var64On32->getLo(); 2249 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { 2250 return Ctx->getConstantInt32(static_cast<uint32_t>(Const->getValue())); 2251 } 2252 if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) { 2253 // Conservatively disallow memory operands with side-effects (pre/post 2254 // increment) in case of duplication. 
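// Splitting duplicates the memory operand for the lo and hi words, and a
// duplicated auto-increment mode would update the base register twice.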
2255 assert(Mem->getAddrMode() == OperandMIPS32Mem::Offset); 2256 return OperandMIPS32Mem::create(Func, IceType_i32, Mem->getBase(), 2257 Mem->getOffset(), Mem->getAddrMode()); 2258 } 2259 llvm_unreachable("Unsupported operand type"); 2260 return nullptr; 2261 } 2262 2263 Operand *TargetMIPS32::getOperandAtIndex(Operand *Operand, Type BaseType, 2264 uint32_t Index) { 2265 if (!isVectorType(Operand->getType())) { 2266 llvm::report_fatal_error("getOperandAtIndex: Operand is not vector"); 2267 return nullptr; 2268 } 2269 2270 if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) { 2271 assert(Mem->getAddrMode() == OperandMIPS32Mem::Offset); 2272 Variable *Base = Mem->getBase(); 2273 auto *Offset = llvm::cast<ConstantInteger32>(Mem->getOffset()); 2274 assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4)); 2275 int32_t NextOffsetVal = 2276 Offset->getValue() + (Index * typeWidthInBytes(BaseType)); 2277 constexpr bool NoSignExt = false; 2278 if (!OperandMIPS32Mem::canHoldOffset(BaseType, NoSignExt, NextOffsetVal)) { 2279 Constant *_4 = Ctx->getConstantInt32(4); 2280 Variable *NewBase = Func->makeVariable(Base->getType()); 2281 lowerArithmetic( 2282 InstArithmetic::create(Func, InstArithmetic::Add, NewBase, Base, _4)); 2283 Base = NewBase; 2284 } else { 2285 Offset = 2286 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal)); 2287 } 2288 return OperandMIPS32Mem::create(Func, BaseType, Base, Offset, 2289 Mem->getAddrMode()); 2290 } 2291 2292 if (auto *VarVecOn32 = llvm::dyn_cast<VariableVecOn32>(Operand)) 2293 return VarVecOn32->getContainers()[Index]; 2294 2295 llvm_unreachable("Unsupported operand type"); 2296 return nullptr; 2297 } 2298 2299 Operand *TargetMIPS32::hiOperand(Operand *Operand) { 2300 assert(Operand->getType() == IceType_i64); 2301 if (Operand->getType() != IceType_i64) 2302 return Operand; 2303 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand)) 2304 return Var64On32->getHi(); 2305 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { 2306 return Ctx->getConstantInt32( 2307 static_cast<uint32_t>(Const->getValue() >> 32)); 2308 } 2309 if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) { 2310 // Conservatively disallow memory operands with side-effects 2311 // in case of duplication. 2312 assert(Mem->getAddrMode() == OperandMIPS32Mem::Offset); 2313 const Type SplitType = IceType_i32; 2314 Variable *Base = Mem->getBase(); 2315 auto *Offset = llvm::cast<ConstantInteger32>(Mem->getOffset()); 2316 assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4)); 2317 int32_t NextOffsetVal = Offset->getValue() + 4; 2318 constexpr bool SignExt = false; 2319 if (!OperandMIPS32Mem::canHoldOffset(SplitType, SignExt, NextOffsetVal)) { 2320 // We have to make a temp variable and add 4 to either Base or Offset. 2321 // If we add 4 to Offset, this will convert a non-RegReg addressing 2322 // mode into a RegReg addressing mode. Since NaCl sandboxing disallows 2323 // RegReg addressing modes, prefer adding to base and replacing instead. 2324 // Thus we leave the old offset alone. 
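// E.g. with offset 0x7FFC (assuming the usual signed 16-bit offset field):
// 0x7FFC + 4 == 0x8000 no longer fits, so we rebase (NewBase = Base + 4)
// and reuse 0x7FFC as the offset of the new operand.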
2325 Constant *Four = Ctx->getConstantInt32(4); 2326 Variable *NewBase = Func->makeVariable(Base->getType()); 2327 lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase, 2328 Base, Four)); 2329 Base = NewBase; 2330 } else { 2331 Offset = 2332 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal)); 2333 } 2334 return OperandMIPS32Mem::create(Func, SplitType, Base, Offset, 2335 Mem->getAddrMode()); 2336 } 2337 llvm_unreachable("Unsupported operand type"); 2338 return nullptr; 2339 } 2340 2341 SmallBitVector TargetMIPS32::getRegisterSet(RegSetMask Include, 2342 RegSetMask Exclude) const { 2343 SmallBitVector Registers(RegMIPS32::Reg_NUM); 2344 2345 #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \ 2346 isI64Pair, isFP32, isFP64, isVec128, alias_init) \ 2347 if (scratch && (Include & RegSet_CallerSave)) \ 2348 Registers[RegMIPS32::val] = true; \ 2349 if (preserved && (Include & RegSet_CalleeSave)) \ 2350 Registers[RegMIPS32::val] = true; \ 2351 if (stackptr && (Include & RegSet_StackPointer)) \ 2352 Registers[RegMIPS32::val] = true; \ 2353 if (frameptr && (Include & RegSet_FramePointer)) \ 2354 Registers[RegMIPS32::val] = true; \ 2355 if (scratch && (Exclude & RegSet_CallerSave)) \ 2356 Registers[RegMIPS32::val] = false; \ 2357 if (preserved && (Exclude & RegSet_CalleeSave)) \ 2358 Registers[RegMIPS32::val] = false; \ 2359 if (stackptr && (Exclude & RegSet_StackPointer)) \ 2360 Registers[RegMIPS32::val] = false; \ 2361 if (frameptr && (Exclude & RegSet_FramePointer)) \ 2362 Registers[RegMIPS32::val] = false; 2363 2364 REGMIPS32_TABLE 2365 2366 #undef X 2367 2368 if (NeedSandboxing) { 2369 Registers[RegMIPS32::Reg_T6] = false; 2370 Registers[RegMIPS32::Reg_T7] = false; 2371 Registers[RegMIPS32::Reg_T8] = false; 2372 } 2373 return Registers; 2374 } 2375 2376 void TargetMIPS32::lowerAlloca(const InstAlloca *Instr) { 2377 // Conservatively require the stack to be aligned. Some stack adjustment 2378 // operations implemented below assume that the stack is aligned before the 2379 // alloca. All the alloca code ensures that the stack alignment is preserved 2380 // after the alloca. The stack alignment restriction can be relaxed in some 2381 // cases. 2382 NeedsStackAlignment = true; 2383 2384 // For default align=0, set it to the real value 1, to avoid any 2385 // bit-manipulation problems below. 2386 const uint32_t AlignmentParam = std::max(1u, Instr->getAlignInBytes()); 2387 2388 // LLVM enforces power of 2 alignment. 2389 assert(llvm::isPowerOf2_32(AlignmentParam)); 2390 assert(llvm::isPowerOf2_32(MIPS32_STACK_ALIGNMENT_BYTES)); 2391 2392 const uint32_t Alignment = 2393 std::max(AlignmentParam, MIPS32_STACK_ALIGNMENT_BYTES); 2394 const bool OverAligned = Alignment > MIPS32_STACK_ALIGNMENT_BYTES; 2395 const bool OptM1 = Func->getOptLevel() == Opt_m1; 2396 const bool AllocaWithKnownOffset = Instr->getKnownFrameOffset(); 2397 const bool UseFramePointer = 2398 hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1; 2399 2400 if (UseFramePointer) 2401 setHasFramePointer(); 2402 2403 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP); 2404 2405 Variable *Dest = Instr->getDest(); 2406 Operand *TotalSize = Instr->getSizeInBytes(); 2407 2408 if (const auto *ConstantTotalSize = 2409 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { 2410 const uint32_t Value = 2411 Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment); 2412 FixedAllocaSizeBytes += Value; 2413 // Constant size alloca. 
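// E.g. an "alloca 24, align 8" rounds up to 32 bytes here (the 16-byte
// stack alignment dominates) and, when no frame pointer is needed, costs no
// machine instructions at all: only a FakeDef of the rematerializable Dest
// is inserted below.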
2414 if (!UseFramePointer) {
2415 // If we don't need a Frame Pointer, this alloca has a known offset to the
2416 // stack pointer. We don't need to adjust the stack pointer, nor assign any
2417 // value to Dest, as Dest is rematerializable.
2418 assert(Dest->isRematerializable());
2419 Context.insert<InstFakeDef>(Dest);
2420 return;
2421 }
2422
2423 if (Alignment > MIPS32_STACK_ALIGNMENT_BYTES) {
2424 CurrentAllocaOffset =
2425 Utils::applyAlignment(CurrentAllocaOffset, Alignment);
2426 }
2427 auto *T = I32Reg();
2428 _addiu(T, SP, CurrentAllocaOffset);
2429 _mov(Dest, T);
2430 CurrentAllocaOffset += Value;
2431 return;
2432
2433 } else {
2434 // Non-constant sizes need to be adjusted to the next highest multiple of
2435 // the required alignment at runtime.
2436 VariableAllocaUsed = true;
2437 VariableAllocaAlignBytes = AlignmentParam;
2438 Variable *AlignAmount;
2439 auto *TotalSizeR = legalizeToReg(TotalSize, Legal_Reg);
2440 auto *T1 = I32Reg();
2441 auto *T2 = I32Reg();
2442 auto *T3 = I32Reg();
2443 auto *T4 = I32Reg();
2444 auto *T5 = I32Reg();
2445 _addiu(T1, TotalSizeR, MIPS32_STACK_ALIGNMENT_BYTES - 1);
2446 _addiu(T2, getZero(), -MIPS32_STACK_ALIGNMENT_BYTES);
2447 _and(T3, T1, T2);
2448 _subu(T4, SP, T3);
2449 if (Instr->getAlignInBytes()) {
2450 AlignAmount =
2451 legalizeToReg(Ctx->getConstantInt32(-AlignmentParam), Legal_Reg);
2452 _and(T5, T4, AlignAmount);
2453 _mov(Dest, T5);
2454 } else {
2455 _mov(Dest, T4);
2456 }
2457 if (OptM1)
2458 _mov(SP, Dest);
2459 else
2460 Sandboxer(this).reset_sp(Dest);
2461 return;
2462 }
2463 }
2464
2465 void TargetMIPS32::lowerInt64Arithmetic(const InstArithmetic *Instr,
2466 Variable *Dest, Operand *Src0,
2467 Operand *Src1) {
2468 InstArithmetic::OpKind Op = Instr->getOp();
2469 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
2470 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2471 Variable *Src0LoR = nullptr;
2472 Variable *Src1LoR = nullptr;
2473 Variable *Src0HiR = nullptr;
2474 Variable *Src1HiR = nullptr;
2475
2476 switch (Op) {
2477 case InstArithmetic::_num:
2478 llvm::report_fatal_error("Unknown arithmetic operator");
2479 return;
2480 case InstArithmetic::Add: {
2481 Src0LoR = legalizeToReg(loOperand(Src0));
2482 Src1LoR = legalizeToReg(loOperand(Src1));
2483 Src0HiR = legalizeToReg(hiOperand(Src0));
2484 Src1HiR = legalizeToReg(hiOperand(Src1));
2485 auto *T_Carry = I32Reg(), *T_Lo = I32Reg(), *T_Hi = I32Reg(),
2486 *T_Hi2 = I32Reg();
2487 _addu(T_Lo, Src0LoR, Src1LoR);
2488 _mov(DestLo, T_Lo);
2489 _sltu(T_Carry, T_Lo, Src0LoR);
2490 _addu(T_Hi, T_Carry, Src0HiR);
2491 _addu(T_Hi2, Src1HiR, T_Hi);
2492 _mov(DestHi, T_Hi2);
2493 return;
2494 }
2495 case InstArithmetic::And: {
2496 Src0LoR = legalizeToReg(loOperand(Src0));
2497 Src1LoR = legalizeToReg(loOperand(Src1));
2498 Src0HiR = legalizeToReg(hiOperand(Src0));
2499 Src1HiR = legalizeToReg(hiOperand(Src1));
2500 auto *T_Lo = I32Reg(), *T_Hi = I32Reg();
2501 _and(T_Lo, Src0LoR, Src1LoR);
2502 _mov(DestLo, T_Lo);
2503 _and(T_Hi, Src0HiR, Src1HiR);
2504 _mov(DestHi, T_Hi);
2505 return;
2506 }
2507 case InstArithmetic::Sub: {
2508 Src0LoR = legalizeToReg(loOperand(Src0));
2509 Src1LoR = legalizeToReg(loOperand(Src1));
2510 Src0HiR = legalizeToReg(hiOperand(Src0));
2511 Src1HiR = legalizeToReg(hiOperand(Src1));
2512 auto *T_Borrow = I32Reg(), *T_Lo = I32Reg(), *T_Hi = I32Reg(),
2513 *T_Hi2 = I32Reg();
2514 _subu(T_Lo, Src0LoR, Src1LoR);
2515 _mov(DestLo, T_Lo);
2516 _sltu(T_Borrow, Src0LoR, Src1LoR);
2517 _addu(T_Hi, T_Borrow, Src1HiR);
2518 _subu(T_Hi2, Src0HiR,
T_Hi); 2519 _mov(DestHi, T_Hi2); 2520 return; 2521 } 2522 case InstArithmetic::Or: { 2523 Src0LoR = legalizeToReg(loOperand(Src0)); 2524 Src1LoR = legalizeToReg(loOperand(Src1)); 2525 Src0HiR = legalizeToReg(hiOperand(Src0)); 2526 Src1HiR = legalizeToReg(hiOperand(Src1)); 2527 auto *T_Lo = I32Reg(), *T_Hi = I32Reg(); 2528 _or(T_Lo, Src0LoR, Src1LoR); 2529 _mov(DestLo, T_Lo); 2530 _or(T_Hi, Src0HiR, Src1HiR); 2531 _mov(DestHi, T_Hi); 2532 return; 2533 } 2534 case InstArithmetic::Xor: { 2535 Src0LoR = legalizeToReg(loOperand(Src0)); 2536 Src1LoR = legalizeToReg(loOperand(Src1)); 2537 Src0HiR = legalizeToReg(hiOperand(Src0)); 2538 Src1HiR = legalizeToReg(hiOperand(Src1)); 2539 auto *T_Lo = I32Reg(), *T_Hi = I32Reg(); 2540 _xor(T_Lo, Src0LoR, Src1LoR); 2541 _mov(DestLo, T_Lo); 2542 _xor(T_Hi, Src0HiR, Src1HiR); 2543 _mov(DestHi, T_Hi); 2544 return; 2545 } 2546 case InstArithmetic::Mul: { 2547 // TODO(rkotler): Make sure that mul has the side effect of clobbering 2548 // LO, HI. Check for any other LO, HI quirkiness in this section. 2549 Src0LoR = legalizeToReg(loOperand(Src0)); 2550 Src1LoR = legalizeToReg(loOperand(Src1)); 2551 Src0HiR = legalizeToReg(hiOperand(Src0)); 2552 Src1HiR = legalizeToReg(hiOperand(Src1)); 2553 auto *T_Lo = I32Reg(RegMIPS32::Reg_LO), *T_Hi = I32Reg(RegMIPS32::Reg_HI); 2554 auto *T1 = I32Reg(), *T2 = I32Reg(); 2555 auto *TM1 = I32Reg(), *TM2 = I32Reg(), *TM3 = I32Reg(), *TM4 = I32Reg(); 2556 _multu(T_Lo, Src0LoR, Src1LoR); 2557 Context.insert<InstFakeDef>(T_Hi, T_Lo); 2558 _mflo(T1, T_Lo); 2559 _mfhi(T2, T_Hi); 2560 _mov(DestLo, T1); 2561 _mul(TM1, Src0HiR, Src1LoR); 2562 _mul(TM2, Src0LoR, Src1HiR); 2563 _addu(TM3, TM1, T2); 2564 _addu(TM4, TM3, TM2); 2565 _mov(DestHi, TM4); 2566 return; 2567 } 2568 case InstArithmetic::Shl: { 2569 auto *T_Lo = I32Reg(); 2570 auto *T_Hi = I32Reg(); 2571 auto *T1_Lo = I32Reg(); 2572 auto *T1_Hi = I32Reg(); 2573 auto *T1 = I32Reg(); 2574 auto *T2 = I32Reg(); 2575 auto *T3 = I32Reg(); 2576 auto *T4 = I32Reg(); 2577 auto *T5 = I32Reg(); 2578 2579 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Src1)) { 2580 Src0LoR = legalizeToReg(loOperand(Src0)); 2581 int64_t ShiftAmount = Const->getValue(); 2582 if (ShiftAmount == 1) { 2583 Src0HiR = legalizeToReg(hiOperand(Src0)); 2584 _addu(T_Lo, Src0LoR, Src0LoR); 2585 _sltu(T1, T_Lo, Src0LoR); 2586 _addu(T2, T1, Src0HiR); 2587 _addu(T_Hi, Src0HiR, T2); 2588 } else if (ShiftAmount < INT32_BITS) { 2589 Src0HiR = legalizeToReg(hiOperand(Src0)); 2590 _srl(T1, Src0LoR, INT32_BITS - ShiftAmount); 2591 _sll(T2, Src0HiR, ShiftAmount); 2592 _or(T_Hi, T1, T2); 2593 _sll(T_Lo, Src0LoR, ShiftAmount); 2594 } else if (ShiftAmount == INT32_BITS) { 2595 _addiu(T_Lo, getZero(), 0); 2596 _mov(T_Hi, Src0LoR); 2597 } else if (ShiftAmount > INT32_BITS && ShiftAmount < 64) { 2598 _sll(T_Hi, Src0LoR, ShiftAmount - INT32_BITS); 2599 _addiu(T_Lo, getZero(), 0); 2600 } 2601 _mov(DestLo, T_Lo); 2602 _mov(DestHi, T_Hi); 2603 return; 2604 } 2605 2606 Src0LoR = legalizeToReg(loOperand(Src0)); 2607 Src1LoR = legalizeToReg(loOperand(Src1)); 2608 Src0HiR = legalizeToReg(hiOperand(Src0)); 2609 2610 _sllv(T1, Src0HiR, Src1LoR); 2611 _not(T2, Src1LoR); 2612 _srl(T3, Src0LoR, 1); 2613 _srlv(T4, T3, T2); 2614 _or(T_Hi, T1, T4); 2615 _sllv(T_Lo, Src0LoR, Src1LoR); 2616 2617 _mov(T1_Hi, T_Hi); 2618 _mov(T1_Lo, T_Lo); 2619 _andi(T5, Src1LoR, INT32_BITS); 2620 _movn(T1_Hi, T_Lo, T5); 2621 _movn(T1_Lo, getZero(), T5); 2622 _mov(DestHi, T1_Hi); 2623 _mov(DestLo, T1_Lo); 2624 return; 2625 } 2626 case InstArithmetic::Lshr: { 2627 2628 
auto *T_Lo = I32Reg(); 2629 auto *T_Hi = I32Reg(); 2630 auto *T1_Lo = I32Reg(); 2631 auto *T1_Hi = I32Reg(); 2632 auto *T1 = I32Reg(); 2633 auto *T2 = I32Reg(); 2634 auto *T3 = I32Reg(); 2635 auto *T4 = I32Reg(); 2636 auto *T5 = I32Reg(); 2637 2638 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Src1)) { 2639 Src0HiR = legalizeToReg(hiOperand(Src0)); 2640 int64_t ShiftAmount = Const->getValue(); 2641 if (ShiftAmount < INT32_BITS) { 2642 Src0LoR = legalizeToReg(loOperand(Src0)); 2643 _sll(T1, Src0HiR, INT32_BITS - ShiftAmount); 2644 _srl(T2, Src0LoR, ShiftAmount); 2645 _or(T_Lo, T1, T2); 2646 _srl(T_Hi, Src0HiR, ShiftAmount); 2647 } else if (ShiftAmount == INT32_BITS) { 2648 _mov(T_Lo, Src0HiR); 2649 _addiu(T_Hi, getZero(), 0); 2650 } else if (ShiftAmount > INT32_BITS && ShiftAmount < 64) { 2651 _srl(T_Lo, Src0HiR, ShiftAmount - INT32_BITS); 2652 _addiu(T_Hi, getZero(), 0); 2653 } 2654 _mov(DestLo, T_Lo); 2655 _mov(DestHi, T_Hi); 2656 return; 2657 } 2658 2659 Src0LoR = legalizeToReg(loOperand(Src0)); 2660 Src1LoR = legalizeToReg(loOperand(Src1)); 2661 Src0HiR = legalizeToReg(hiOperand(Src0)); 2662 2663 _srlv(T1, Src0LoR, Src1LoR); 2664 _not(T2, Src1LoR); 2665 _sll(T3, Src0HiR, 1); 2666 _sllv(T4, T3, T2); 2667 _or(T_Lo, T1, T4); 2668 _srlv(T_Hi, Src0HiR, Src1LoR); 2669 2670 _mov(T1_Hi, T_Hi); 2671 _mov(T1_Lo, T_Lo); 2672 _andi(T5, Src1LoR, INT32_BITS); 2673 _movn(T1_Lo, T_Hi, T5); 2674 _movn(T1_Hi, getZero(), T5); 2675 _mov(DestHi, T1_Hi); 2676 _mov(DestLo, T1_Lo); 2677 return; 2678 } 2679 case InstArithmetic::Ashr: { 2680 2681 auto *T_Lo = I32Reg(); 2682 auto *T_Hi = I32Reg(); 2683 auto *T1_Lo = I32Reg(); 2684 auto *T1_Hi = I32Reg(); 2685 auto *T1 = I32Reg(); 2686 auto *T2 = I32Reg(); 2687 auto *T3 = I32Reg(); 2688 auto *T4 = I32Reg(); 2689 auto *T5 = I32Reg(); 2690 auto *T6 = I32Reg(); 2691 2692 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Src1)) { 2693 Src0HiR = legalizeToReg(hiOperand(Src0)); 2694 int64_t ShiftAmount = Const->getValue(); 2695 if (ShiftAmount < INT32_BITS) { 2696 Src0LoR = legalizeToReg(loOperand(Src0)); 2697 _sll(T1, Src0HiR, INT32_BITS - ShiftAmount); 2698 _srl(T2, Src0LoR, ShiftAmount); 2699 _or(T_Lo, T1, T2); 2700 _sra(T_Hi, Src0HiR, ShiftAmount); 2701 } else if (ShiftAmount == INT32_BITS) { 2702 _sra(T_Hi, Src0HiR, INT32_BITS - 1); 2703 _mov(T_Lo, Src0HiR); 2704 } else if (ShiftAmount > INT32_BITS && ShiftAmount < 64) { 2705 _sra(T_Lo, Src0HiR, ShiftAmount - INT32_BITS); 2706 _sra(T_Hi, Src0HiR, INT32_BITS - 1); 2707 } 2708 _mov(DestLo, T_Lo); 2709 _mov(DestHi, T_Hi); 2710 return; 2711 } 2712 2713 Src0LoR = legalizeToReg(loOperand(Src0)); 2714 Src1LoR = legalizeToReg(loOperand(Src1)); 2715 Src0HiR = legalizeToReg(hiOperand(Src0)); 2716 2717 _srlv(T1, Src0LoR, Src1LoR); 2718 _not(T2, Src1LoR); 2719 _sll(T3, Src0HiR, 1); 2720 _sllv(T4, T3, T2); 2721 _or(T_Lo, T1, T4); 2722 _srav(T_Hi, Src0HiR, Src1LoR); 2723 2724 _mov(T1_Hi, T_Hi); 2725 _mov(T1_Lo, T_Lo); 2726 _andi(T5, Src1LoR, INT32_BITS); 2727 _movn(T1_Lo, T_Hi, T5); 2728 _sra(T6, Src0HiR, INT32_BITS - 1); 2729 _movn(T1_Hi, T6, T5); 2730 _mov(DestHi, T1_Hi); 2731 _mov(DestLo, T1_Lo); 2732 return; 2733 } 2734 case InstArithmetic::Fadd: 2735 case InstArithmetic::Fsub: 2736 case InstArithmetic::Fmul: 2737 case InstArithmetic::Fdiv: 2738 case InstArithmetic::Frem: 2739 llvm::report_fatal_error("FP instruction with i64 type"); 2740 return; 2741 case InstArithmetic::Udiv: 2742 case InstArithmetic::Sdiv: 2743 case InstArithmetic::Urem: 2744 case InstArithmetic::Srem: 2745 llvm::report_fatal_error("64-bit div and 
rem should have been prelowered"); 2746 return; 2747 } 2748 } 2749 2750 void TargetMIPS32::lowerArithmetic(const InstArithmetic *Instr) { 2751 Variable *Dest = Instr->getDest(); 2752 2753 if (Dest->isRematerializable()) { 2754 Context.insert<InstFakeDef>(Dest); 2755 return; 2756 } 2757 2758 // We need to signal all the UnimplementedLoweringError errors before any 2759 // legalization into new variables, otherwise Om1 register allocation may fail 2760 // when it sees variables that are defined but not used. 2761 Type DestTy = Dest->getType(); 2762 Operand *Src0 = legalizeUndef(Instr->getSrc(0)); 2763 Operand *Src1 = legalizeUndef(Instr->getSrc(1)); 2764 if (DestTy == IceType_i64) { 2765 lowerInt64Arithmetic(Instr, Instr->getDest(), Src0, Src1); 2766 return; 2767 } 2768 if (isVectorType(Dest->getType())) { 2769 llvm::report_fatal_error("Arithmetic: Destination type is vector"); 2770 return; 2771 } 2772 2773 Variable *T = makeReg(Dest->getType()); 2774 Variable *Src0R = legalizeToReg(Src0); 2775 Variable *Src1R = nullptr; 2776 uint32_t Value = 0; 2777 bool IsSrc1Imm16 = false; 2778 2779 switch (Instr->getOp()) { 2780 case InstArithmetic::Add: 2781 case InstArithmetic::Sub: { 2782 auto *Const32 = llvm::dyn_cast<ConstantInteger32>(Src1); 2783 if (Const32 != nullptr && isInt<16>(int32_t(Const32->getValue()))) { 2784 IsSrc1Imm16 = true; 2785 Value = Const32->getValue(); 2786 } else { 2787 Src1R = legalizeToReg(Src1); 2788 } 2789 break; 2790 } 2791 case InstArithmetic::And: 2792 case InstArithmetic::Or: 2793 case InstArithmetic::Xor: 2794 case InstArithmetic::Shl: 2795 case InstArithmetic::Lshr: 2796 case InstArithmetic::Ashr: { 2797 auto *Const32 = llvm::dyn_cast<ConstantInteger32>(Src1); 2798 if (Const32 != nullptr && llvm::isUInt<16>(uint32_t(Const32->getValue()))) { 2799 IsSrc1Imm16 = true; 2800 Value = Const32->getValue(); 2801 } else { 2802 Src1R = legalizeToReg(Src1); 2803 } 2804 break; 2805 } 2806 default: 2807 Src1R = legalizeToReg(Src1); 2808 break; 2809 } 2810 constexpr uint32_t DivideByZeroTrapCode = 7; 2811 2812 switch (Instr->getOp()) { 2813 case InstArithmetic::_num: 2814 break; 2815 case InstArithmetic::Add: { 2816 auto *T0R = Src0R; 2817 auto *T1R = Src1R; 2818 if (Dest->getType() != IceType_i32) { 2819 T0R = makeReg(IceType_i32); 2820 lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R)); 2821 if (!IsSrc1Imm16) { 2822 T1R = makeReg(IceType_i32); 2823 lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R)); 2824 } 2825 } 2826 if (IsSrc1Imm16) { 2827 _addiu(T, T0R, Value); 2828 } else { 2829 _addu(T, T0R, T1R); 2830 } 2831 _mov(Dest, T); 2832 return; 2833 } 2834 case InstArithmetic::And: 2835 if (IsSrc1Imm16) { 2836 _andi(T, Src0R, Value); 2837 } else { 2838 _and(T, Src0R, Src1R); 2839 } 2840 _mov(Dest, T); 2841 return; 2842 case InstArithmetic::Or: 2843 if (IsSrc1Imm16) { 2844 _ori(T, Src0R, Value); 2845 } else { 2846 _or(T, Src0R, Src1R); 2847 } 2848 _mov(Dest, T); 2849 return; 2850 case InstArithmetic::Xor: 2851 if (IsSrc1Imm16) { 2852 _xori(T, Src0R, Value); 2853 } else { 2854 _xor(T, Src0R, Src1R); 2855 } 2856 _mov(Dest, T); 2857 return; 2858 case InstArithmetic::Sub: { 2859 auto *T0R = Src0R; 2860 auto *T1R = Src1R; 2861 if (Dest->getType() != IceType_i32) { 2862 T0R = makeReg(IceType_i32); 2863 lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R)); 2864 if (!IsSrc1Imm16) { 2865 T1R = makeReg(IceType_i32); 2866 lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R)); 2867 } 2868 } 2869 if (IsSrc1Imm16) { 2870 _addiu(T, T0R, -Value); 2871 } else { 
2872 _subu(T, T0R, T1R); 2873 } 2874 _mov(Dest, T); 2875 return; 2876 } 2877 case InstArithmetic::Mul: { 2878 _mul(T, Src0R, Src1R); 2879 _mov(Dest, T); 2880 return; 2881 } 2882 case InstArithmetic::Shl: { 2883 if (IsSrc1Imm16) { 2884 _sll(T, Src0R, Value); 2885 } else { 2886 _sllv(T, Src0R, Src1R); 2887 } 2888 _mov(Dest, T); 2889 return; 2890 } 2891 case InstArithmetic::Lshr: { 2892 auto *T0R = Src0R; 2893 auto *T1R = Src1R; 2894 if (Dest->getType() != IceType_i32) { 2895 T0R = makeReg(IceType_i32); 2896 lowerCast(InstCast::create(Func, InstCast::Zext, T0R, Src0R)); 2897 if (!IsSrc1Imm16) { 2898 T1R = makeReg(IceType_i32); 2899 lowerCast(InstCast::create(Func, InstCast::Zext, T1R, Src1R)); 2900 } 2901 } 2902 if (IsSrc1Imm16) { 2903 _srl(T, T0R, Value); 2904 } else { 2905 _srlv(T, T0R, T1R); 2906 } 2907 _mov(Dest, T); 2908 return; 2909 } 2910 case InstArithmetic::Ashr: { 2911 auto *T0R = Src0R; 2912 auto *T1R = Src1R; 2913 if (Dest->getType() != IceType_i32) { 2914 T0R = makeReg(IceType_i32); 2915 lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R)); 2916 if (!IsSrc1Imm16) { 2917 T1R = makeReg(IceType_i32); 2918 lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R)); 2919 } 2920 } 2921 if (IsSrc1Imm16) { 2922 _sra(T, T0R, Value); 2923 } else { 2924 _srav(T, T0R, T1R); 2925 } 2926 _mov(Dest, T); 2927 return; 2928 } 2929 case InstArithmetic::Udiv: { 2930 auto *T_Zero = I32Reg(RegMIPS32::Reg_ZERO); 2931 auto *T0R = Src0R; 2932 auto *T1R = Src1R; 2933 if (Dest->getType() != IceType_i32) { 2934 T0R = makeReg(IceType_i32); 2935 lowerCast(InstCast::create(Func, InstCast::Zext, T0R, Src0R)); 2936 T1R = makeReg(IceType_i32); 2937 lowerCast(InstCast::create(Func, InstCast::Zext, T1R, Src1R)); 2938 } 2939 _divu(T_Zero, T0R, T1R); 2940 _teq(T1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero 2941 _mflo(T, T_Zero); 2942 _mov(Dest, T); 2943 return; 2944 } 2945 case InstArithmetic::Sdiv: { 2946 auto *T_Zero = I32Reg(RegMIPS32::Reg_ZERO); 2947 auto *T0R = Src0R; 2948 auto *T1R = Src1R; 2949 if (Dest->getType() != IceType_i32) { 2950 T0R = makeReg(IceType_i32); 2951 lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R)); 2952 T1R = makeReg(IceType_i32); 2953 lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R)); 2954 } 2955 _div(T_Zero, T0R, T1R); 2956 _teq(T1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero 2957 _mflo(T, T_Zero); 2958 _mov(Dest, T); 2959 return; 2960 } 2961 case InstArithmetic::Urem: { 2962 auto *T_Zero = I32Reg(RegMIPS32::Reg_ZERO); 2963 auto *T0R = Src0R; 2964 auto *T1R = Src1R; 2965 if (Dest->getType() != IceType_i32) { 2966 T0R = makeReg(IceType_i32); 2967 lowerCast(InstCast::create(Func, InstCast::Zext, T0R, Src0R)); 2968 T1R = makeReg(IceType_i32); 2969 lowerCast(InstCast::create(Func, InstCast::Zext, T1R, Src1R)); 2970 } 2971 _divu(T_Zero, T0R, T1R); 2972 _teq(T1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero 2973 _mfhi(T, T_Zero); 2974 _mov(Dest, T); 2975 return; 2976 } 2977 case InstArithmetic::Srem: { 2978 auto *T_Zero = I32Reg(RegMIPS32::Reg_ZERO); 2979 auto *T0R = Src0R; 2980 auto *T1R = Src1R; 2981 if (Dest->getType() != IceType_i32) { 2982 T0R = makeReg(IceType_i32); 2983 lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R)); 2984 T1R = makeReg(IceType_i32); 2985 lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R)); 2986 } 2987 _div(T_Zero, T0R, T1R); 2988 _teq(T1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero 2989 _mfhi(T, T_Zero); 2990 _mov(Dest, T); 2991 return; 2992 } 2993 case 
InstArithmetic::Fadd: {
2994 if (DestTy == IceType_f32) {
2995 _add_s(T, Src0R, Src1R);
2996 _mov(Dest, T);
2997 return;
2998 }
2999 if (DestTy == IceType_f64) {
3000 _add_d(T, Src0R, Src1R);
3001 _mov(Dest, T);
3002 return;
3003 }
3004 break;
3005 }
3006 case InstArithmetic::Fsub:
3007 if (DestTy == IceType_f32) {
3008 _sub_s(T, Src0R, Src1R);
3009 _mov(Dest, T);
3010 return;
3011 }
3012 if (DestTy == IceType_f64) {
3013 _sub_d(T, Src0R, Src1R);
3014 _mov(Dest, T);
3015 return;
3016 }
3017 break;
3018 case InstArithmetic::Fmul:
3019 if (DestTy == IceType_f32) {
3020 _mul_s(T, Src0R, Src1R);
3021 _mov(Dest, T);
3022 return;
3023 }
3024 if (DestTy == IceType_f64) {
3025 _mul_d(T, Src0R, Src1R);
3026 _mov(Dest, T);
3027 return;
3028 }
3029 break;
3030 case InstArithmetic::Fdiv:
3031 if (DestTy == IceType_f32) {
3032 _div_s(T, Src0R, Src1R);
3033 _mov(Dest, T);
3034 return;
3035 }
3036 if (DestTy == IceType_f64) {
3037 _div_d(T, Src0R, Src1R);
3038 _mov(Dest, T);
3039 return;
3040 }
3041 break;
3042 case InstArithmetic::Frem:
3043 llvm::report_fatal_error("frem should have been prelowered.");
3044 break;
3045 }
3046 llvm::report_fatal_error("Unknown arithmetic operator");
3047 }
3048
3049 void TargetMIPS32::lowerAssign(const InstAssign *Instr) {
3050 Variable *Dest = Instr->getDest();
3051
3052 if (Dest->isRematerializable()) {
3053 Context.insert<InstFakeDef>(Dest);
3054 return;
3055 }
3056
3057 // The source type may not be the same as the destination's.
3058 if (isVectorType(Dest->getType())) {
3059 Operand *Src0 = legalizeUndef(Instr->getSrc(0));
3060 auto *DstVec = llvm::dyn_cast<VariableVecOn32>(Dest);
3061 for (SizeT i = 0; i < DstVec->ContainersPerVector; ++i) {
3062 auto *DCont = DstVec->getContainers()[i];
3063 auto *SCont =
3064 legalize(getOperandAtIndex(Src0, IceType_i32, i), Legal_Reg);
3065 auto *TReg = makeReg(IceType_i32);
3066 _mov(TReg, SCont);
3067 _mov(DCont, TReg);
3068 }
3069 return;
3070 }
3071 Operand *Src0 = Instr->getSrc(0);
3072 assert(Dest->getType() == Src0->getType());
3073 if (Dest->getType() == IceType_i64) {
3074 Src0 = legalizeUndef(Src0);
3075 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg);
3076 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg);
3077 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3078 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3079 auto *T_Lo = I32Reg(), *T_Hi = I32Reg();
3080 _mov(T_Lo, Src0Lo);
3081 _mov(DestLo, T_Lo);
3082 _mov(T_Hi, Src0Hi);
3083 _mov(DestHi, T_Hi);
3084 return;
3085 }
3086 Operand *SrcR;
3087 if (Dest->hasReg()) {
3088 // If Dest already has a physical register, then legalize the Src operand
3089 // into a Variable with the same register assignment, so the assignment
3090 // itself needs no extra copy.
3091 SrcR = legalize(Src0, Legal_Reg, Dest->getRegNum());
3092 } else {
3093 // Dest could be a stack operand. Since we could potentially need to do a
3094 // store (and stores can only take register operands), legalize this to a
3095 // register.
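// The store itself is produced later: the post-lowering legalizer rewrites
// a _mov whose Dest lives on the stack into an sw (see
// PostLoweringLegalizer::legalizeMov above).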
3096 SrcR = legalize(Src0, Legal_Reg); 3097 } 3098 _mov(Dest, SrcR); 3099 } 3100 3101 void TargetMIPS32::lowerBr(const InstBr *Instr) { 3102 if (Instr->isUnconditional()) { 3103 _br(Instr->getTargetUnconditional()); 3104 return; 3105 } 3106 CfgNode *TargetTrue = Instr->getTargetTrue(); 3107 CfgNode *TargetFalse = Instr->getTargetFalse(); 3108 Operand *Boolean = Instr->getCondition(); 3109 const Inst *Producer = Computations.getProducerOf(Boolean); 3110 if (Producer == nullptr) { 3111 // Since we don't know the producer of this boolean we will assume its 3112 // producer will keep it in positive logic and just emit beqz with this 3113 // Boolean as an operand. 3114 auto *BooleanR = legalizeToReg(Boolean); 3115 _br(TargetTrue, TargetFalse, BooleanR, CondMIPS32::Cond::EQZ); 3116 return; 3117 } 3118 if (Producer->getKind() == Inst::Icmp) { 3119 const InstIcmp *CompareInst = llvm::cast<InstIcmp>(Producer); 3120 Operand *Src0 = CompareInst->getSrc(0); 3121 Operand *Src1 = CompareInst->getSrc(1); 3122 const Type Src0Ty = Src0->getType(); 3123 assert(Src0Ty == Src1->getType()); 3124 3125 Variable *Src0R = nullptr; 3126 Variable *Src1R = nullptr; 3127 Variable *Src0HiR = nullptr; 3128 Variable *Src1HiR = nullptr; 3129 if (Src0Ty == IceType_i64) { 3130 Src0R = legalizeToReg(loOperand(Src0)); 3131 Src1R = legalizeToReg(loOperand(Src1)); 3132 Src0HiR = legalizeToReg(hiOperand(Src0)); 3133 Src1HiR = legalizeToReg(hiOperand(Src1)); 3134 } else { 3135 auto *Src0RT = legalizeToReg(Src0); 3136 auto *Src1RT = legalizeToReg(Src1); 3137 // Sign/Zero extend the source operands 3138 if (Src0Ty != IceType_i32) { 3139 InstCast::OpKind CastKind; 3140 switch (CompareInst->getCondition()) { 3141 case InstIcmp::Eq: 3142 case InstIcmp::Ne: 3143 case InstIcmp::Sgt: 3144 case InstIcmp::Sge: 3145 case InstIcmp::Slt: 3146 case InstIcmp::Sle: 3147 CastKind = InstCast::Sext; 3148 break; 3149 default: 3150 CastKind = InstCast::Zext; 3151 break; 3152 } 3153 Src0R = makeReg(IceType_i32); 3154 Src1R = makeReg(IceType_i32); 3155 lowerCast(InstCast::create(Func, CastKind, Src0R, Src0RT)); 3156 lowerCast(InstCast::create(Func, CastKind, Src1R, Src1RT)); 3157 } else { 3158 Src0R = Src0RT; 3159 Src1R = Src1RT; 3160 } 3161 } 3162 auto *DestT = makeReg(IceType_i32); 3163 3164 switch (CompareInst->getCondition()) { 3165 default: 3166 llvm_unreachable("unexpected condition"); 3167 return; 3168 case InstIcmp::Eq: { 3169 if (Src0Ty == IceType_i64) { 3170 auto *T1 = I32Reg(); 3171 auto *T2 = I32Reg(); 3172 auto *T3 = I32Reg(); 3173 _xor(T1, Src0HiR, Src1HiR); 3174 _xor(T2, Src0R, Src1R); 3175 _or(T3, T1, T2); 3176 _mov(DestT, T3); 3177 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ); 3178 } else { 3179 _br(TargetTrue, TargetFalse, Src0R, Src1R, CondMIPS32::Cond::NE); 3180 } 3181 return; 3182 } 3183 case InstIcmp::Ne: { 3184 if (Src0Ty == IceType_i64) { 3185 auto *T1 = I32Reg(); 3186 auto *T2 = I32Reg(); 3187 auto *T3 = I32Reg(); 3188 _xor(T1, Src0HiR, Src1HiR); 3189 _xor(T2, Src0R, Src1R); 3190 _or(T3, T1, T2); 3191 _mov(DestT, T3); 3192 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ); 3193 } else { 3194 _br(TargetTrue, TargetFalse, Src0R, Src1R, CondMIPS32::Cond::EQ); 3195 } 3196 return; 3197 } 3198 case InstIcmp::Ugt: { 3199 if (Src0Ty == IceType_i64) { 3200 auto *T1 = I32Reg(); 3201 auto *T2 = I32Reg(); 3202 auto *T3 = I32Reg(); 3203 auto *T4 = I32Reg(); 3204 auto *T5 = I32Reg(); 3205 _xor(T1, Src0HiR, Src1HiR); 3206 _sltu(T2, Src1HiR, Src0HiR); 3207 _xori(T3, T2, 1); 3208 _sltu(T4, Src1R, Src0R); 3209 _xori(T5, 
T4, 1); 3210 _movz(T3, T5, T1); 3211 _mov(DestT, T3); 3212 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ); 3213 } else { 3214 _sltu(DestT, Src1R, Src0R); 3215 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ); 3216 } 3217 return; 3218 } 3219 case InstIcmp::Uge: { 3220 if (Src0Ty == IceType_i64) { 3221 auto *T1 = I32Reg(); 3222 auto *T2 = I32Reg(); 3223 auto *T3 = I32Reg(); 3224 _xor(T1, Src0HiR, Src1HiR); 3225 _sltu(T2, Src0HiR, Src1HiR); 3226 _sltu(T3, Src0R, Src1R); 3227 _movz(T2, T3, T1); 3228 _mov(DestT, T2); 3229 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ); 3230 } else { 3231 _sltu(DestT, Src0R, Src1R); 3232 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ); 3233 } 3234 return; 3235 } 3236 case InstIcmp::Ult: { 3237 if (Src0Ty == IceType_i64) { 3238 auto *T1 = I32Reg(); 3239 auto *T2 = I32Reg(); 3240 auto *T3 = I32Reg(); 3241 auto *T4 = I32Reg(); 3242 auto *T5 = I32Reg(); 3243 _xor(T1, Src0HiR, Src1HiR); 3244 _sltu(T2, Src0HiR, Src1HiR); 3245 _xori(T3, T2, 1); 3246 _sltu(T4, Src0R, Src1R); 3247 _xori(T5, T4, 1); 3248 _movz(T3, T5, T1); 3249 _mov(DestT, T3); 3250 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ); 3251 } else { 3252 _sltu(DestT, Src0R, Src1R); 3253 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ); 3254 } 3255 return; 3256 } 3257 case InstIcmp::Ule: { 3258 if (Src0Ty == IceType_i64) { 3259 auto *T1 = I32Reg(); 3260 auto *T2 = I32Reg(); 3261 auto *T3 = I32Reg(); 3262 _xor(T1, Src0HiR, Src1HiR); 3263 _sltu(T2, Src1HiR, Src0HiR); 3264 _sltu(T3, Src1R, Src0R); 3265 _movz(T2, T3, T1); 3266 _mov(DestT, T2); 3267 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ); 3268 } else { 3269 _sltu(DestT, Src1R, Src0R); 3270 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ); 3271 } 3272 return; 3273 } 3274 case InstIcmp::Sgt: { 3275 if (Src0Ty == IceType_i64) { 3276 auto *T1 = I32Reg(); 3277 auto *T2 = I32Reg(); 3278 auto *T3 = I32Reg(); 3279 auto *T4 = I32Reg(); 3280 auto *T5 = I32Reg(); 3281 _xor(T1, Src0HiR, Src1HiR); 3282 _slt(T2, Src1HiR, Src0HiR); 3283 _xori(T3, T2, 1); 3284 _sltu(T4, Src1R, Src0R); 3285 _xori(T5, T4, 1); 3286 _movz(T3, T5, T1); 3287 _mov(DestT, T3); 3288 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ); 3289 } else { 3290 _slt(DestT, Src1R, Src0R); 3291 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ); 3292 } 3293 return; 3294 } 3295 case InstIcmp::Sge: { 3296 if (Src0Ty == IceType_i64) { 3297 auto *T1 = I32Reg(); 3298 auto *T2 = I32Reg(); 3299 auto *T3 = I32Reg(); 3300 _xor(T1, Src0HiR, Src1HiR); 3301 _slt(T2, Src0HiR, Src1HiR); 3302 _sltu(T3, Src0R, Src1R); 3303 _movz(T2, T3, T1); 3304 _mov(DestT, T2); 3305 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ); 3306 } else { 3307 _slt(DestT, Src0R, Src1R); 3308 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ); 3309 } 3310 return; 3311 } 3312 case InstIcmp::Slt: { 3313 if (Src0Ty == IceType_i64) { 3314 auto *T1 = I32Reg(); 3315 auto *T2 = I32Reg(); 3316 auto *T3 = I32Reg(); 3317 auto *T4 = I32Reg(); 3318 auto *T5 = I32Reg(); 3319 _xor(T1, Src0HiR, Src1HiR); 3320 _slt(T2, Src0HiR, Src1HiR); 3321 _xori(T3, T2, 1); 3322 _sltu(T4, Src0R, Src1R); 3323 _xori(T5, T4, 1); 3324 _movz(T3, T5, T1); 3325 _mov(DestT, T3); 3326 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ); 3327 } else { 3328 _slt(DestT, Src0R, Src1R); 3329 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ); 3330 } 3331 return; 3332 } 3333 case InstIcmp::Sle: { 3334 if (Src0Ty == IceType_i64) { 3335 auto *T1 = I32Reg(); 3336 

void TargetMIPS32::lowerCall(const InstCall *Instr) {
  CfgVector<Variable *> RegArgs;
  NeedsStackAlignment = true;

  // Assign arguments to registers and stack. Also reserve stack.
  TargetMIPS32::CallingConv CC;

  // Pair of Arg Operand -> GPR number assignments.
  llvm::SmallVector<std::pair<Operand *, RegNumT>, MIPS32_MAX_GPR_ARG> GPRArgs;
  llvm::SmallVector<std::pair<Operand *, RegNumT>, MIPS32_MAX_FP_ARG> FPArgs;
  // Pair of Arg Operand -> stack offset.
  llvm::SmallVector<std::pair<Operand *, int32_t>, 8> StackArgs;
  size_t ParameterAreaSizeBytes = 16;

  // Classify each argument operand according to the location where the
  // argument is passed.

  // v4f32 is returned through the stack: $4 is set up by the caller and is
  // passed as the first argument implicitly, and the callee then copies the
  // return vector to the memory pointed to by $4.
  SizeT ArgNum = 0;
  Variable *Dest = Instr->getDest();
  Variable *RetVecFloat = nullptr;
  if (Dest && isVectorFloatingType(Dest->getType())) {
    ArgNum = 1;
    CC.discardReg(RegMIPS32::Reg_A0);
    RetVecFloat = Func->makeVariable(IceType_i32);
    auto *ByteCount = ConstantInteger32::create(Ctx, IceType_i32, 16);
    constexpr SizeT Alignment = 4;
    lowerAlloca(InstAlloca::create(Func, RetVecFloat, ByteCount, Alignment));
    RegArgs.emplace_back(
        legalizeToReg(RetVecFloat, RegNumT::fixme(RegMIPS32::Reg_A0)));
  }

  for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
    Operand *Arg = legalizeUndef(Instr->getArg(i));
    const Type Ty = Arg->getType();
    bool InReg = false;
    RegNumT Reg;

    InReg = CC.argInReg(Ty, i, &Reg);

    if (!InReg) {
      if (isVectorType(Ty)) {
        auto *ArgVec = llvm::cast<VariableVecOn32>(Arg);
        ParameterAreaSizeBytes =
            applyStackAlignmentTy(ParameterAreaSizeBytes, IceType_i64);
        for (Variable *Elem : ArgVec->getContainers()) {
          StackArgs.push_back(std::make_pair(Elem, ParameterAreaSizeBytes));
          ParameterAreaSizeBytes += typeWidthInBytesOnStack(IceType_i32);
        }
      } else {
        ParameterAreaSizeBytes =
            applyStackAlignmentTy(ParameterAreaSizeBytes, Ty);
        StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes));
        ParameterAreaSizeBytes += typeWidthInBytesOnStack(Ty);
      }
      ++ArgNum;
      continue;
    }

    if (isVectorType(Ty)) {
      auto *ArgVec = llvm::cast<VariableVecOn32>(Arg);
      Operand *Elem0 = ArgVec->getContainers()[0];
      Operand *Elem1 = ArgVec->getContainers()[1];
      GPRArgs.push_back(
          std::make_pair(Elem0, RegNumT::fixme((unsigned)Reg + 0)));
      GPRArgs.push_back(
          std::make_pair(Elem1, RegNumT::fixme((unsigned)Reg + 1)));
      Operand *Elem2 = ArgVec->getContainers()[2];
      Operand *Elem3 = ArgVec->getContainers()[3];
      // The first vector argument is passed in $4:$5:$6:$7; the second and
      // later ones are passed in $6:$7:stack:stack.
      if (ArgNum == 0) {
        GPRArgs.push_back(
            std::make_pair(Elem2, RegNumT::fixme((unsigned)Reg + 2)));
        GPRArgs.push_back(
            std::make_pair(Elem3, RegNumT::fixme((unsigned)Reg + 3)));
      } else {
        ParameterAreaSizeBytes =
            applyStackAlignmentTy(ParameterAreaSizeBytes, IceType_i64);
        StackArgs.push_back(std::make_pair(Elem2, ParameterAreaSizeBytes));
        ParameterAreaSizeBytes += typeWidthInBytesOnStack(IceType_i32);
        StackArgs.push_back(std::make_pair(Elem3, ParameterAreaSizeBytes));
        ParameterAreaSizeBytes += typeWidthInBytesOnStack(IceType_i32);
      }
    } else if (Ty == IceType_i64) {
      Operand *Lo = loOperand(Arg);
      Operand *Hi = hiOperand(Arg);
      GPRArgs.push_back(
          std::make_pair(Lo, RegMIPS32::get64PairFirstRegNum(Reg)));
      GPRArgs.push_back(
          std::make_pair(Hi, RegMIPS32::get64PairSecondRegNum(Reg)));
    } else if (isScalarIntegerType(Ty)) {
      GPRArgs.push_back(std::make_pair(Arg, Reg));
    } else {
      FPArgs.push_back(std::make_pair(Arg, Reg));
    }
    ++ArgNum;
  }
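
  // Rough illustration of the classification above, assuming the O32-style
  // convention that CallingConv models: for a call f(i32 a, i64 b, f32 c),
  // "a" is assigned $4, "b" takes the aligned register pair $6:$7 (so $5 is
  // skipped), and "c" no longer fits in registers, so it gets a stack slot
  // at offset ParameterAreaSizeBytes, which starts at 16 to cover the
  // argument build area.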

  // Adjust the parameter area so that the stack is aligned. It is assumed
  // that the stack is already aligned at the start of the calling sequence.
  ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);

  // Copy arguments that are passed on the stack to the appropriate stack
  // locations.
  Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
  for (auto &StackArg : StackArgs) {
    ConstantInteger32 *Loc =
        llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second));
    Type Ty = StackArg.first->getType();
    OperandMIPS32Mem *Addr;
    constexpr bool SignExt = false;
    if (OperandMIPS32Mem::canHoldOffset(Ty, SignExt, StackArg.second)) {
      Addr = OperandMIPS32Mem::create(Func, Ty, SP, Loc);
    } else {
      Variable *NewBase = Func->makeVariable(SP->getType());
      lowerArithmetic(
          InstArithmetic::create(Func, InstArithmetic::Add, NewBase, SP, Loc));
      Addr = formMemoryOperand(NewBase, Ty);
    }
    lowerStore(InstStore::create(Func, StackArg.first, Addr));
  }
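
  // Note (descriptive): MIPS32 lw/sw immediates are signed 16-bit, which is
  // what canHoldOffset() guards; for larger offsets the code above first
  // materializes SP + offset into a new base register and addresses relative
  // to that instead.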

  // Generate the call instruction. Assign its result to a temporary with
  // high register allocation weight.

  // ReturnReg doubles as ReturnRegLo as necessary.
  Variable *ReturnReg = nullptr;
  Variable *ReturnRegHi = nullptr;
  if (Dest) {
    switch (Dest->getType()) {
    case IceType_NUM:
      llvm_unreachable("Invalid Call dest type");
      return;
    case IceType_void:
      break;
    case IceType_i1:
    case IceType_i8:
    case IceType_i16:
    case IceType_i32:
      ReturnReg = makeReg(Dest->getType(), RegMIPS32::Reg_V0);
      break;
    case IceType_i64:
      ReturnReg = I32Reg(RegMIPS32::Reg_V0);
      ReturnRegHi = I32Reg(RegMIPS32::Reg_V1);
      break;
    case IceType_f32:
      ReturnReg = makeReg(Dest->getType(), RegMIPS32::Reg_F0);
      break;
    case IceType_f64:
      ReturnReg = makeReg(IceType_f64, RegMIPS32::Reg_F0);
      break;
    case IceType_v4i1:
    case IceType_v8i1:
    case IceType_v16i1:
    case IceType_v16i8:
    case IceType_v8i16:
    case IceType_v4i32: {
      ReturnReg = makeReg(Dest->getType(), RegMIPS32::Reg_V0);
      auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg);
      RetVec->initVecElement(Func);
      for (SizeT i = 0; i < RetVec->ContainersPerVector; ++i) {
        auto *Var = RetVec->getContainers()[i];
        Var->setRegNum(RegNumT::fixme(RegMIPS32::Reg_V0 + i));
      }
      break;
    }
    case IceType_v4f32:
      ReturnReg = makeReg(IceType_i32, RegMIPS32::Reg_V0);
      break;
    }
  }
  Operand *CallTarget = Instr->getCallTarget();
  // Allow a ConstantRelocatable to be left alone as a direct call, but force
  // other constants like ConstantInteger32 into a register and make it an
  // indirect call.
  if (!llvm::isa<ConstantRelocatable>(CallTarget)) {
    CallTarget = legalize(CallTarget, Legal_Reg);
  }

  // Copy arguments to be passed in registers to the appropriate registers.
  for (auto &FPArg : FPArgs) {
    RegArgs.emplace_back(legalizeToReg(FPArg.first, FPArg.second));
  }
  for (auto &GPRArg : GPRArgs) {
    RegArgs.emplace_back(legalizeToReg(GPRArg.first, GPRArg.second));
  }

  // Generate a FakeUse of register arguments so that they do not get dead
  // code eliminated as a result of the FakeKill of scratch registers after
  // the call. These fake-uses need to be placed here to avoid argument
  // registers from being used during the legalizeToReg() calls above.
  for (auto *RegArg : RegArgs) {
    Context.insert<InstFakeUse>(RegArg);
  }

  // If a variable alloca is used, the extra bytes for the argument build
  // area will be allocated on the stack before the call.
  if (VariableAllocaUsed)
    Sandboxer(this).addiu_sp(-MaxOutArgsSizeBytes);

  Inst *NewCall;

  // We don't need to define the return register if it is a vector; fake defs
  // of it are inserted just after the call.
  if (ReturnReg && isVectorIntegerType(ReturnReg->getType())) {
    Variable *RetReg = nullptr;
    NewCall = InstMIPS32Call::create(Func, RetReg, CallTarget);
    Context.insert(NewCall);
  } else {
    NewCall = Sandboxer(this, InstBundleLock::Opt_AlignToEnd)
                  .jal(ReturnReg, CallTarget);
  }

  if (VariableAllocaUsed)
    Sandboxer(this).addiu_sp(MaxOutArgsSizeBytes);
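
  // Illustrative note (NaCl-specific assumption): Sandboxer emits the jal
  // inside a bundle lock, and Opt_AlignToEnd places the call at the end of
  // its bundle so that the return address lands on a bundle boundary, as the
  // sandbox's control-flow checks expect.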

  // Insert a fake use of the stack pointer to avoid dead code elimination of
  // the addiu instruction.
  Context.insert<InstFakeUse>(SP);

  if (ReturnRegHi)
    Context.insert(InstFakeDef::create(Func, ReturnRegHi));

  if (ReturnReg) {
    if (auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg)) {
      for (Variable *Var : RetVec->getContainers()) {
        Context.insert(InstFakeDef::create(Func, Var));
      }
    }
  }

  // Insert a register-kill pseudo instruction.
  Context.insert(InstFakeKill::create(Func, NewCall));

  // Generate a FakeUse to keep the call live if necessary.
  if (Instr->hasSideEffects() && ReturnReg) {
    if (auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg)) {
      for (Variable *Var : RetVec->getContainers()) {
        Context.insert<InstFakeUse>(Var);
      }
    } else {
      Context.insert<InstFakeUse>(ReturnReg);
    }
  }

  if (Dest == nullptr)
    return;

  // Assign the result of the call to Dest.
  if (ReturnReg) {
    if (RetVecFloat) {
      auto *DestVecOn32 = llvm::cast<VariableVecOn32>(Dest);
      auto *TBase = legalizeToReg(RetVecFloat);
      for (SizeT i = 0; i < DestVecOn32->ContainersPerVector; ++i) {
        auto *Var = DestVecOn32->getContainers()[i];
        auto *TVar = makeReg(IceType_i32);
        OperandMIPS32Mem *Mem = OperandMIPS32Mem::create(
            Func, IceType_i32, TBase,
            llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(i * 4)));
        _lw(TVar, Mem);
        _mov(Var, TVar);
      }
    } else if (auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg)) {
      auto *DestVecOn32 = llvm::cast<VariableVecOn32>(Dest);
      for (SizeT i = 0; i < DestVecOn32->ContainersPerVector; ++i) {
        _mov(DestVecOn32->getContainers()[i], RetVec->getContainers()[i]);
      }
    } else if (ReturnRegHi) {
      assert(Dest->getType() == IceType_i64);
      auto *Dest64On32 = llvm::cast<Variable64On32>(Dest);
      Variable *DestLo = Dest64On32->getLo();
      Variable *DestHi = Dest64On32->getHi();
      _mov(DestLo, ReturnReg);
      _mov(DestHi, ReturnRegHi);
    } else {
      assert(Dest->getType() == IceType_i32 ||
             Dest->getType() == IceType_i16 ||
             Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
             isScalarFloatingType(Dest->getType()) ||
             isVectorType(Dest->getType()));
      _mov(Dest, ReturnReg);
    }
  }
}

void TargetMIPS32::lowerCast(const InstCast *Instr) {
  InstCast::OpKind CastKind = Instr->getCastKind();
  Variable *Dest = Instr->getDest();
  Operand *Src0 = legalizeUndef(Instr->getSrc(0));
  const Type DestTy = Dest->getType();
  const Type Src0Ty = Src0->getType();
  const uint32_t ShiftAmount =
      (Src0Ty == IceType_i1
           ? INT32_BITS - 1
           : INT32_BITS - (CHAR_BITS * typeWidthInBytes(Src0Ty)));
  const uint32_t Mask =
      (Src0Ty == IceType_i1
           ? 1
           : (1 << (CHAR_BITS * typeWidthInBytes(Src0Ty))) - 1);
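
  // Worked example of the constants above: for Src0Ty == IceType_i8,
  // ShiftAmount is 32 - 8 == 24 and Mask is 0xff, so the Sext case below is
  // "sll t, src, 24; sra t, t, 24" and the Zext case is "andi t, src, 0xff".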

  if (isVectorType(DestTy)) {
    llvm::report_fatal_error("Cast: Destination type is vector");
    return;
  }
  switch (CastKind) {
  default:
    Func->setError("Cast type not supported");
    return;
  case InstCast::Sext: {
    if (DestTy == IceType_i64) {
      auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
      auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *Src0R = legalizeToReg(Src0);
      Variable *T1_Lo = I32Reg();
      Variable *T2_Lo = I32Reg();
      Variable *T_Hi = I32Reg();
      if (Src0Ty == IceType_i1) {
        _sll(T1_Lo, Src0R, INT32_BITS - 1);
        _sra(T2_Lo, T1_Lo, INT32_BITS - 1);
        _mov(DestHi, T2_Lo);
        _mov(DestLo, T2_Lo);
      } else if (Src0Ty == IceType_i8 || Src0Ty == IceType_i16) {
        _sll(T1_Lo, Src0R, ShiftAmount);
        _sra(T2_Lo, T1_Lo, ShiftAmount);
        _sra(T_Hi, T2_Lo, INT32_BITS - 1);
        _mov(DestHi, T_Hi);
        _mov(DestLo, T2_Lo);
      } else if (Src0Ty == IceType_i32) {
        _mov(T1_Lo, Src0R);
        _sra(T_Hi, T1_Lo, INT32_BITS - 1);
        _mov(DestHi, T_Hi);
        _mov(DestLo, T1_Lo);
      }
    } else {
      Variable *Src0R = legalizeToReg(Src0);
      Variable *T1 = makeReg(DestTy);
      Variable *T2 = makeReg(DestTy);
      if (Src0Ty == IceType_i1 || Src0Ty == IceType_i8 ||
          Src0Ty == IceType_i16) {
        _sll(T1, Src0R, ShiftAmount);
        _sra(T2, T1, ShiftAmount);
        _mov(Dest, T2);
      }
    }
    break;
  }
  case InstCast::Zext: {
    if (DestTy == IceType_i64) {
      auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
      auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *Src0R = legalizeToReg(Src0);
      Variable *T_Lo = I32Reg();
      Variable *T_Hi = I32Reg();

      if (Src0Ty == IceType_i1 || Src0Ty == IceType_i8 ||
          Src0Ty == IceType_i16)
        _andi(T_Lo, Src0R, Mask);
      else if (Src0Ty == IceType_i32)
        _mov(T_Lo, Src0R);
      else
        assert(Src0Ty != IceType_i64);
      _mov(DestLo, T_Lo);

      auto *Zero = getZero();
      _addiu(T_Hi, Zero, 0);
      _mov(DestHi, T_Hi);
    } else {
      Variable *Src0R = legalizeToReg(Src0);
      Variable *T = makeReg(DestTy);
      if (Src0Ty == IceType_i1 || Src0Ty == IceType_i8 ||
          Src0Ty == IceType_i16) {
        _andi(T, Src0R, Mask);
        _mov(Dest, T);
      }
    }
    break;
  }
  case InstCast::Trunc: {
    if (Src0Ty == IceType_i64)
      Src0 = loOperand(Src0);
    Variable *Src0R = legalizeToReg(Src0);
    Variable *T = makeReg(DestTy);
    switch (DestTy) {
    case IceType_i1:
      _andi(T, Src0R, 0x1);
      break;
    case IceType_i8:
      _andi(T, Src0R, 0xff);
      break;
    case IceType_i16:
      _andi(T, Src0R, 0xffff);
      break;
    default:
      _mov(T, Src0R);
      break;
    }
    _mov(Dest, T);
    break;
  }
  case InstCast::Fptrunc: {
    assert(Dest->getType() == IceType_f32);
    assert(Src0->getType() == IceType_f64);
    auto *DestR = legalizeToReg(Dest);
    auto *Src0R = legalizeToReg(Src0);
    _cvt_s_d(DestR, Src0R);
    _mov(Dest, DestR);
    break;
  }
  case InstCast::Fpext: {
    assert(Dest->getType() == IceType_f64);
    assert(Src0->getType() == IceType_f32);
    auto *DestR = legalizeToReg(Dest);
    auto *Src0R = legalizeToReg(Src0);
    _cvt_d_s(DestR, Src0R);
    _mov(Dest, DestR);
    break;
  }
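  // Note (descriptive): the fp-to-int cases below use trunc.w.s/trunc.w.d,
  // which round toward zero; that is exactly the fptosi semantics, and it is
  // how fptoui of in-range values is handled here as well.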
  case InstCast::Fptosi:
  case InstCast::Fptoui: {
    if (llvm::isa<Variable64On32>(Dest)) {
      llvm::report_fatal_error("fp-to-i64 should have been prelowered.");
      return;
    }
    if (DestTy != IceType_i64) {
      if (Src0Ty == IceType_f32 && isScalarIntegerType(DestTy)) {
        Variable *Src0R = legalizeToReg(Src0);
        Variable *FTmp = makeReg(IceType_f32);
        _trunc_w_s(FTmp, Src0R);
        _mov(Dest, FTmp);
        return;
      }
      if (Src0Ty == IceType_f64 && isScalarIntegerType(DestTy)) {
        Variable *Src0R = legalizeToReg(Src0);
        Variable *FTmp = makeReg(IceType_f64);
        _trunc_w_d(FTmp, Src0R);
        _mov(Dest, FTmp);
        return;
      }
    }
    llvm::report_fatal_error("Destination is i64 in fp-to-i32");
    break;
  }
  case InstCast::Sitofp:
  case InstCast::Uitofp: {
    if (llvm::isa<Variable64On32>(Dest)) {
      llvm::report_fatal_error("i64-to-fp should have been prelowered.");
      return;
    }
    if (Src0Ty != IceType_i64) {
      Variable *Src0R = legalizeToReg(Src0);
      auto *T0R = Src0R;
      if (Src0Ty != IceType_i32) {
        T0R = makeReg(IceType_i32);
        if (CastKind == InstCast::Uitofp)
          lowerCast(InstCast::create(Func, InstCast::Zext, T0R, Src0R));
        else
          lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
      }
      if (isScalarIntegerType(Src0Ty) && DestTy == IceType_f32) {
        Variable *FTmp1 = makeReg(IceType_f32);
        Variable *FTmp2 = makeReg(IceType_f32);
        _mtc1(FTmp1, T0R);
        _cvt_s_w(FTmp2, FTmp1);
        _mov(Dest, FTmp2);
        return;
      }
      if (isScalarIntegerType(Src0Ty) && DestTy == IceType_f64) {
        Variable *FTmp1 = makeReg(IceType_f64);
        Variable *FTmp2 = makeReg(IceType_f64);
        _mtc1(FTmp1, T0R);
        _cvt_d_w(FTmp2, FTmp1);
        _mov(Dest, FTmp2);
        return;
      }
    }
    llvm::report_fatal_error("Source is i64 in i32-to-fp");
    break;
  }
  case InstCast::Bitcast: {
    Operand *Src0 = Instr->getSrc(0);
    if (DestTy == Src0->getType()) {
      auto *Assign = InstAssign::create(Func, Dest, Src0);
      lowerAssign(Assign);
      return;
    }
    if (isVectorType(DestTy) || isVectorType(Src0->getType())) {
      llvm::report_fatal_error(
          "Bitcast: vector type should have been prelowered.");
      return;
    }
    switch (DestTy) {
    case IceType_NUM:
    case IceType_void:
      llvm::report_fatal_error("Unexpected bitcast.");
    case IceType_i1:
      UnimplementedLoweringError(this, Instr);
      break;
    case IceType_i8:
      assert(Src0->getType() == IceType_v8i1);
      llvm::report_fatal_error(
          "i8 to v8i1 conversion should have been prelowered.");
      break;
    case IceType_i16:
      assert(Src0->getType() == IceType_v16i1);
      llvm::report_fatal_error(
          "i16 to v16i1 conversion should have been prelowered.");
      break;
    case IceType_i32:
    case IceType_f32: {
      Variable *Src0R = legalizeToReg(Src0);
      _mov(Dest, Src0R);
      break;
    }
    case IceType_i64: {
      assert(Src0->getType() == IceType_f64);
      Variable *Src0R = legalizeToReg(Src0);
      auto *T = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
      T->initHiLo(Func);
      T->getHi()->setMustNotHaveReg();
      T->getLo()->setMustNotHaveReg();
      Context.insert<InstFakeDef>(T->getHi());
      Context.insert<InstFakeDef>(T->getLo());
      _mov_fp64_to_i64(T->getHi(), Src0R, Int64_Hi);
      _mov_fp64_to_i64(T->getLo(), Src0R, Int64_Lo);
      lowerAssign(InstAssign::create(Func, Dest, T));
      break;
    }
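    // Illustrative note: the f64 case below reassembles an i64 register pair
    // into an f64 with a single paired move (the two-source _mov overload),
    // the reverse of the explicit hi/lo extraction done with
    // _mov_fp64_to_i64 in the i64 case above.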
    case IceType_f64: {
      assert(Src0->getType() == IceType_i64);
      const uint32_t Mask = 0xFFFFFFFF;
      if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src0)) {
        Variable *RegHi, *RegLo;
        const uint64_t Value = C64->getValue();
        uint64_t Upper32Bits = (Value >> INT32_BITS) & Mask;
        uint64_t Lower32Bits = Value & Mask;
        RegLo = legalizeToReg(Ctx->getConstantInt32(Lower32Bits));
        RegHi = legalizeToReg(Ctx->getConstantInt32(Upper32Bits));
        _mov(Dest, RegHi, RegLo);
      } else {
        auto *Var64On32 = llvm::cast<Variable64On32>(Src0);
        auto *RegLo = legalizeToReg(loOperand(Var64On32));
        auto *RegHi = legalizeToReg(hiOperand(Var64On32));
        _mov(Dest, RegHi, RegLo);
      }
      break;
    }
    default:
      llvm::report_fatal_error("Unexpected bitcast.");
    }
    break;
  }
  }
}

void TargetMIPS32::lowerExtractElement(const InstExtractElement *Instr) {
  Variable *Dest = Instr->getDest();
  const Type DestTy = Dest->getType();
  Operand *Src1 = Instr->getSrc(1);
  if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src1)) {
    const uint32_t Index = Imm->getValue();
    Variable *TDest = makeReg(DestTy);
    Variable *TReg = makeReg(DestTy);
    auto *Src0 = legalizeUndef(Instr->getSrc(0));
    auto *Src0R = llvm::dyn_cast<VariableVecOn32>(Src0);
    // Number of elements in each container.
    uint32_t ElemPerCont =
        typeNumElements(Src0->getType()) / Src0R->ContainersPerVector;
    auto *Src = Src0R->getContainers()[Index / ElemPerCont];
    auto *SrcE = legalizeToReg(Src);
    // Position of the element in the container.
    uint32_t PosInCont = Index % ElemPerCont;
    if (ElemPerCont == 1) {
      _mov(TDest, SrcE);
    } else if (ElemPerCont == 2) {
      switch (PosInCont) {
      case 0:
        _andi(TDest, SrcE, 0xffff);
        break;
      case 1:
        _srl(TDest, SrcE, 16);
        break;
      default:
        llvm::report_fatal_error("ExtractElement: Invalid PosInCont");
        break;
      }
    } else if (ElemPerCont == 4) {
      switch (PosInCont) {
      case 0:
        _andi(TDest, SrcE, 0xff);
        break;
      case 1:
        _srl(TReg, SrcE, 8);
        _andi(TDest, TReg, 0xff);
        break;
      case 2:
        _srl(TReg, SrcE, 16);
        _andi(TDest, TReg, 0xff);
        break;
      case 3:
        _srl(TDest, SrcE, 24);
        break;
      default:
        llvm::report_fatal_error("ExtractElement: Invalid PosInCont");
        break;
      }
    }
    if (typeElementType(Src0R->getType()) == IceType_i1) {
      Variable *TReg1 = makeReg(DestTy);
      _andi(TReg1, TDest, 0x1);
      _mov(Dest, TReg1);
    } else {
      _mov(Dest, TDest);
    }
    return;
  }
  llvm::report_fatal_error("ExtractElement requires a constant index");
}

void TargetMIPS32::lowerFcmp(const InstFcmp *Instr) {
  Variable *Dest = Instr->getDest();
  if (isVectorType(Dest->getType())) {
    llvm::report_fatal_error("Fcmp: Destination type is vector");
    return;
  }

  auto *Src0 = Instr->getSrc(0);
  auto *Src1 = Instr->getSrc(1);
  auto *Zero = getZero();

  InstFcmp::FCond Cond = Instr->getCondition();
  auto *DestR = makeReg(IceType_i32);
  auto *Src0R = legalizeToReg(Src0);
  auto *Src1R = legalizeToReg(Src1);
  const Type Src0Ty = Src0->getType();

  Operand *FCC0 = OperandMIPS32FCC::create(getFunc(), OperandMIPS32FCC::FCC0);

  switch (Cond) {
  default: {
    llvm::report_fatal_error("Unhandled fp comparison.");
    return;
  }
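  // Orientation for the cases below (a summary, not emitted code): each
  // comparison issues one c.<cond>.s or c.<cond>.d, which sets the FCC0
  // flag, and then uses movf/movt to conditionally clear a register
  // preloaded with 1. Conditions without a direct c.<cond> encoding are
  // derived from a complement; e.g. Ogt is computed as "not c.ule".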
  case InstFcmp::False: {
    Context.insert<InstFakeUse>(Src0R);
    Context.insert<InstFakeUse>(Src1R);
    _addiu(DestR, Zero, 0);
    _mov(Dest, DestR);
    break;
  }
  case InstFcmp::Oeq: {
    if (Src0Ty == IceType_f32) {
      _c_eq_s(Src0R, Src1R);
    } else {
      _c_eq_d(Src0R, Src1R);
    }
    _addiu(DestR, Zero, 1);
    _movf(DestR, Zero, FCC0);
    _mov(Dest, DestR);
    break;
  }
  case InstFcmp::Ogt: {
    if (Src0Ty == IceType_f32) {
      _c_ule_s(Src0R, Src1R);
    } else {
      _c_ule_d(Src0R, Src1R);
    }
    _addiu(DestR, Zero, 1);
    _movt(DestR, Zero, FCC0);
    _mov(Dest, DestR);
    break;
  }
  case InstFcmp::Oge: {
    if (Src0Ty == IceType_f32) {
      _c_ult_s(Src0R, Src1R);
    } else {
      _c_ult_d(Src0R, Src1R);
    }
    _addiu(DestR, Zero, 1);
    _movt(DestR, Zero, FCC0);
    _mov(Dest, DestR);
    break;
  }
  case InstFcmp::Olt: {
    if (Src0Ty == IceType_f32) {
      _c_olt_s(Src0R, Src1R);
    } else {
      _c_olt_d(Src0R, Src1R);
    }
    _addiu(DestR, Zero, 1);
    _movf(DestR, Zero, FCC0);
    _mov(Dest, DestR);
    break;
  }
  case InstFcmp::Ole: {
    if (Src0Ty == IceType_f32) {
      _c_ole_s(Src0R, Src1R);
    } else {
      _c_ole_d(Src0R, Src1R);
    }
    _addiu(DestR, Zero, 1);
    _movf(DestR, Zero, FCC0);
    _mov(Dest, DestR);
    break;
  }
  case InstFcmp::One: {
    if (Src0Ty == IceType_f32) {
      _c_ueq_s(Src0R, Src1R);
    } else {
      _c_ueq_d(Src0R, Src1R);
    }
    _addiu(DestR, Zero, 1);
    _movt(DestR, Zero, FCC0);
    _mov(Dest, DestR);
    break;
  }
  case InstFcmp::Ord: {
    if (Src0Ty == IceType_f32) {
      _c_un_s(Src0R, Src1R);
    } else {
      _c_un_d(Src0R, Src1R);
    }
    _addiu(DestR, Zero, 1);
    _movt(DestR, Zero, FCC0);
    _mov(Dest, DestR);
    break;
  }
  case InstFcmp::Ueq: {
    if (Src0Ty == IceType_f32) {
      _c_ueq_s(Src0R, Src1R);
    } else {
      _c_ueq_d(Src0R, Src1R);
    }
    _addiu(DestR, Zero, 1);
    _movf(DestR, Zero, FCC0);
    _mov(Dest, DestR);
    break;
  }
  case InstFcmp::Ugt: {
    if (Src0Ty == IceType_f32) {
      _c_ole_s(Src0R, Src1R);
    } else {
      _c_ole_d(Src0R, Src1R);
    }
    _addiu(DestR, Zero, 1);
    _movt(DestR, Zero, FCC0);
    _mov(Dest, DestR);
    break;
  }
  case InstFcmp::Uge: {
    if (Src0Ty == IceType_f32) {
      _c_olt_s(Src0R, Src1R);
    } else {
      _c_olt_d(Src0R, Src1R);
    }
    _addiu(DestR, Zero, 1);
    _movt(DestR, Zero, FCC0);
    _mov(Dest, DestR);
    break;
  }
  case InstFcmp::Ult: {
    if (Src0Ty == IceType_f32) {
      _c_ult_s(Src0R, Src1R);
    } else {
      _c_ult_d(Src0R, Src1R);
    }
    _addiu(DestR, Zero, 1);
    _movf(DestR, Zero, FCC0);
    _mov(Dest, DestR);
    break;
  }
  case InstFcmp::Ule: {
    if (Src0Ty == IceType_f32) {
      _c_ule_s(Src0R, Src1R);
    } else {
      _c_ule_d(Src0R, Src1R);
    }
    _addiu(DestR, Zero, 1);
    _movf(DestR, Zero, FCC0);
    _mov(Dest, DestR);
    break;
  }
  case InstFcmp::Une: {
    if (Src0Ty == IceType_f32) {
      _c_eq_s(Src0R, Src1R);
    } else {
      _c_eq_d(Src0R, Src1R);
    }
    _addiu(DestR, Zero, 1);
    _movt(DestR, Zero, FCC0);
    _mov(Dest, DestR);
    break;
  }
  case InstFcmp::Uno: {
    if (Src0Ty == IceType_f32) {
      _c_un_s(Src0R, Src1R);
    } else {
      _c_un_d(Src0R, Src1R);
    }
    _addiu(DestR, Zero, 1);
    _movf(DestR, Zero, FCC0);
    _mov(Dest, DestR);
    break;
  }
  case InstFcmp::True: {
    Context.insert<InstFakeUse>(Src0R);
    Context.insert<InstFakeUse>(Src1R);
    _addiu(DestR, Zero, 1);
    _mov(Dest, DestR);
    break;
  }
  }
}
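
// Worked example (descriptive): in lower64Icmp below, Eq computes
// or(xor(a.hi, b.hi), xor(a.lo, b.lo)), which is zero iff the two i64 values
// are equal, and materializes the boolean with "sltiu dest, t, 1" (unsigned
// "t < 1", i.e. "t == 0"); Ne instead uses "sltu dest, $zero, t".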

void TargetMIPS32::lower64Icmp(const InstIcmp *Instr) {
  Operand *Src0 = legalize(Instr->getSrc(0));
  Operand *Src1 = legalize(Instr->getSrc(1));
  Variable *Dest = Instr->getDest();
  InstIcmp::ICond Condition = Instr->getCondition();

  Variable *Src0LoR = legalizeToReg(loOperand(Src0));
  Variable *Src0HiR = legalizeToReg(hiOperand(Src0));
  Variable *Src1LoR = legalizeToReg(loOperand(Src1));
  Variable *Src1HiR = legalizeToReg(hiOperand(Src1));

  switch (Condition) {
  default:
    llvm_unreachable("unexpected condition");
    return;
  case InstIcmp::Eq: {
    auto *T1 = I32Reg();
    auto *T2 = I32Reg();
    auto *T3 = I32Reg();
    auto *T4 = I32Reg();
    _xor(T1, Src0HiR, Src1HiR);
    _xor(T2, Src0LoR, Src1LoR);
    _or(T3, T1, T2);
    _sltiu(T4, T3, 1);
    _mov(Dest, T4);
    return;
  }
  case InstIcmp::Ne: {
    auto *T1 = I32Reg();
    auto *T2 = I32Reg();
    auto *T3 = I32Reg();
    auto *T4 = I32Reg();
    _xor(T1, Src0HiR, Src1HiR);
    _xor(T2, Src0LoR, Src1LoR);
    _or(T3, T1, T2);
    _sltu(T4, getZero(), T3);
    _mov(Dest, T4);
    return;
  }
  case InstIcmp::Sgt: {
    auto *T1 = I32Reg();
    auto *T2 = I32Reg();
    auto *T3 = I32Reg();
    _xor(T1, Src0HiR, Src1HiR);
    _slt(T2, Src1HiR, Src0HiR);
    _sltu(T3, Src1LoR, Src0LoR);
    _movz(T2, T3, T1);
    _mov(Dest, T2);
    return;
  }
  case InstIcmp::Ugt: {
    auto *T1 = I32Reg();
    auto *T2 = I32Reg();
    auto *T3 = I32Reg();
    _xor(T1, Src0HiR, Src1HiR);
    _sltu(T2, Src1HiR, Src0HiR);
    _sltu(T3, Src1LoR, Src0LoR);
    _movz(T2, T3, T1);
    _mov(Dest, T2);
    return;
  }
  case InstIcmp::Sge: {
    auto *T1 = I32Reg();
    auto *T2 = I32Reg();
    auto *T3 = I32Reg();
    auto *T4 = I32Reg();
    auto *T5 = I32Reg();
    _xor(T1, Src0HiR, Src1HiR);
    _slt(T2, Src0HiR, Src1HiR);
    _xori(T3, T2, 1);
    _sltu(T4, Src0LoR, Src1LoR);
    _xori(T5, T4, 1);
    _movz(T3, T5, T1);
    _mov(Dest, T3);
    return;
  }
  case InstIcmp::Uge: {
    auto *T1 = I32Reg();
    auto *T2 = I32Reg();
    auto *T3 = I32Reg();
    auto *T4 = I32Reg();
    auto *T5 = I32Reg();
    _xor(T1, Src0HiR, Src1HiR);
    _sltu(T2, Src0HiR, Src1HiR);
    _xori(T3, T2, 1);
    _sltu(T4, Src0LoR, Src1LoR);
    _xori(T5, T4, 1);
    _movz(T3, T5, T1);
    _mov(Dest, T3);
    return;
  }
  case InstIcmp::Slt: {
    auto *T1 = I32Reg();
    auto *T2 = I32Reg();
    auto *T3 = I32Reg();
    _xor(T1, Src0HiR, Src1HiR);
    _slt(T2, Src0HiR, Src1HiR);
    _sltu(T3, Src0LoR, Src1LoR);
    _movz(T2, T3, T1);
    _mov(Dest, T2);
    return;
  }
  case InstIcmp::Ult: {
    auto *T1 = I32Reg();
    auto *T2 = I32Reg();
    auto *T3 = I32Reg();
    _xor(T1, Src0HiR, Src1HiR);
    _sltu(T2, Src0HiR, Src1HiR);
    _sltu(T3, Src0LoR, Src1LoR);
    _movz(T2, T3, T1);
    _mov(Dest, T2);
    return;
  }
  case InstIcmp::Sle: {
    auto *T1 = I32Reg();
    auto *T2 = I32Reg();
    auto *T3 = I32Reg();
    auto *T4 = I32Reg();
    auto *T5 = I32Reg();
    _xor(T1, Src0HiR, Src1HiR);
    _slt(T2, Src1HiR, Src0HiR);
    _xori(T3, T2, 1);
    _sltu(T4, Src1LoR, Src0LoR);
    _xori(T5, T4, 1);
    _movz(T3, T5, T1);
    _mov(Dest, T3);
    return;
  }
  case InstIcmp::Ule: {
    auto *T1 = I32Reg();
    auto *T2 = I32Reg();
    auto *T3 = I32Reg();
    auto *T4 = I32Reg();
    auto *T5 = I32Reg();
    _xor(T1, Src0HiR, Src1HiR);
    _sltu(T2, Src1HiR, Src0HiR);
    _xori(T3, T2, 1);
    _sltu(T4, Src1LoR, Src0LoR);
    _xori(T5, T4, 1);
    _movz(T3, T5, T1);
    _mov(Dest, T3);
    return;
  }
  }
}

void TargetMIPS32::lowerIcmp(const InstIcmp *Instr) {
  auto *Src0 = Instr->getSrc(0);
  auto *Src1 = Instr->getSrc(1);
  if (Src0->getType() == IceType_i64) {
    lower64Icmp(Instr);
    return;
  }
  Variable *Dest = Instr->getDest();
  if (isVectorType(Dest->getType())) {
    llvm::report_fatal_error("Icmp: Destination type is vector");
    return;
  }
  InstIcmp::ICond Cond = Instr->getCondition();
  auto *Src0R = legalizeToReg(Src0);
  auto *Src1R = legalizeToReg(Src1);
  const Type Src0Ty = Src0R->getType();
  const uint32_t ShAmt = INT32_BITS - getScalarIntBitWidth(Src0->getType());
  Variable *Src0RT = I32Reg();
  Variable *Src1RT = I32Reg();

  if (Src0Ty != IceType_i32) {
    _sll(Src0RT, Src0R, ShAmt);
    _sll(Src1RT, Src1R, ShAmt);
  } else {
    _mov(Src0RT, Src0R);
    _mov(Src1RT, Src1R);
  }

  switch (Cond) {
  case InstIcmp::Eq: {
    auto *DestT = I32Reg();
    auto *T = I32Reg();
    _xor(T, Src0RT, Src1RT);
    _sltiu(DestT, T, 1);
    _mov(Dest, DestT);
    return;
  }
  case InstIcmp::Ne: {
    auto *DestT = I32Reg();
    auto *T = I32Reg();
    auto *Zero = getZero();
    _xor(T, Src0RT, Src1RT);
    _sltu(DestT, Zero, T);
    _mov(Dest, DestT);
    return;
  }
  case InstIcmp::Ugt: {
    auto *DestT = I32Reg();
    _sltu(DestT, Src1RT, Src0RT);
    _mov(Dest, DestT);
    return;
  }
  case InstIcmp::Uge: {
    auto *DestT = I32Reg();
    auto *T = I32Reg();
    _sltu(T, Src0RT, Src1RT);
    _xori(DestT, T, 1);
    _mov(Dest, DestT);
    return;
  }
  case InstIcmp::Ult: {
    auto *DestT = I32Reg();
    _sltu(DestT, Src0RT, Src1RT);
    _mov(Dest, DestT);
    return;
  }
  case InstIcmp::Ule: {
    auto *DestT = I32Reg();
    auto *T = I32Reg();
    _sltu(T, Src1RT, Src0RT);
    _xori(DestT, T, 1);
    _mov(Dest, DestT);
    return;
  }
  case InstIcmp::Sgt: {
    auto *DestT = I32Reg();
    _slt(DestT, Src1RT, Src0RT);
    _mov(Dest, DestT);
    return;
  }
  case InstIcmp::Sge: {
    auto *DestT = I32Reg();
    auto *T = I32Reg();
    _slt(T, Src0RT, Src1RT);
    _xori(DestT, T, 1);
    _mov(Dest, DestT);
    return;
  }
  case InstIcmp::Slt: {
    auto *DestT = I32Reg();
    _slt(DestT, Src0RT, Src1RT);
    _mov(Dest, DestT);
    return;
  }
  case InstIcmp::Sle: {
    auto *DestT = I32Reg();
    auto *T = I32Reg();
    _slt(T, Src1RT, Src0RT);
    _xori(DestT, T, 1);
    _mov(Dest, DestT);
    return;
  }
  default:
    llvm_unreachable("Invalid ICmp operator");
    return;
  }
}
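
// Note on the ShAmt trick above (descriptive): i8/i16 operands are shifted
// left so the value occupies the top of the word (e.g. an i8 by 24 bits).
// Since both operands get the same shift and the vacated low bits are zero,
// slt/sltu on the shifted words gives the correct signed/unsigned result for
// the narrow type without a separate sign or zero extension.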

void TargetMIPS32::lowerInsertElement(const InstInsertElement *Instr) {
  Variable *Dest = Instr->getDest();
  const Type DestTy = Dest->getType();
  Operand *Src2 = Instr->getSrc(2);
  if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src2)) {
    const uint32_t Index = Imm->getValue();
    // Vector to insert in.
    auto *Src0 = legalizeUndef(Instr->getSrc(0));
    auto *Src0R = llvm::dyn_cast<VariableVecOn32>(Src0);
    // Number of elements in each container.
    uint32_t ElemPerCont =
        typeNumElements(Src0->getType()) / Src0R->ContainersPerVector;
    // Source element.
    auto *Src = Src0R->getContainers()[Index / ElemPerCont];
    auto *SrcE = Src;
    if (ElemPerCont > 1)
      SrcE = legalizeToReg(Src);
    // Dest is a vector.
    auto *VDest = llvm::dyn_cast<VariableVecOn32>(Dest);
    VDest->initVecElement(Func);
    // Temp vector variable.
    auto *TDest = makeReg(DestTy);
    auto *TVDest = llvm::dyn_cast<VariableVecOn32>(TDest);
    TVDest->initVecElement(Func);
    // Destination element.
    auto *DstE = TVDest->getContainers()[Index / ElemPerCont];
    // Element to insert.
    auto *Src1R = legalizeToReg(Instr->getSrc(1));
    auto *TReg1 = makeReg(IceType_i32);
    auto *TReg2 = makeReg(IceType_i32);
    auto *TReg3 = makeReg(IceType_i32);
    auto *TReg4 = makeReg(IceType_i32);
    auto *TReg5 = makeReg(IceType_i32);
    auto *TDReg = makeReg(IceType_i32);
    // Position of the element in the container.
    uint32_t PosInCont = Index % ElemPerCont;
    // Load the source vector into a temporary vector.
    for (SizeT i = 0; i < TVDest->ContainersPerVector; ++i) {
      auto *DCont = TVDest->getContainers()[i];
      // Do not define DstE as we are going to redefine it.
      if (DCont == DstE)
        continue;
      auto *SCont = Src0R->getContainers()[i];
      auto *TReg = makeReg(IceType_i32);
      _mov(TReg, SCont);
      _mov(DCont, TReg);
    }
    // Insert the element.
    if (ElemPerCont == 1) {
      _mov(DstE, Src1R);
    } else if (ElemPerCont == 2) {
      switch (PosInCont) {
      case 0:
        _andi(TReg1, Src1R, 0xffff); // Clear upper 16-bits of source
        _srl(TReg2, SrcE, 16);
        _sll(TReg3, TReg2, 16); // Clear lower 16-bits of element
        _or(TDReg, TReg1, TReg3);
        _mov(DstE, TDReg);
        break;
      case 1:
        _sll(TReg1, Src1R, 16); // Clear lower 16-bits of source
        _sll(TReg2, SrcE, 16);
        _srl(TReg3, TReg2, 16); // Clear upper 16-bits of element
        _or(TDReg, TReg1, TReg3);
        _mov(DstE, TDReg);
        break;
      default:
        llvm::report_fatal_error("InsertElement: Invalid PosInCont");
        break;
      }
    } else if (ElemPerCont == 4) {
      switch (PosInCont) {
      case 0:
        _andi(TReg1, Src1R, 0xff); // Clear bits[31:8] of source
        _srl(TReg2, SrcE, 8);
        _sll(TReg3, TReg2, 8); // Clear bits[7:0] of element
        _or(TDReg, TReg1, TReg3);
        _mov(DstE, TDReg);
        break;
      case 1:
        _andi(TReg1, Src1R, 0xff); // Clear bits[31:8] of source
        _sll(TReg5, TReg1, 8);     // Position in the destination
        _lui(TReg2, Ctx->getConstantInt32(0xffff));
        _ori(TReg3, TReg2, 0x00ff);
        _and(TReg4, SrcE, TReg3); // Clear bits[15:8] of element
        _or(TDReg, TReg5, TReg4);
        _mov(DstE, TDReg);
        break;
      case 2:
        _andi(TReg1, Src1R, 0xff); // Clear bits[31:8] of source
        _sll(TReg5, TReg1, 16);    // Position in the destination
        _lui(TReg2, Ctx->getConstantInt32(0xff00));
        _ori(TReg3, TReg2, 0xffff);
        _and(TReg4, SrcE, TReg3); // Clear bits[23:16] of element
        _or(TDReg, TReg5, TReg4);
        _mov(DstE, TDReg);
        break;
      case 3:
        _sll(TReg1, Src1R, 24); // Position in the destination
        _sll(TReg2, SrcE, 8);
        _srl(TReg3, TReg2, 8); // Clear bits[31:24] of element
        _or(TDReg, TReg1, TReg3);
        _mov(DstE, TDReg);
        break;
      default:
        llvm::report_fatal_error("InsertElement: Invalid PosInCont");
        break;
      }
    }
    // Write back the temporary vector to the destination.
    auto *Assign = InstAssign::create(Func, Dest, TDest);
    lowerAssign(Assign);
    return;
  }
  llvm::report_fatal_error("InsertElement requires a constant index");
}

void TargetMIPS32::createArithInst(Intrinsics::AtomicRMWOperation Operation,
                                   Variable *Dest, Variable *Src0,
                                   Variable *Src1) {
  switch (Operation) {
  default:
    llvm::report_fatal_error("Unknown AtomicRMW operation");
  case Intrinsics::AtomicExchange:
    llvm::report_fatal_error("Can't handle Atomic xchg operation");
  case Intrinsics::AtomicAdd:
    _addu(Dest, Src0, Src1);
    break;
  case Intrinsics::AtomicAnd:
    _and(Dest, Src0, Src1);
    break;
  case Intrinsics::AtomicSub:
    _subu(Dest, Src0, Src1);
    break;
  case Intrinsics::AtomicOr:
    _or(Dest, Src0, Src1);
    break;
  case Intrinsics::AtomicXor:
    _xor(Dest, Src0, Src1);
    break;
  }
}

void TargetMIPS32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
  Variable *Dest = Instr->getDest();
  Type DestTy = (Dest == nullptr) ? IceType_void : Dest->getType();

  Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID;
  switch (ID) {
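  // Background for the atomic lowerings below (descriptive, not emitted
  // code): MIPS32 builds atomic sequences out of ll/sc pairs. "ll" loads a
  // word and opens a link; "sc" stores only if nothing intervened, writing 1
  // to its source register on success and 0 on failure, so every sequence
  // loops back on a failed sc. The surrounding _sync() calls provide the
  // required fences.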
  case Intrinsics::AtomicLoad: {
    assert(isScalarIntegerType(DestTy));
    // We require the memory address to be naturally aligned. Given that is
    // the case, then normal loads are atomic.
    if (!Intrinsics::isMemoryOrderValid(
            ID, getConstantMemoryOrder(Instr->getArg(1)))) {
      Func->setError("Unexpected memory ordering for AtomicLoad");
      return;
    }
    if (DestTy == IceType_i64) {
      llvm::report_fatal_error("AtomicLoad.i64 should have been prelowered.");
      return;
    } else if (DestTy == IceType_i32) {
      auto *T1 = makeReg(DestTy);
      auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
      auto *Base = legalizeToReg(Instr->getArg(0));
      auto *Addr = formMemoryOperand(Base, DestTy);
      InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
      InstMIPS32Label *Exit = InstMIPS32Label::create(Func, this);
      constexpr CfgNode *NoTarget = nullptr;
      _sync();
      Context.insert(Retry);
      Sandboxer(this).ll(T1, Addr);
      _br(NoTarget, NoTarget, T1, getZero(), Exit, CondMIPS32::Cond::NE);
      _addiu(RegAt, getZero(), 0); // Loaded value is zero here, writeback zero
      Sandboxer(this).sc(RegAt, Addr);
      _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
      Context.insert(Exit);
      _sync();
      _mov(Dest, T1);
      Context.insert<InstFakeUse>(T1);
    } else {
      const uint32_t Mask = (1 << (CHAR_BITS * typeWidthInBytes(DestTy))) - 1;
      auto *Base = legalizeToReg(Instr->getArg(0));
      auto *T1 = makeReg(IceType_i32);
      auto *T2 = makeReg(IceType_i32);
      auto *T3 = makeReg(IceType_i32);
      auto *T4 = makeReg(IceType_i32);
      auto *T5 = makeReg(IceType_i32);
      auto *T6 = makeReg(IceType_i32);
      auto *SrcMask = makeReg(IceType_i32);
      auto *Tdest = makeReg(IceType_i32);
      auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
      InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
      InstMIPS32Label *Exit = InstMIPS32Label::create(Func, this);
      constexpr CfgNode *NoTarget = nullptr;
      _sync();
      _addiu(T1, getZero(), -4); // Address mask 0xFFFFFFFC
      _andi(T2, Base, 3);        // Last two bits of the address
      _and(T3, Base, T1);        // Align the address
      _sll(T4, T2, 3);
      _ori(T5, getZero(), Mask);
      _sllv(SrcMask, T5, T4); // Source mask
      auto *Addr = formMemoryOperand(T3, IceType_i32);
      Context.insert(Retry);
      Sandboxer(this).ll(T6, Addr);
      _and(Tdest, T6, SrcMask);
      _br(NoTarget, NoTarget, T6, getZero(), Exit, CondMIPS32::Cond::NE);
      _addiu(RegAt, getZero(), 0); // Loaded value is zero here, writeback zero
      Sandboxer(this).sc(RegAt, Addr);
      _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
      Context.insert(Exit);
      auto *T7 = makeReg(IceType_i32);
      auto *T8 = makeReg(IceType_i32);
      _srlv(T7, Tdest, T4);
      _andi(T8, T7, Mask);
      _sync();
      _mov(Dest, T8);
      Context.insert<InstFakeUse>(T6);
      Context.insert<InstFakeUse>(SrcMask);
    }
    return;
  }
  case Intrinsics::AtomicStore: {
    // We require the memory address to be naturally aligned. Given that is
    // the case, then normal stores are atomic.
    if (!Intrinsics::isMemoryOrderValid(
            ID, getConstantMemoryOrder(Instr->getArg(2)))) {
      Func->setError("Unexpected memory ordering for AtomicStore");
      return;
    }
    auto *Val = Instr->getArg(0);
    auto Ty = Val->getType();
    if (Ty == IceType_i64) {
      llvm::report_fatal_error("AtomicStore.i64 should have been prelowered.");
      return;
    } else if (Ty == IceType_i32) {
      auto *Val = legalizeToReg(Instr->getArg(0));
      auto *Base = legalizeToReg(Instr->getArg(1));
      auto *Addr = formMemoryOperand(Base, Ty);
      InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
      constexpr CfgNode *NoTarget = nullptr;
      auto *T1 = makeReg(IceType_i32);
      auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
      _sync();
      Context.insert(Retry);
      Sandboxer(this).ll(T1, Addr);
      _mov(RegAt, Val);
      Sandboxer(this).sc(RegAt, Addr);
      _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
      Context.insert<InstFakeUse>(T1); // To keep LL alive
      _sync();
    } else {
      auto *Val = legalizeToReg(Instr->getArg(0));
      auto *Base = legalizeToReg(Instr->getArg(1));
      InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
      constexpr CfgNode *NoTarget = nullptr;
      auto *T1 = makeReg(IceType_i32);
      auto *T2 = makeReg(IceType_i32);
      auto *T3 = makeReg(IceType_i32);
      auto *T4 = makeReg(IceType_i32);
      auto *T5 = makeReg(IceType_i32);
      auto *T6 = makeReg(IceType_i32);
      auto *T7 = makeReg(IceType_i32);
      auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
      auto *SrcMask = makeReg(IceType_i32);
      auto *DstMask = makeReg(IceType_i32);
      const uint32_t Mask = (1 << (CHAR_BITS * typeWidthInBytes(Ty))) - 1;
      _sync();
      _addiu(T1, getZero(), -4);
      _and(T7, Base, T1);
      auto *Addr = formMemoryOperand(T7, Ty);
      _andi(T2, Base, 3);
      _sll(T3, T2, 3);
      _ori(T4, getZero(), Mask);
      _sllv(T5, T4, T3);
      _sllv(T6, Val, T3);
      _nor(SrcMask, getZero(), T5);
      _and(DstMask, T6, T5);
      Context.insert(Retry);
      Sandboxer(this).ll(RegAt, Addr);
      _and(RegAt, RegAt, SrcMask);
      _or(RegAt, RegAt, DstMask);
      Sandboxer(this).sc(RegAt, Addr);
      _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
      Context.insert<InstFakeUse>(SrcMask);
      Context.insert<InstFakeUse>(DstMask);
      _sync();
    }
    return;
  }
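  // Shape of the compare-and-swap loop below (schematic):
  //   sync
  // retry:
  //   ll    t, [addr]
  //   bne   t, expected, exit
  //   mov   at, new
  //   sc    at, [addr]
  //   beq   at, $zero, retry
  // exit:
  //   sync
  // with the value observed by ll becoming the result of the intrinsic.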
  case Intrinsics::AtomicCmpxchg: {
    assert(isScalarIntegerType(DestTy));
    // We require the memory address to be naturally aligned. Given that is
    // the case, then normal loads are atomic.
    if (!Intrinsics::isMemoryOrderValid(
            ID, getConstantMemoryOrder(Instr->getArg(3)),
            getConstantMemoryOrder(Instr->getArg(4)))) {
      Func->setError("Unexpected memory ordering for AtomicCmpxchg");
      return;
    }

    InstMIPS32Label *Exit = InstMIPS32Label::create(Func, this);
    InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
    constexpr CfgNode *NoTarget = nullptr;
    auto *New = Instr->getArg(2);
    auto *Expected = Instr->getArg(1);
    auto *ActualAddress = Instr->getArg(0);

    if (DestTy == IceType_i64) {
      llvm::report_fatal_error(
          "AtomicCmpxchg.i64 should have been prelowered.");
      return;
    } else if (DestTy == IceType_i8 || DestTy == IceType_i16) {
      auto *NewR = legalizeToReg(New);
      auto *ExpectedR = legalizeToReg(Expected);
      auto *ActualAddressR = legalizeToReg(ActualAddress);
      const uint32_t ShiftAmount =
          (INT32_BITS - CHAR_BITS * typeWidthInBytes(DestTy));
      const uint32_t Mask = (1 << (CHAR_BITS * typeWidthInBytes(DestTy))) - 1;
      auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
      auto *T1 = I32Reg();
      auto *T2 = I32Reg();
      auto *T3 = I32Reg();
      auto *T4 = I32Reg();
      auto *T5 = I32Reg();
      auto *T6 = I32Reg();
      auto *T7 = I32Reg();
      auto *T8 = I32Reg();
      auto *T9 = I32Reg();
      _addiu(RegAt, getZero(), -4);
      _and(T1, ActualAddressR, RegAt);
      auto *Addr = formMemoryOperand(T1, DestTy);
      _andi(RegAt, ActualAddressR, 3);
      _sll(T2, RegAt, 3);
      _ori(RegAt, getZero(), Mask);
      _sllv(T3, RegAt, T2);
      _nor(T4, getZero(), T3);
      _andi(RegAt, ExpectedR, Mask);
      _sllv(T5, RegAt, T2);
      _andi(RegAt, NewR, Mask);
      _sllv(T6, RegAt, T2);
      _sync();
      Context.insert(Retry);
      Sandboxer(this).ll(T7, Addr);
      _and(T8, T7, T3);
      _br(NoTarget, NoTarget, T8, T5, Exit, CondMIPS32::Cond::NE);
      _and(RegAt, T7, T4);
      _or(T9, RegAt, T6);
      Sandboxer(this).sc(T9, Addr);
      _br(NoTarget, NoTarget, getZero(), T9, Retry, CondMIPS32::Cond::EQ);
      Context.insert<InstFakeUse>(getZero());
      Context.insert(Exit);
      _srlv(RegAt, T8, T2);
      _sll(RegAt, RegAt, ShiftAmount);
      _sra(RegAt, RegAt, ShiftAmount);
      _mov(Dest, RegAt);
      _sync();
      Context.insert<InstFakeUse>(T3);
      Context.insert<InstFakeUse>(T4);
      Context.insert<InstFakeUse>(T5);
      Context.insert<InstFakeUse>(T6);
      Context.insert<InstFakeUse>(T8);
      Context.insert<InstFakeUse>(ExpectedR);
      Context.insert<InstFakeUse>(NewR);
    } else {
      auto *T1 = I32Reg();
      auto *T2 = I32Reg();
      auto *NewR = legalizeToReg(New);
      auto *ExpectedR = legalizeToReg(Expected);
      auto *ActualAddressR = legalizeToReg(ActualAddress);
      _sync();
      Context.insert(Retry);
      Sandboxer(this).ll(T1, formMemoryOperand(ActualAddressR, DestTy));
      _br(NoTarget, NoTarget, T1, ExpectedR, Exit, CondMIPS32::Cond::NE);
      _mov(T2, NewR);
      Sandboxer(this).sc(T2, formMemoryOperand(ActualAddressR, DestTy));
      _br(NoTarget, NoTarget, T2, getZero(), Retry, CondMIPS32::Cond::EQ);
      Context.insert<InstFakeUse>(getZero());
      Context.insert(Exit);
      _mov(Dest, T1);
      _sync();
      Context.insert<InstFakeUse>(ExpectedR);
      Context.insert<InstFakeUse>(NewR);
    }
    return;
  }
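  // Note on the i8/i16 paths above and in AtomicRMW below (descriptive):
  // ll/sc only operate on whole words, so the address is aligned down to a
  // word boundary, the byte offset (addr & 3) is turned into a bit shift,
  // and a shifted mask selects the subword lane. The update is applied to
  // the containing word, and the lane is shifted back out for the result.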
  case Intrinsics::AtomicRMW: {
    assert(isScalarIntegerType(DestTy));
    // We require the memory address to be naturally aligned. Given that is
    // the case, then normal loads are atomic.
    if (!Intrinsics::isMemoryOrderValid(
            ID, getConstantMemoryOrder(Instr->getArg(3)))) {
      Func->setError("Unexpected memory ordering for AtomicRMW");
      return;
    }

    constexpr CfgNode *NoTarget = nullptr;
    InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
    auto Operation = static_cast<Intrinsics::AtomicRMWOperation>(
        llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue());
    auto *New = Instr->getArg(2);
    auto *ActualAddress = Instr->getArg(1);

    if (DestTy == IceType_i64) {
      llvm::report_fatal_error("AtomicRMW.i64 should have been prelowered.");
      return;
    } else if (DestTy == IceType_i8 || DestTy == IceType_i16) {
      const uint32_t ShiftAmount =
          INT32_BITS - (CHAR_BITS * typeWidthInBytes(DestTy));
      const uint32_t Mask = (1 << (CHAR_BITS * typeWidthInBytes(DestTy))) - 1;
      auto *NewR = legalizeToReg(New);
      auto *ActualAddressR = legalizeToReg(ActualAddress);
      auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
      auto *T1 = I32Reg();
      auto *T2 = I32Reg();
      auto *T3 = I32Reg();
      auto *T4 = I32Reg();
      auto *T5 = I32Reg();
      auto *T6 = I32Reg();
      auto *T7 = I32Reg();
      _sync();
      _addiu(RegAt, getZero(), -4);
      _and(T1, ActualAddressR, RegAt);
      _andi(RegAt, ActualAddressR, 3);
      _sll(T2, RegAt, 3);
      _ori(RegAt, getZero(), Mask);
      _sllv(T3, RegAt, T2);
      _nor(T4, getZero(), T3);
      _sllv(T5, NewR, T2);
      Context.insert(Retry);
      Sandboxer(this).ll(T6, formMemoryOperand(T1, DestTy));
      if (Operation != Intrinsics::AtomicExchange) {
        createArithInst(Operation, RegAt, T6, T5);
        _and(RegAt, RegAt, T3);
      }
      _and(T7, T6, T4);
      if (Operation == Intrinsics::AtomicExchange) {
        _or(RegAt, T7, T5);
      } else {
        _or(RegAt, T7, RegAt);
      }
      Sandboxer(this).sc(RegAt, formMemoryOperand(T1, DestTy));
      _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
      Context.insert<InstFakeUse>(getZero());
      _and(RegAt, T6, T3);
      _srlv(RegAt, RegAt, T2);
      _sll(RegAt, RegAt, ShiftAmount);
      _sra(RegAt, RegAt, ShiftAmount);
      _mov(Dest, RegAt);
      _sync();
      Context.insert<InstFakeUse>(NewR);
      Context.insert<InstFakeUse>(Dest);
    } else {
      auto *T1 = I32Reg();
      auto *T2 = I32Reg();
      auto *NewR = legalizeToReg(New);
      auto *ActualAddressR = legalizeToReg(ActualAddress);
      _sync();
      Context.insert(Retry);
      Sandboxer(this).ll(T1, formMemoryOperand(ActualAddressR, DestTy));
      if (Operation == Intrinsics::AtomicExchange) {
        _mov(T2, NewR);
      } else {
        createArithInst(Operation, T2, T1, NewR);
      }
      Sandboxer(this).sc(T2, formMemoryOperand(ActualAddressR, DestTy));
      _br(NoTarget, NoTarget, T2, getZero(), Retry, CondMIPS32::Cond::EQ);
      Context.insert<InstFakeUse>(getZero());
      _mov(Dest, T1);
      _sync();
      Context.insert<InstFakeUse>(NewR);
      Context.insert<InstFakeUse>(Dest);
    }
    return;
  }
  case Intrinsics::AtomicFence:
  case Intrinsics::AtomicFenceAll:
    assert(Dest == nullptr);
    _sync();
    return;
  case Intrinsics::AtomicIsLockFree: {
    Operand *ByteSize = Instr->getArg(0);
    auto *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize);
    auto *T = I32Reg();
    if (CI == nullptr) {
      // The PNaCl ABI requires the byte size to be a compile-time constant.
      Func->setError("AtomicIsLockFree byte size should be compile-time const");
      return;
    }
    static constexpr int32_t NotLockFree = 0;
    static constexpr int32_t LockFree = 1;
    int32_t Result = NotLockFree;
    switch (CI->getValue()) {
    case 1:
    case 2:
    case 4:
      Result = LockFree;
      break;
    }
    _addiu(T, getZero(), Result);
    _mov(Dest, T);
    return;
  }
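  // Byte-swap reference for the i32 case below (schematic, writing the input
  // as b3.b2.b1.b0):
  //   srl  t1, src, 24                  # 00.00.00.b3
  //   srl  t2, src, 8;  andi 0xff00     # 00.00.b2.00
  //   sll  t4, src, 8;  and 0x00ff0000  # 00.b1.00.00
  //   sll  t5, src, 24                  # b0.00.00.00
  // OR-ing the four pieces yields b0.b1.b2.b3.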
  case Intrinsics::Bswap: {
    auto *Src = Instr->getArg(0);
    const Type SrcTy = Src->getType();
    assert(SrcTy == IceType_i16 || SrcTy == IceType_i32 ||
           SrcTy == IceType_i64);
    switch (SrcTy) {
    case IceType_i16: {
      auto *T1 = I32Reg();
      auto *T2 = I32Reg();
      auto *T3 = I32Reg();
      auto *T4 = I32Reg();
      auto *SrcR = legalizeToReg(Src);
      _sll(T1, SrcR, 8);
      _lui(T2, Ctx->getConstantInt32(255));
      _and(T1, T1, T2);
      _sll(T3, SrcR, 24);
      _or(T1, T3, T1);
      _srl(T4, T1, 16);
      _mov(Dest, T4);
      return;
    }
    case IceType_i32: {
      auto *T1 = I32Reg();
      auto *T2 = I32Reg();
      auto *T3 = I32Reg();
      auto *T4 = I32Reg();
      auto *T5 = I32Reg();
      auto *SrcR = legalizeToReg(Src);
      _srl(T1, SrcR, 24);
      _srl(T2, SrcR, 8);
      _andi(T2, T2, 0xFF00);
      _or(T1, T2, T1);
      _sll(T4, SrcR, 8);
      _lui(T3, Ctx->getConstantInt32(255));
      _and(T4, T4, T3);
      _sll(T5, SrcR, 24);
      _or(T4, T5, T4);
      _or(T4, T4, T1);
      _mov(Dest, T4);
      return;
    }
    case IceType_i64: {
      auto *T1 = I32Reg();
      auto *T2 = I32Reg();
      auto *T3 = I32Reg();
      auto *T4 = I32Reg();
      auto *T5 = I32Reg();
      auto *T6 = I32Reg();
      auto *T7 = I32Reg();
      auto *T8 = I32Reg();
      auto *T9 = I32Reg();
      auto *T10 = I32Reg();
      auto *T11 = I32Reg();
      auto *T12 = I32Reg();
      auto *T13 = I32Reg();
      auto *T14 = I32Reg();
      auto *T15 = I32Reg();
      auto *T16 = I32Reg();
      auto *T17 = I32Reg();
      auto *T18 = I32Reg();
      auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
      auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Src = legalizeUndef(Src);
      auto *SrcLoR = legalizeToReg(loOperand(Src));
      auto *SrcHiR = legalizeToReg(hiOperand(Src));
      _sll(T1, SrcHiR, 8);
      _srl(T2, SrcHiR, 24);
      _srl(T3, SrcHiR, 8);
      _andi(T3, T3, 0xFF00);
      _lui(T4, Ctx->getConstantInt32(255));
      _or(T5, T3, T2);
      _and(T6, T1, T4);
      _sll(T7, SrcHiR, 24);
      _or(T8, T7, T6);
      _srl(T9, SrcLoR, 24);
      _srl(T10, SrcLoR, 8);
      _andi(T11, T10, 0xFF00);
      _or(T12, T8, T5);
      _or(T13, T11, T9);
      _sll(T14, SrcLoR, 8);
      _and(T15, T14, T4);
      _sll(T16, SrcLoR, 24);
      _or(T17, T16, T15);
      _or(T18, T17, T13);
      _mov(DestLo, T12);
      _mov(DestHi, T18);
      return;
    }
    default:
      llvm::report_fatal_error("Control flow should never have reached here.");
    }
    return;
  }
  case Intrinsics::Ctpop: {
    llvm::report_fatal_error("Ctpop should have been prelowered.");
    return;
  }
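  // Identity used by the Cttz cases below (descriptive): for x != 0,
  // (x - 1) & ~x sets exactly the trailing-zero bits of x, so
  // cttz(x) == 32 - clz((x - 1) & ~x); Ctlz itself maps directly onto the
  // native clz instruction.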
  case Intrinsics::Ctlz: {
    auto *Src = Instr->getArg(0);
    const Type SrcTy = Src->getType();
    assert(SrcTy == IceType_i32 || SrcTy == IceType_i64);
    switch (SrcTy) {
    case IceType_i32: {
      auto *T = I32Reg();
      auto *SrcR = legalizeToReg(Src);
      _clz(T, SrcR);
      _mov(Dest, T);
      break;
    }
    case IceType_i64: {
      auto *T1 = I32Reg();
      auto *T2 = I32Reg();
      auto *T3 = I32Reg();
      auto *T4 = I32Reg();
      auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
      auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *SrcHiR = legalizeToReg(hiOperand(Src));
      Variable *SrcLoR = legalizeToReg(loOperand(Src));
      _clz(T1, SrcHiR);
      _clz(T2, SrcLoR);
      _addiu(T3, T2, 32);
      _movn(T3, T1, SrcHiR);
      _addiu(T4, getZero(), 0);
      _mov(DestHi, T4);
      _mov(DestLo, T3);
      break;
    }
    default:
      llvm::report_fatal_error("Control flow should never have reached here.");
    }
    break;
  }
  case Intrinsics::Cttz: {
    auto *Src = Instr->getArg(0);
    const Type SrcTy = Src->getType();
    assert(SrcTy == IceType_i32 || SrcTy == IceType_i64);
    switch (SrcTy) {
    case IceType_i32: {
      auto *T1 = I32Reg();
      auto *T2 = I32Reg();
      auto *T3 = I32Reg();
      auto *T4 = I32Reg();
      auto *T5 = I32Reg();
      auto *T6 = I32Reg();
      auto *SrcR = legalizeToReg(Src);
      _addiu(T1, SrcR, -1);
      _not(T2, SrcR);
      _and(T3, T2, T1);
      _clz(T4, T3);
      _addiu(T5, getZero(), 32);
      _subu(T6, T5, T4);
      _mov(Dest, T6);
      break;
    }
    case IceType_i64: {
      auto *THi1 = I32Reg();
      auto *THi2 = I32Reg();
      auto *THi3 = I32Reg();
      auto *THi4 = I32Reg();
      auto *THi5 = I32Reg();
      auto *THi6 = I32Reg();
      auto *TLo1 = I32Reg();
      auto *TLo2 = I32Reg();
      auto *TLo3 = I32Reg();
      auto *TLo4 = I32Reg();
      auto *TLo5 = I32Reg();
      auto *TLo6 = I32Reg();
      auto *TResHi = I32Reg();
      auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
      auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *SrcHiR = legalizeToReg(hiOperand(Src));
      Variable *SrcLoR = legalizeToReg(loOperand(Src));
      _addiu(THi1, SrcHiR, -1);
      _not(THi2, SrcHiR);
      _and(THi3, THi2, THi1);
      _clz(THi4, THi3);
      _addiu(THi5, getZero(), 64);
      _subu(THi6, THi5, THi4);
      _addiu(TLo1, SrcLoR, -1);
      _not(TLo2, SrcLoR);
      _and(TLo3, TLo2, TLo1);
      _clz(TLo4, TLo3);
      _addiu(TLo5, getZero(), 32);
      _subu(TLo6, TLo5, TLo4);
      _movn(THi6, TLo6, SrcLoR);
      _addiu(TResHi, getZero(), 0);
      _mov(DestHi, TResHi);
      _mov(DestLo, THi6);
      break;
    }
    default:
      llvm::report_fatal_error("Control flow should never have reached here.");
    }
    return;
  }
  case Intrinsics::Fabs: {
    if (isScalarFloatingType(DestTy)) {
      Variable *T = makeReg(DestTy);
      if (DestTy == IceType_f32) {
        _abs_s(T, legalizeToReg(Instr->getArg(0)));
      } else {
        _abs_d(T, legalizeToReg(Instr->getArg(0)));
      }
      _mov(Dest, T);
    }
    return;
  }
  case Intrinsics::Longjmp: {
    llvm::report_fatal_error("longjmp should have been prelowered.");
    return;
  }
  case Intrinsics::Memcpy: {
    llvm::report_fatal_error("memcpy should have been prelowered.");
    return;
  }
  case Intrinsics::Memmove: {
    llvm::report_fatal_error("memmove should have been prelowered.");
    return;
  }
  case Intrinsics::Memset: {
    llvm::report_fatal_error("memset should have been prelowered.");
    return;
  }
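  // Descriptive note: under NaCl sandboxing, NaClReadTP below loads the
  // thread pointer from memory addressed off register T8, which (by
  // assumption here) the sandbox model keeps pointing at thread-local data;
  // in all other configurations the intrinsic must have been prelowered.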
      _mov(Dest, TP);
    }
    return;
  }
  case Intrinsics::Setjmp: {
    llvm::report_fatal_error("setjmp should have been prelowered.");
    return;
  }
  case Intrinsics::Sqrt: {
    if (isScalarFloatingType(DestTy)) {
      Variable *T = makeReg(DestTy);
      if (DestTy == IceType_f32) {
        _sqrt_s(T, legalizeToReg(Instr->getArg(0)));
      } else {
        _sqrt_d(T, legalizeToReg(Instr->getArg(0)));
      }
      _mov(Dest, T);
    } else {
      assert(getFlags().getApplicationBinaryInterface() != ::Ice::ABI_PNaCl);
      UnimplementedLoweringError(this, Instr); // Not required for PNaCl
    }
    return;
  }
  case Intrinsics::Stacksave: {
    Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
    _mov(Dest, SP);
    return;
  }
  case Intrinsics::Stackrestore: {
    Variable *Val = legalizeToReg(Instr->getArg(0));
    Sandboxer(this).reset_sp(Val);
    return;
  }
  case Intrinsics::Trap: {
    const uint32_t TrapCodeZero = 0;
    _teq(getZero(), getZero(), TrapCodeZero);
    return;
  }
  case Intrinsics::LoadSubVector: {
    UnimplementedLoweringError(this, Instr); // Not required for PNaCl
    return;
  }
  case Intrinsics::StoreSubVector: {
    UnimplementedLoweringError(this, Instr); // Not required for PNaCl
    return;
  }
  default: // UnknownIntrinsic
    Func->setError("Unexpected intrinsic");
    return;
  }
  return;
}

void TargetMIPS32::lowerLoad(const InstLoad *Instr) {
  // A Load instruction can be treated the same as an Assign instruction, after
  // the source operand is transformed into an OperandMIPS32Mem operand.
  Type Ty = Instr->getDest()->getType();
  Operand *Src0 = formMemoryOperand(Instr->getSourceAddress(), Ty);
  Variable *DestLoad = Instr->getDest();
  auto *Assign = InstAssign::create(Func, DestLoad, Src0);
  lowerAssign(Assign);
}

namespace {
void dumpAddressOpt(const Cfg *Func, const Variable *Base, int32_t Offset,
                    const Inst *Reason) {
  if (!BuildDefs::dump())
    return;
  if (!Func->isVerbose(IceV_AddrOpt))
    return;
  OstreamLocker _(Func->getContext());
  Ostream &Str = Func->getContext()->getStrDump();
  Str << "Instruction: ";
  Reason->dumpDecorated(Func);
  Str << " results in Base=";
  if (Base)
    Base->dump(Func);
  else
    Str << "<null>";
  Str << ", Offset=" << Offset << "\n";
}
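
// matchAssign folds a copy that feeds the address computation into the access
// itself: given a = b followed by a load from a+Offset, the load can use
// b+Offset directly; an assignment of a constant folds into Offset instead.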
bool matchAssign(const VariablesMetadata *VMetadata, Variable **Var,
                 int32_t *Offset, const Inst **Reason) {
  // Var originates from Var=SrcVar ==> set Var:=SrcVar
  if (*Var == nullptr)
    return false;
  const Inst *VarAssign = VMetadata->getSingleDefinition(*Var);
  if (!VarAssign)
    return false;
  assert(!VMetadata->isMultiDef(*Var));
  if (!llvm::isa<InstAssign>(VarAssign))
    return false;

  Operand *SrcOp = VarAssign->getSrc(0);
  bool Optimized = false;
  if (auto *SrcVar = llvm::dyn_cast<Variable>(SrcOp)) {
    if (!VMetadata->isMultiDef(SrcVar) ||
        // TODO: ensure SrcVar stays single-BB
        false) {
      Optimized = true;
      *Var = SrcVar;
    }
  } else if (auto *Const = llvm::dyn_cast<ConstantInteger32>(SrcOp)) {
    // Var was assigned a constant: drop the variable and fold the value into
    // Offset, provided the addition does not overflow.
    int32_t MoreOffset = Const->getValue();
    if (Utils::WouldOverflowAdd(*Offset, MoreOffset))
      return false;
    *Var = nullptr;
    *Offset += MoreOffset;
    Optimized = true;
  }

  if (Optimized) {
    *Reason = VarAssign;
  }

  return Optimized;
}

bool isAddOrSub(const Inst *Instr, InstArithmetic::OpKind *Kind) {
  if (const auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) {
    switch (Arith->getOp()) {
    default:
      return false;
    case InstArithmetic::Add:
    case InstArithmetic::Sub:
      *Kind = Arith->getOp();
      return true;
    }
  }
  return false;
}

bool matchOffsetBase(const VariablesMetadata *VMetadata, Variable **Base,
                     int32_t *Offset, const Inst **Reason) {
  // Base is Base=Var+Const || Base is Base=Const+Var ==>
  //   set Base=Var, Offset+=Const
  // Base is Base=Var-Const ==>
  //   set Base=Var, Offset-=Const
  if (*Base == nullptr)
    return false;
  const Inst *BaseInst = VMetadata->getSingleDefinition(*Base);
  if (BaseInst == nullptr) {
    return false;
  }
  assert(!VMetadata->isMultiDef(*Base));

  auto *ArithInst = llvm::dyn_cast<const InstArithmetic>(BaseInst);
  if (ArithInst == nullptr)
    return false;
  InstArithmetic::OpKind Kind;
  if (!isAddOrSub(ArithInst, &Kind))
    return false;
  bool IsAdd = Kind == InstArithmetic::Add;
  Operand *Src0 = ArithInst->getSrc(0);
  Operand *Src1 = ArithInst->getSrc(1);
  auto *Var0 = llvm::dyn_cast<Variable>(Src0);
  auto *Var1 = llvm::dyn_cast<Variable>(Src1);
  auto *Const0 = llvm::dyn_cast<ConstantInteger32>(Src0);
  auto *Const1 = llvm::dyn_cast<ConstantInteger32>(Src1);
  Variable *NewBase = nullptr;
  int32_t NewOffset = *Offset;

  if (Var0 == nullptr && Const0 == nullptr) {
    assert(llvm::isa<ConstantRelocatable>(Src0));
    return false;
  }

  if (Var1 == nullptr && Const1 == nullptr) {
    assert(llvm::isa<ConstantRelocatable>(Src1));
    return false;
  }

  if (Var0 && Var1)
    // TODO(jpp): merge base/index splitting into here.
    return false;
  if (!IsAdd && Var1)
    return false;
  if (Var0)
    NewBase = Var0;
  else if (Var1)
    NewBase = Var1;
  // Compute the updated constant offset.
  if (Const0) {
    int32_t MoreOffset = IsAdd ? Const0->getValue() : -Const0->getValue();
    if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
      return false;
    NewOffset += MoreOffset;
  }
  if (Const1) {
    int32_t MoreOffset = IsAdd ? Const1->getValue() : -Const1->getValue();
    if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
      return false;
    NewOffset += MoreOffset;
  }

  // Update the computed address parameters once we are sure the optimization
  // is valid.
  *Base = NewBase;
  *Offset = NewOffset;
  *Reason = BaseInst;
  return true;
}
} // end of anonymous namespace

OperandMIPS32Mem *TargetMIPS32::formAddressingMode(Type Ty, Cfg *Func,
                                                   const Inst *LdSt,
                                                   Operand *Base) {
  assert(Base != nullptr);
  int32_t OffsetImm = 0;

  Func->resetCurrentNode();
  if (Func->isVerbose(IceV_AddrOpt)) {
    OstreamLocker _(Func->getContext());
    Ostream &Str = Func->getContext()->getStrDump();
    Str << "\nAddress mode formation:\t";
    LdSt->dumpDecorated(Func);
  }

  if (isVectorType(Ty)) {
    return nullptr;
  }

  auto *BaseVar = llvm::dyn_cast<Variable>(Base);
  if (BaseVar == nullptr)
    return nullptr;

  const VariablesMetadata *VMetadata = Func->getVMetadata();
  const Inst *Reason = nullptr;

  do {
    if (Reason != nullptr) {
      dumpAddressOpt(Func, BaseVar, OffsetImm, Reason);
      Reason = nullptr;
    }

    if (matchAssign(VMetadata, &BaseVar, &OffsetImm, &Reason)) {
      continue;
    }

    if (matchOffsetBase(VMetadata, &BaseVar, &OffsetImm, &Reason)) {
      continue;
    }
  } while (Reason);

  if (BaseVar == nullptr) {
    // We need a base register rather than just OffsetImm. Move OffsetImm into
    // BaseVar and form 0(BaseVar) addressing.
    const Type PointerType = getPointerType();
    BaseVar = makeReg(PointerType);
    Context.insert<InstAssign>(BaseVar, Ctx->getConstantInt32(OffsetImm));
    OffsetImm = 0;
  } else if (OffsetImm != 0) {
    // If OffsetImm does not fit in a signed 16-bit immediate, add it into
    // BaseVar and form 0(BaseVar) addressing instead.
    const int32_t PositiveOffset = OffsetImm > 0 ? OffsetImm : -OffsetImm;
    const InstArithmetic::OpKind Op =
        OffsetImm > 0 ? InstArithmetic::Add : InstArithmetic::Sub;
    constexpr bool ZeroExt = false;
    if (!OperandMIPS32Mem::canHoldOffset(Ty, ZeroExt, OffsetImm)) {
      const Type PointerType = getPointerType();
      Variable *T = makeReg(PointerType);
      Context.insert<InstArithmetic>(Op, T, BaseVar,
                                     Ctx->getConstantInt32(PositiveOffset));
      BaseVar = T;
      OffsetImm = 0;
    }
  }
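
  // MIPS32 loads and stores encode a signed 16-bit displacement, so e.g.
  // lw $v0, 32760($a0) is encodable directly, while a 40000-byte offset must
  // first be folded into the base register (handled above). The assert below
  // checks that whatever offset survives fits in 16 bits.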
  assert(OffsetImm < 0 ? (-OffsetImm & 0x0000ffff) == -OffsetImm
                       : (OffsetImm & 0x0000ffff) == OffsetImm);

  return OperandMIPS32Mem::create(
      Func, Ty, BaseVar,
      llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(OffsetImm)));
}

void TargetMIPS32::doAddressOptLoad() {
  Inst *Instr = iteratorToInst(Context.getCur());
  assert(llvm::isa<InstLoad>(Instr));
  Variable *Dest = Instr->getDest();
  Operand *Addr = Instr->getSrc(0);
  if (OperandMIPS32Mem *Mem =
          formAddressingMode(Dest->getType(), Func, Instr, Addr)) {
    Instr->setDeleted();
    Context.insert<InstLoad>(Dest, Mem);
  }
}

void TargetMIPS32::randomlyInsertNop(float Probability,
                                     RandomNumberGenerator &RNG) {
  RandomNumberGeneratorWrapper RNGW(RNG);
  if (RNGW.getTrueWithProbability(Probability)) {
    _nop();
  }
}

void TargetMIPS32::lowerPhi(const InstPhi * /*Instr*/) {
  Func->setError("Phi found in regular instruction list");
}

// Scalar integers are returned in v0 (plus v1 for i64), floats and doubles in
// f0, and vector results in v0/v1/a0/a1; v4f32 is returned through a hidden
// buffer instead (see the v4f32 case below).
void TargetMIPS32::lowerRet(const InstRet *Instr) {
  Variable *Reg = nullptr;
  if (Instr->hasRetValue()) {
    Operand *Src0 = Instr->getRetValue();
    switch (Src0->getType()) {
    case IceType_f32: {
      Operand *Src0F = legalizeToReg(Src0);
      Reg = makeReg(Src0F->getType(), RegMIPS32::Reg_F0);
      _mov(Reg, Src0F);
      break;
    }
    case IceType_f64: {
      Operand *Src0F = legalizeToReg(Src0);
      Reg = makeReg(Src0F->getType(), RegMIPS32::Reg_F0F1);
      _mov(Reg, Src0F);
      break;
    }
    case IceType_i1:
    case IceType_i8:
    case IceType_i16:
    case IceType_i32: {
      Operand *Src0F = legalizeToReg(Src0);
      Reg = makeReg(Src0F->getType(), RegMIPS32::Reg_V0);
      _mov(Reg, Src0F);
      break;
    }
    case IceType_i64: {
      Src0 = legalizeUndef(Src0);
      Variable *R0 = legalizeToReg(loOperand(Src0), RegMIPS32::Reg_V0);
      Variable *R1 = legalizeToReg(hiOperand(Src0), RegMIPS32::Reg_V1);
      Reg = R0;
      Context.insert<InstFakeUse>(R1);
      break;
    }
    case IceType_v4i1:
    case IceType_v8i1:
    case IceType_v16i1:
    case IceType_v16i8:
    case IceType_v8i16:
    case IceType_v4i32: {
      auto *SrcVec = llvm::dyn_cast<VariableVecOn32>(legalizeUndef(Src0));
      Variable *V0 =
          legalizeToReg(SrcVec->getContainers()[0], RegMIPS32::Reg_V0);
      Variable *V1 =
          legalizeToReg(SrcVec->getContainers()[1], RegMIPS32::Reg_V1);
      Variable *A0 =
          legalizeToReg(SrcVec->getContainers()[2], RegMIPS32::Reg_A0);
      Variable *A1 =
          legalizeToReg(SrcVec->getContainers()[3], RegMIPS32::Reg_A1);
      Reg = V0;
      Context.insert<InstFakeUse>(V1);
      Context.insert<InstFakeUse>(A0);
      Context.insert<InstFakeUse>(A1);
      break;
    }
    case IceType_v4f32: {
      auto *SrcVec = llvm::dyn_cast<VariableVecOn32>(legalizeUndef(Src0));
      Reg = getImplicitRet();
      auto *RegT = legalizeToReg(Reg);
      // Return the vector through the buffer whose address arrives in the
      // implicit argument a0.
      for (SizeT i = 0; i < SrcVec->ContainersPerVector; ++i) {
        OperandMIPS32Mem *Mem = OperandMIPS32Mem::create(
            Func, IceType_f32, RegT,
            llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(i * 4)));
        Variable *Var = legalizeToReg(SrcVec->getContainers()[i]);
        _sw(Var, Mem);
      }
      Variable *V0 = makeReg(IceType_i32, RegMIPS32::Reg_V0);
      _mov(V0, Reg); // move v0,a0
      Context.insert<InstFakeUse>(Reg);
      Context.insert<InstFakeUse>(V0);
      break;
    }
    default:
      llvm::report_fatal_error("Ret: Invalid type.");
      break;
    }
  }
  _ret(getPhysicalRegister(RegMIPS32::Reg_RA), Reg);
}

void TargetMIPS32::lowerSelect(const InstSelect *Instr) {
  Variable *Dest = Instr->getDest();
  const Type DestTy = Dest->getType();

  if (isVectorType(DestTy)) {
    llvm::report_fatal_error("Select: Destination type is vector");
    return;
  }

  Variable *DestR = nullptr;
  Variable *DestHiR = nullptr;
  Variable *SrcTR = nullptr;
  Variable *SrcTHiR = nullptr;
  Variable *SrcFR = nullptr;
  Variable *SrcFHiR = nullptr;

  if (DestTy == IceType_i64) {
    DestR = llvm::cast<Variable>(loOperand(Dest));
    DestHiR = llvm::cast<Variable>(hiOperand(Dest));
    SrcTR = legalizeToReg(loOperand(legalizeUndef(Instr->getTrueOperand())));
    SrcTHiR = legalizeToReg(hiOperand(legalizeUndef(Instr->getTrueOperand())));
    SrcFR = legalizeToReg(loOperand(legalizeUndef(Instr->getFalseOperand())));
    SrcFHiR = legalizeToReg(hiOperand(legalizeUndef(Instr->getFalseOperand())));
  } else {
    SrcTR = legalizeToReg(legalizeUndef(Instr->getTrueOperand()));
    SrcFR = legalizeToReg(legalizeUndef(Instr->getFalseOperand()));
  }

  Variable *ConditionR = legalizeToReg(Instr->getCondition());

  assert(Instr->getCondition()->getType() == IceType_i1);

  // movn overwrites the false value with the true value when the condition
  // register is non-zero, so no branch is needed.
  switch (DestTy) {
  case IceType_i1:
  case IceType_i8:
  case IceType_i16:
  case IceType_i32:
    _movn(SrcFR, SrcTR, ConditionR);
    _mov(Dest, SrcFR);
    break;
  case IceType_i64:
    _movn(SrcFR, SrcTR, ConditionR);
    _movn(SrcFHiR, SrcTHiR, ConditionR);
    _mov(DestR, SrcFR);
    _mov(DestHiR, SrcFHiR);
    break;
  case IceType_f32:
    _movn_s(SrcFR, SrcTR, ConditionR);
    _mov(Dest, SrcFR);
    break;
  case IceType_f64:
    _movn_d(SrcFR, SrcTR, ConditionR);
    _mov(Dest, SrcFR);
    break;
  default:
    llvm::report_fatal_error("Select: Invalid type.");
  }
}

void TargetMIPS32::lowerShuffleVector(const InstShuffleVector *Instr) {
  UnimplementedLoweringError(this, Instr);
}

void TargetMIPS32::lowerStore(const InstStore *Instr) {
  Operand *Value = Instr->getData();
  Operand *Addr = Instr->getAddr();
  OperandMIPS32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
  Type Ty = NewAddr->getType();

  if (Ty == IceType_i64) {
    Value = legalizeUndef(Value);
    Variable *ValueHi = legalizeToReg(hiOperand(Value));
    Variable *ValueLo = legalizeToReg(loOperand(Value));
    _sw(ValueHi, llvm::cast<OperandMIPS32Mem>(hiOperand(NewAddr)));
    _sw(ValueLo, llvm::cast<OperandMIPS32Mem>(loOperand(NewAddr)));
  } else if (isVectorType(Value->getType())) {
    auto *DataVec = llvm::dyn_cast<VariableVecOn32>(Value);
    for (SizeT i = 0; i < DataVec->ContainersPerVector; ++i) {
      auto *DCont = legalizeToReg(DataVec->getContainers()[i]);
      auto *MCont = llvm::cast<OperandMIPS32Mem>(
          getOperandAtIndex(NewAddr, IceType_i32, i));
      _sw(DCont, MCont);
    }
  } else {
    Variable *ValueR = legalizeToReg(Value);
    _sw(ValueR, NewAddr);
  }
}

void TargetMIPS32::doAddressOptStore() {
  Inst *Instr = iteratorToInst(Context.getCur());
  assert(llvm::isa<InstStore>(Instr));
  Operand *Src = Instr->getSrc(0);
  Operand *Addr = Instr->getSrc(1);
  if (OperandMIPS32Mem *Mem =
          formAddressingMode(Src->getType(), Func, Instr, Addr)) {
    Instr->setDeleted();
    Context.insert<InstStore>(Src, Mem);
  }
}
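
// lowerSwitch emits a linear chain of compare-and-branch pairs. For an i64
// comparison both halves must match: e.g. for case value 0x100000002 the
// generated code skips ahead unless the high word equals 0x1, and only
// branches to the case target if the low word then equals 0x2.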
void TargetMIPS32::lowerSwitch(const InstSwitch *Instr) {
  Operand *Src = Instr->getComparison();
  SizeT NumCases = Instr->getNumCases();
  if (Src->getType() == IceType_i64) {
    Src = legalizeUndef(Src);
    Variable *Src0Lo = legalizeToReg(loOperand(Src));
    Variable *Src0Hi = legalizeToReg(hiOperand(Src));
    for (SizeT I = 0; I < NumCases; ++I) {
      Operand *ValueLo = Ctx->getConstantInt32(Instr->getValue(I));
      Operand *ValueHi = Ctx->getConstantInt32(Instr->getValue(I) >> 32);
      CfgNode *TargetTrue = Instr->getLabel(I);
      constexpr CfgNode *NoTarget = nullptr;
      ValueHi = legalizeToReg(ValueHi);
      InstMIPS32Label *IntraLabel = InstMIPS32Label::create(Func, this);
      _br(NoTarget, NoTarget, Src0Hi, ValueHi, IntraLabel,
          CondMIPS32::Cond::NE);
      ValueLo = legalizeToReg(ValueLo);
      _br(NoTarget, TargetTrue, Src0Lo, ValueLo, CondMIPS32::Cond::EQ);
      Context.insert(IntraLabel);
    }
    _br(Instr->getLabelDefault());
    return;
  }
  Variable *SrcVar = legalizeToReg(Src);
  assert(SrcVar->mustHaveReg());
  for (SizeT I = 0; I < NumCases; ++I) {
    Operand *Value = Ctx->getConstantInt32(Instr->getValue(I));
    CfgNode *TargetTrue = Instr->getLabel(I);
    constexpr CfgNode *NoTargetFalse = nullptr;
    Value = legalizeToReg(Value);
    _br(NoTargetFalse, TargetTrue, SrcVar, Value, CondMIPS32::Cond::EQ);
  }
  _br(Instr->getLabelDefault());
}

void TargetMIPS32::lowerBreakpoint(const InstBreakpoint *Instr) {
  UnimplementedLoweringError(this, Instr);
}

void TargetMIPS32::lowerUnreachable(const InstUnreachable *) {
  const uint32_t TrapCodeZero = 0;
  _teq(getZero(), getZero(), TrapCodeZero);
}

void TargetMIPS32::lowerOther(const Inst *Instr) {
  if (llvm::isa<InstMIPS32Sync>(Instr)) {
    _sync();
  } else {
    TargetLowering::lowerOther(Instr);
  }
}

// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to preserve
// integrity of liveness analysis. Undef values are also turned into zeroes,
// since loOperand() and hiOperand() don't expect Undef input.
void TargetMIPS32::prelowerPhis() {
  PhiLowering::prelowerPhis32Bit<TargetMIPS32>(this, Context.getNode(), Func);
}

void TargetMIPS32::postLower() {
  if (Func->getOptLevel() == Opt_m1)
    return;
  markRedefinitions();
  Context.availabilityUpdate();
}

void TargetMIPS32::makeRandomRegisterPermutation(
    llvm::SmallVectorImpl<RegNumT> &Permutation,
    const SmallBitVector &ExcludeRegisters, uint64_t Salt) const {
  (void)Permutation;
  (void)ExcludeRegisters;
  (void)Salt;
  UnimplementedError(getFlags());
}

/* TODO(jvoung): avoid duplicate symbols with multiple targets.
void ConstantUndef::emitWithoutDollar(GlobalContext *) const {
  llvm_unreachable("Not expecting to emitWithoutDollar undef");
}

void ConstantUndef::emit(GlobalContext *) const {
  llvm_unreachable("undef value encountered by emitter.");
}
*/

TargetDataMIPS32::TargetDataMIPS32(GlobalContext *Ctx)
    : TargetDataLowering(Ctx) {}

// Generate the .MIPS.abiflags section. This section contains a versioned data
// structure with essential information required for the loader to determine
// the requirements of the application.
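// (MipsABIFlagsSection mirrors the ELF abiflags record: ISA level and
// revision, register widths, FP ABI, and ASE bits; its default constructor is
// assumed to fill in the values appropriate for this target, and the Flags
// object below is written out verbatim as the section payload.)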
void TargetDataMIPS32::emitTargetRODataSections() {
  struct MipsABIFlagsSection Flags;
  ELFObjectWriter *Writer = Ctx->getObjectWriter();
  const std::string Name = ".MIPS.abiflags";
  const llvm::ELF::Elf64_Word ShType = llvm::ELF::SHT_MIPS_ABIFLAGS;
  const llvm::ELF::Elf64_Xword ShFlags = llvm::ELF::SHF_ALLOC;
  const llvm::ELF::Elf64_Xword ShAddralign = 8;
  const llvm::ELF::Elf64_Xword ShEntsize = sizeof(Flags);
  Writer->writeTargetRODataSection(
      Name, ShType, ShFlags, ShAddralign, ShEntsize,
      llvm::StringRef(reinterpret_cast<const char *>(&Flags), sizeof(Flags)));
}

void TargetDataMIPS32::lowerGlobals(const VariableDeclarationList &Vars,
                                    const std::string &SectionSuffix) {
  const bool IsPIC = getFlags().getUseNonsfi();
  switch (getFlags().getOutFileType()) {
  case FT_Elf: {
    ELFObjectWriter *Writer = Ctx->getObjectWriter();
    Writer->writeDataSection(Vars, llvm::ELF::R_MIPS_32, SectionSuffix, IsPIC);
  } break;
  case FT_Asm:
  case FT_Iasm: {
    OstreamLocker L(Ctx);
    for (const VariableDeclaration *Var : Vars) {
      if (getFlags().matchTranslateOnly(Var->getName(), 0)) {
        emitGlobal(*Var, SectionSuffix);
      }
    }
  } break;
  }
}
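
// The helpers below emit pooled f32/f64 constants as raw bit patterns when
// producing textual assembly. A pooled 1.0f, for instance, comes out roughly
// as (label name illustrative):
//   .section .rodata.cst4,"aM",%progbits,4
//   .align 2
//   $F1:
//   .word 0x3f800000 /* f32 1.0 */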
namespace {
template <typename T> struct ConstantPoolEmitterTraits;

static_assert(sizeof(uint64_t) == 8,
              "uint64_t is supposed to be 8 bytes wide.");

// TODO(jaydeep.patil): implement the following when implementing constant
// randomization:
// * template <> struct ConstantPoolEmitterTraits<uint8_t>
// * template <> struct ConstantPoolEmitterTraits<uint16_t>
// * template <> struct ConstantPoolEmitterTraits<uint32_t>
template <> struct ConstantPoolEmitterTraits<float> {
  using ConstantType = ConstantFloat;
  static constexpr Type IceType = IceType_f32;
  // AsmTag and TypeName can't be constexpr because llvm::StringRef is unhappy
  // about them being constexpr.
  static const char AsmTag[];
  static const char TypeName[];
  static uint64_t bitcastToUint64(float Value) {
    static_assert(sizeof(Value) == sizeof(uint32_t),
                  "Float should be 4 bytes.");
    const uint32_t IntValue = Utils::bitCopy<uint32_t>(Value);
    return static_cast<uint64_t>(IntValue);
  }
};
const char ConstantPoolEmitterTraits<float>::AsmTag[] = ".word";
const char ConstantPoolEmitterTraits<float>::TypeName[] = "f32";

template <> struct ConstantPoolEmitterTraits<double> {
  using ConstantType = ConstantDouble;
  static constexpr Type IceType = IceType_f64;
  static const char AsmTag[];
  static const char TypeName[];
  static uint64_t bitcastToUint64(double Value) {
    static_assert(sizeof(double) == sizeof(uint64_t),
                  "Double should be 8 bytes.");
    return Utils::bitCopy<uint64_t>(Value);
  }
};
const char ConstantPoolEmitterTraits<double>::AsmTag[] = ".quad";
const char ConstantPoolEmitterTraits<double>::TypeName[] = "f64";

template <typename T>
void emitConstant(
    Ostream &Str,
    const typename ConstantPoolEmitterTraits<T>::ConstantType *Const) {
  if (!BuildDefs::dump())
    return;
  using Traits = ConstantPoolEmitterTraits<T>;
  Str << Const->getLabelName();
  T Value = Const->getValue();
  Str << ":\n\t" << Traits::AsmTag << "\t0x";
  Str.write_hex(Traits::bitcastToUint64(Value));
  Str << "\t/* " << Traits::TypeName << " " << Value << " */\n";
}

template <typename T> void emitConstantPool(GlobalContext *Ctx) {
  if (!BuildDefs::dump())
    return;
  using Traits = ConstantPoolEmitterTraits<T>;
  static constexpr size_t MinimumAlignment = 4;
  SizeT Align = std::max(MinimumAlignment, typeAlignInBytes(Traits::IceType));
  assert((Align % 4) == 0 && "Constants should be aligned");
  Ostream &Str = Ctx->getStrEmit();
  ConstantList Pool = Ctx->getConstantPool(Traits::IceType);
  Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",%progbits," << Align
      << "\n"
      << "\t.align\t" << (Align == 4 ? 2 : 3) << "\n";
  if (getFlags().getReorderPooledConstants()) {
    // TODO(jaydeep.patil): add constant pooling.
    UnimplementedError(getFlags());
  }
  for (Constant *C : Pool) {
    if (!C->getShouldBePooled()) {
      continue;
    }
    emitConstant<T>(Str, llvm::dyn_cast<typename Traits::ConstantType>(C));
  }
}
} // end of anonymous namespace

void TargetDataMIPS32::lowerConstants() {
  if (getFlags().getDisableTranslation())
    return;
  switch (getFlags().getOutFileType()) {
  case FT_Elf: {
    ELFObjectWriter *Writer = Ctx->getObjectWriter();
    Writer->writeConstantPool<ConstantFloat>(IceType_f32);
    Writer->writeConstantPool<ConstantDouble>(IceType_f64);
  } break;
  case FT_Asm:
  case FT_Iasm: {
    OstreamLocker _(Ctx);
    emitConstantPool<float>(Ctx);
    emitConstantPool<double>(Ctx);
    break;
  }
  }
}

void TargetDataMIPS32::lowerJumpTables() {
  if (getFlags().getDisableTranslation())
    return;
}
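
// legalize() accepts a bitmask of operand kinds the caller can consume
// (Legal_Reg, Legal_Mem, ...); helpers such as legalizeToReg() simply invoke
// it restricted to Legal_Reg, so every operand funnels through here.
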
// Helper for legalize() to emit the right code to lower an operand to a
// register of the appropriate type.
Variable *TargetMIPS32::copyToReg(Operand *Src, RegNumT RegNum) {
  Type Ty = Src->getType();
  Variable *Reg = makeReg(Ty, RegNum);
  if (isVectorType(Ty)) {
    llvm::report_fatal_error("Invalid copy from vector type.");
  } else {
    if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Src)) {
      _lw(Reg, Mem);
    } else {
      _mov(Reg, Src);
    }
  }
  return Reg;
}

Operand *TargetMIPS32::legalize(Operand *From, LegalMask Allowed,
                                RegNumT RegNum) {
  Type Ty = From->getType();
  // Assert that a physical register is allowed. To date, all calls to
  // legalize() allow a physical register. Legal_Flex converts registers to
  // the right type OperandMIPS32FlexReg as needed.
  assert(Allowed & Legal_Reg);

  if (RegNum.hasNoValue()) {
    if (Variable *Subst = getContext().availabilityGet(From)) {
      // At this point we know there is a potential substitution available.
      if (!Subst->isRematerializable() && Subst->mustHaveReg() &&
          !Subst->hasReg()) {
        // At this point we know the substitution will have a register.
        if (From->getType() == Subst->getType()) {
          // At this point we know the substitution's register is compatible.
          return Subst;
        }
      }
    }
  }

  // Go through the various types of operands: OperandMIPS32Mem, Constant, and
  // Variable. Given the above assertion, if the type of operand is not legal
  // (e.g., OperandMIPS32Mem and !Legal_Mem), we can always copy to a register.
  if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(From)) {
    // The base must be in a physical register.
    Variable *Base = Mem->getBase();
    ConstantInteger32 *Offset = llvm::cast<ConstantInteger32>(Mem->getOffset());
    Variable *RegBase = nullptr;
    assert(Base);

    RegBase = llvm::cast<Variable>(
        legalize(Base, Legal_Reg | Legal_Rematerializable));

    if (Offset != nullptr && Offset->getValue() != 0) {
      static constexpr bool ZeroExt = false;
      if (!OperandMIPS32Mem::canHoldOffset(Ty, ZeroExt, Offset->getValue())) {
        llvm::report_fatal_error("Invalid memory offset.");
      }
    }

    // Create a new operand if there was a change.
    if (Base != RegBase) {
      Mem = OperandMIPS32Mem::create(Func, Ty, RegBase, Offset,
                                     Mem->getAddrMode());
    }

    if (Allowed & Legal_Mem) {
      From = Mem;
    } else {
      Variable *Reg = makeReg(Ty, RegNum);
      _lw(Reg, Mem);
      From = Reg;
    }
    return From;
  }

  if (llvm::isa<Constant>(From)) {
    if (llvm::isa<ConstantUndef>(From)) {
      From = legalizeUndef(From, RegNum);
      if (isVectorType(Ty))
        return From;
    }
    if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) {
      Variable *Reg = makeReg(Ty, RegNum);
      Variable *TReg = makeReg(Ty, RegNum);
      _lui(TReg, C, RO_Hi);
      _addiu(Reg, TReg, C, RO_Lo);
      return Reg;
    } else if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) {
      const uint32_t Value = C32->getValue();
      // Use addiu if the immediate is a 16-bit value. Otherwise load it using
      // a lui/ori pair.
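      // For example, 0xFFFF8000 is -32768 and fits in a single
      //   addiu reg, zero, -32768
      // whereas 0x12345678 becomes
      //   lui reg, 0x1234 ; ori reg, reg, 0x5678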
      Variable *Reg = makeReg(Ty, RegNum);
      if (isInt<16>(int32_t(Value))) {
        Variable *Zero = makeReg(Ty, RegMIPS32::Reg_ZERO);
        Context.insert<InstFakeDef>(Zero);
        _addiu(Reg, Zero, Value);
      } else {
        uint32_t UpperBits = (Value >> 16) & 0xFFFF;
        uint32_t LowerBits = Value & 0xFFFF;
        if (LowerBits) {
          Variable *TReg = makeReg(Ty, RegNum);
          _lui(TReg, Ctx->getConstantInt32(UpperBits));
          _ori(Reg, TReg, LowerBits);
        } else {
          _lui(Reg, Ctx->getConstantInt32(UpperBits));
        }
      }
      return Reg;
    } else if (isScalarFloatingType(Ty)) {
      auto *CFrom = llvm::cast<Constant>(From);
      Variable *TReg = makeReg(Ty);
      if (!CFrom->getShouldBePooled()) {
        // Float/Double constant 0 is not pooled.
        Context.insert<InstFakeDef>(TReg);
        _mov(TReg, getZero());
      } else {
        // Load floats/doubles from the literal pool.
        Constant *Offset = Ctx->getConstantSym(0, CFrom->getLabelName());
        Variable *TReg1 = makeReg(getPointerType());
        _lui(TReg1, Offset, RO_Hi);
        OperandMIPS32Mem *Addr =
            OperandMIPS32Mem::create(Func, Ty, TReg1, Offset);
        if (Ty == IceType_f32)
          Sandboxer(this).lwc1(TReg, Addr, RO_Lo);
        else
          Sandboxer(this).ldc1(TReg, Addr, RO_Lo);
      }
      return copyToReg(TReg, RegNum);
    }
  }

  if (auto *Var = llvm::dyn_cast<Variable>(From)) {
    if (Var->isRematerializable()) {
      if (Allowed & Legal_Rematerializable) {
        return From;
      }

      Variable *T = makeReg(Var->getType(), RegNum);
      _mov(T, Var);
      return T;
    }
    // Check if the variable is guaranteed a physical register. This can
    // happen either when the variable is pre-colored or when it is assigned
    // infinite weight.
    bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
    // We need a new physical register for the operand if:
    //   Mem is not allowed and Var isn't guaranteed a physical register, or
    //   RegNum is required and Var->getRegNum() doesn't match.
    if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
        (RegNum.hasValue() && RegNum != Var->getRegNum())) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }
  return From;
}

namespace BoolFolding {
// TODO(sagar.thakur): Add remaining instruction kinds to shouldTrackProducer()
// and isValidConsumer()
bool shouldTrackProducer(const Inst &Instr) {
  return Instr.getKind() == Inst::Icmp;
}

bool isValidConsumer(const Inst &Instr) { return Instr.getKind() == Inst::Br; }
} // end of namespace BoolFolding
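
// Bool folding tracks i1 producers (currently only icmp) whose single
// consumer is a branch in the same block, so the compare can later be folded
// into the branch instead of materializing a 0/1 value in a register.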
void TargetMIPS32::ComputationTracker::recordProducers(CfgNode *Node) {
  for (Inst &Instr : Node->getInsts()) {
    if (Instr.isDeleted())
      continue;
    // Check whether Instr is a valid producer.
    Variable *Dest = Instr.getDest();
    if (Dest // only consider instructions with an actual dest var; and
        && Dest->getType() == IceType_i1 // only bool-type dest vars; and
        && BoolFolding::shouldTrackProducer(Instr)) { // white-listed instr.
      KnownComputations.emplace(Dest->getIndex(),
                                ComputationEntry(&Instr, IceType_i1));
    }
    // Check each src variable against the map.
    FOREACH_VAR_IN_INST(Var, Instr) {
      SizeT VarNum = Var->getIndex();
      auto ComputationIter = KnownComputations.find(VarNum);
      if (ComputationIter == KnownComputations.end()) {
        continue;
      }

      ++ComputationIter->second.NumUses;
      switch (ComputationIter->second.ComputationType) {
      default:
        KnownComputations.erase(VarNum);
        continue;
      case IceType_i1:
        if (!BoolFolding::isValidConsumer(Instr)) {
          KnownComputations.erase(VarNum);
          continue;
        }
        break;
      }

      if (Instr.isLastUse(Var)) {
        ComputationIter->second.IsLiveOut = false;
      }
    }
  }

  for (auto Iter = KnownComputations.begin(), End = KnownComputations.end();
       Iter != End;) {
    // Disable the folding if its dest may be live beyond this block.
    if (Iter->second.IsLiveOut || Iter->second.NumUses > 1) {
      Iter = KnownComputations.erase(Iter);
      continue;
    }

    // Mark as "dead" rather than outright deleting. This is so that other
    // peephole style optimizations during or before lowering have access to
    // this instruction in undeleted form. See for example
    // tryOptimizedCmpxchgCmpBr().
    Iter->second.Instr->setDead();
    ++Iter;
  }
}

TargetHeaderMIPS32::TargetHeaderMIPS32(GlobalContext *Ctx)
    : TargetHeaderLowering(Ctx) {}

void TargetHeaderMIPS32::lower() {
  if (!BuildDefs::dump())
    return;
  OstreamLocker L(Ctx);
  Ostream &Str = Ctx->getStrEmit();
  Str << "\t.set\t"
      << "nomicromips\n";
  Str << "\t.set\t"
      << "nomips16\n";
  Str << "\t.set\t"
      << "noat\n";
  if (getFlags().getUseSandboxing())
    Str << "\t.bundle_align_mode 4\n";
}

SmallBitVector TargetMIPS32::TypeToRegisterSet[RCMIPS32_NUM];
SmallBitVector TargetMIPS32::TypeToRegisterSetUnfiltered[RCMIPS32_NUM];
SmallBitVector TargetMIPS32::RegisterAliases[RegMIPS32::Reg_NUM];

TargetMIPS32::Sandboxer::Sandboxer(TargetMIPS32 *Target,
                                   InstBundleLock::Option BundleOption)
    : Target(Target), BundleOption(BundleOption) {}

TargetMIPS32::Sandboxer::~Sandboxer() {}

void TargetMIPS32::Sandboxer::createAutoBundle() {
  Bundler = makeUnique<AutoBundle>(Target, BundleOption);
}

void TargetMIPS32::Sandboxer::addiu_sp(uint32_t StackOffset) {
  Variable *SP = Target->getPhysicalRegister(RegMIPS32::Reg_SP);
  if (!Target->NeedSandboxing) {
    Target->_addiu(SP, SP, StackOffset);
    return;
  }
  auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
  Target->Context.insert<InstFakeDef>(T7);
  createAutoBundle();
  Target->_addiu(SP, SP, StackOffset);
  Target->_and(SP, SP, T7);
}

void TargetMIPS32::Sandboxer::lw(Variable *Dest, OperandMIPS32Mem *Mem) {
  Variable *Base = Mem->getBase();
  if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum()) &&
      (RegMIPS32::Reg_T8 != Base->getRegNum())) {
    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
    Target->Context.insert<InstFakeDef>(T7);
    createAutoBundle();
    Target->_and(Base, Base, T7);
  }
  Target->_lw(Dest, Mem);
  if (Target->NeedSandboxing && (Dest->getRegNum() == Target->getStackReg())) {
    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
    Target->Context.insert<InstFakeDef>(T7);
    Target->_and(Dest, Dest, T7);
  }
}
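
// The remaining load/store wrappers follow the same pattern: when sandboxing
// is enabled, the base register is masked with $t7 inside the same bundle as
// the memory access, so control flow cannot land between the mask and the
// access; a stack-pointer destination is re-masked after the load.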
void TargetMIPS32::Sandboxer::ll(Variable *Dest, OperandMIPS32Mem *Mem) {
  Variable *Base = Mem->getBase();
  if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
    Target->Context.insert<InstFakeDef>(T7);
    createAutoBundle();
    Target->_and(Base, Base, T7);
  }
  Target->_ll(Dest, Mem);
  if (Target->NeedSandboxing && (Dest->getRegNum() == Target->getStackReg())) {
    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
    Target->Context.insert<InstFakeDef>(T7);
    Target->_and(Dest, Dest, T7);
  }
}

void TargetMIPS32::Sandboxer::sc(Variable *Dest, OperandMIPS32Mem *Mem) {
  Variable *Base = Mem->getBase();
  if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
    Target->Context.insert<InstFakeDef>(T7);
    createAutoBundle();
    Target->_and(Base, Base, T7);
  }
  Target->_sc(Dest, Mem);
}

void TargetMIPS32::Sandboxer::sw(Variable *Dest, OperandMIPS32Mem *Mem) {
  Variable *Base = Mem->getBase();
  if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
    Target->Context.insert<InstFakeDef>(T7);
    createAutoBundle();
    Target->_and(Base, Base, T7);
  }
  Target->_sw(Dest, Mem);
}

void TargetMIPS32::Sandboxer::lwc1(Variable *Dest, OperandMIPS32Mem *Mem,
                                   RelocOp Reloc) {
  Variable *Base = Mem->getBase();
  if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
    Target->Context.insert<InstFakeDef>(T7);
    createAutoBundle();
    Target->_and(Base, Base, T7);
  }
  Target->_lwc1(Dest, Mem, Reloc);
  if (Target->NeedSandboxing && (Dest->getRegNum() == Target->getStackReg())) {
    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
    Target->Context.insert<InstFakeDef>(T7);
    Target->_and(Dest, Dest, T7);
  }
}

void TargetMIPS32::Sandboxer::ldc1(Variable *Dest, OperandMIPS32Mem *Mem,
                                   RelocOp Reloc) {
  Variable *Base = Mem->getBase();
  if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
    Target->Context.insert<InstFakeDef>(T7);
    createAutoBundle();
    Target->_and(Base, Base, T7);
  }
  Target->_ldc1(Dest, Mem, Reloc);
  if (Target->NeedSandboxing && (Dest->getRegNum() == Target->getStackReg())) {
    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
    Target->Context.insert<InstFakeDef>(T7);
    Target->_and(Dest, Dest, T7);
  }
}

void TargetMIPS32::Sandboxer::ret(Variable *RetAddr, Variable *RetValue) {
  if (!Target->NeedSandboxing) {
    Target->_ret(RetAddr, RetValue);
    return;
  }
  auto *T6 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T6);
  Target->Context.insert<InstFakeDef>(T6);
  createAutoBundle();
  Target->_and(RetAddr, RetAddr, T6);
  Target->_ret(RetAddr, RetValue);
}
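
// Code addresses are masked with $t6 (the return address above, and call
// targets in jal() below), while data addresses such as the stack pointer are
// masked with $t7; both mask registers are maintained by the sandbox
// environment, which is why the code only FakeDef's them here.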
void TargetMIPS32::Sandboxer::reset_sp(Variable *Src) {
  Variable *SP = Target->getPhysicalRegister(RegMIPS32::Reg_SP);
  if (!Target->NeedSandboxing) {
    Target->_mov(SP, Src);
    return;
  }
  auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
  Target->Context.insert<InstFakeDef>(T7);
  createAutoBundle();
  Target->_mov(SP, Src);
  Target->_and(SP, SP, T7);
  Target->getContext().insert<InstFakeUse>(SP);
}

InstMIPS32Call *TargetMIPS32::Sandboxer::jal(Variable *ReturnReg,
                                             Operand *CallTarget) {
  if (Target->NeedSandboxing) {
    createAutoBundle();
    if (auto *CallTargetR = llvm::dyn_cast<Variable>(CallTarget)) {
      auto *T6 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T6);
      Target->Context.insert<InstFakeDef>(T6);
      Target->_and(CallTargetR, CallTargetR, T6);
    }
  }
  return Target->Context.insert<InstMIPS32Call>(ReturnReg, CallTarget);
}

} // end of namespace MIPS32
} // end of namespace Ice