//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that ARM uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-isel"
#include "ARMISelLowering.h"
#include "ARM.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMPerfectShuffle.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");

// This option should go away when tail calls fully work.
static cl::opt<bool>
EnableARMTailCalls("arm-tail-calls", cl::Hidden,
  cl::desc("Generate tail calls (TEMPORARY OPTION)."),
  cl::init(false));

cl::opt<bool>
EnableARMLongCalls("arm-long-calls", cl::Hidden,
  cl::desc("Generate calls via indirect call instructions"),
  cl::init(false));

static cl::opt<bool>
ARMInterworking("arm-interworking", cl::Hidden,
  cl::desc("Enable / disable ARM interworking (for debugging only)"),
  cl::init(true));

namespace {
  class ARMCCState : public CCState {
  public:
    ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
               const TargetMachine &TM, SmallVectorImpl<CCValAssign> &locs,
               LLVMContext &C, ParmContext PC)
        : CCState(CC, isVarArg, MF, TM, locs, C) {
      assert(((PC == Call) || (PC == Prologue)) &&
             "ARMCCState users must specify whether their context is a call "
             "or prologue generation.");
      CallOrPrologue = PC;
    }
  };
}

// The APCS parameter registers.
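// R0-R3 are the core-register argument slots shared by the APCS and AAPCS
// calling conventions; the vararg and byval lowering code below indexes into
// this array to decide how many argument registers are still available.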
static const uint16_t GPRArgRegs[] = {
  ARM::R0, ARM::R1, ARM::R2, ARM::R3
};

void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
                                       MVT PromotedBitwiseVT) {
  if (VT != PromotedLdStVT) {
    setOperationAction(ISD::LOAD, VT, Promote);
    AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);

    setOperationAction(ISD::STORE, VT, Promote);
    AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
  }

  MVT ElemTy = VT.getVectorElementType();
  if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
    setOperationAction(ISD::SETCC, VT, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
  if (ElemTy == MVT::i32) {
    setOperationAction(ISD::SINT_TO_FP, VT, Custom);
    setOperationAction(ISD::UINT_TO_FP, VT, Custom);
    setOperationAction(ISD::FP_TO_SINT, VT, Custom);
    setOperationAction(ISD::FP_TO_UINT, VT, Custom);
  } else {
    setOperationAction(ISD::SINT_TO_FP, VT, Expand);
    setOperationAction(ISD::UINT_TO_FP, VT, Expand);
    setOperationAction(ISD::FP_TO_SINT, VT, Expand);
    setOperationAction(ISD::FP_TO_UINT, VT, Expand);
  }
  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
  setOperationAction(ISD::SELECT, VT, Expand);
  setOperationAction(ISD::SELECT_CC, VT, Expand);
  setOperationAction(ISD::VSELECT, VT, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
  if (VT.isInteger()) {
    setOperationAction(ISD::SHL, VT, Custom);
    setOperationAction(ISD::SRA, VT, Custom);
    setOperationAction(ISD::SRL, VT, Custom);
  }

  // Promote all bit-wise operations.
  if (VT.isInteger() && VT != PromotedBitwiseVT) {
    setOperationAction(ISD::AND, VT, Promote);
    AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
    setOperationAction(ISD::OR, VT, Promote);
    AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT);
    setOperationAction(ISD::XOR, VT, Promote);
    AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
  }

  // Neon does not support vector divide/remainder operations.
  setOperationAction(ISD::SDIV, VT, Expand);
  setOperationAction(ISD::UDIV, VT, Expand);
  setOperationAction(ISD::FDIV, VT, Expand);
  setOperationAction(ISD::SREM, VT, Expand);
  setOperationAction(ISD::UREM, VT, Expand);
  setOperationAction(ISD::FREM, VT, Expand);
}

void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
  addRegisterClass(VT, &ARM::DPRRegClass);
  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
}

void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
  addRegisterClass(VT, &ARM::QPRRegClass);
  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}

static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
  if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
    return new TargetLoweringObjectFileMachO();

  return new ARMElfTargetObjectFile();
}

ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
    : TargetLowering(TM, createTLOF(TM)) {
  Subtarget = &TM.getSubtarget<ARMSubtarget>();
  RegInfo = TM.getRegisterInfo();
  Itins = TM.getInstrItineraryData();

  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  if (Subtarget->isTargetDarwin()) {
    // Uses VFP for Thumb libfuncs if available.
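    // The "*vfp" helpers below are ARM libgcc / compiler-rt routines that are
    // implemented with VFP instructions (rather than pure integer soft-float
    // code) while keeping the regular Darwin argument-passing convention, so
    // Thumb code that cannot encode VFP instructions itself can still use the
    // FPU through them.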
    if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
      // Single-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
      setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
      setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
      setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");

      // Double-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
      setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
      setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
      setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");

      // Single-precision comparisons.
      setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
      setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
      setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
      setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
      setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
      setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
      setLibcallName(RTLIB::UO_F32, "__unordsf2vfp");
      setLibcallName(RTLIB::O_F32, "__unordsf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ);

      // Double-precision comparisons.
      setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
      setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
      setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
      setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
      setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
      setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
      setLibcallName(RTLIB::UO_F64, "__unorddf2vfp");
      setLibcallName(RTLIB::O_F64, "__unorddf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ);

      // Floating-point to integer conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
      setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");

      // Conversions between floating types.
      setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
      setLibcallName(RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp");

      // Integer to floating-point conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      // FIXME: There appears to be some naming inconsistency in ARM libgcc:
      // e.g., __floatunsidf vs. __floatunssidfvfp.
      setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
      setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
    }
  }

  // These libcalls are not available in 32-bit.
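  // Clearing the name (passing a null pointer) tells the legalizer that no
  // runtime routine exists, so 128-bit shifts get expanded inline instead of
  // being turned into calls.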
  setLibcallName(RTLIB::SHL_I128, 0);
  setLibcallName(RTLIB::SRL_I128, 0);
  setLibcallName(RTLIB::SRA_I128, 0);

  if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetDarwin()) {
    // Double-precision floating-point arithmetic helper functions
    // RTABI chapter 4.1.2, Table 2
    setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd");
    setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv");
    setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul");
    setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub");
    setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS);

    // Double-precision floating-point comparison helper functions
    // RTABI chapter 4.1.2, Table 3
    setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq");
    setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
    setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq");
    setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ);
    setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt");
    setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
    setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple");
    setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
    setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge");
    setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
    setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt");
    setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
    setLibcallName(RTLIB::UO_F64, "__aeabi_dcmpun");
    setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE);
    setLibcallName(RTLIB::O_F64, "__aeabi_dcmpun");
    setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ);
    setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS);

    // Single-precision floating-point arithmetic helper functions
    // RTABI chapter 4.1.2, Table 4
    setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd");
    setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv");
    setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul");
    setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub");
    setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS);

    // Single-precision floating-point comparison helper functions
    // RTABI chapter 4.1.2, Table 5
    setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq");
    setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
    setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq");
    setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ);
    setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt");
    setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
    setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple");
    setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
    setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge");
    setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
    setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt");
    setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
    setLibcallName(RTLIB::UO_F32, "__aeabi_fcmpun");
    setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE);
    setLibcallName(RTLIB::O_F32, "__aeabi_fcmpun");
    setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ);
    setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS);

    // Floating-point to integer conversions.
    // RTABI chapter 4.1.2, Table 6
    setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz");
    setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz");
    setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz");
    setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz");
    setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz");
    setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz");
    setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz");
    setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz");
    setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS);

    // Conversions between floating types.
    // RTABI chapter 4.1.2, Table 7
    setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f");
    setLibcallName(RTLIB::FPEXT_F32_F64, "__aeabi_f2d");
    setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS);

    // Integer to floating-point conversions.
    // RTABI chapter 4.1.2, Table 8
    setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d");
    setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d");
    setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d");
    setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d");
    setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f");
    setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f");
    setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f");
    setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f");
    setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS);

    // Long long helper functions
    // RTABI chapter 4.2, Table 9
    setLibcallName(RTLIB::MUL_I64, "__aeabi_lmul");
    setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl");
    setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr");
    setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr");
    setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS);

    // Integer division functions
    // RTABI chapter 4.3.1
    setLibcallName(RTLIB::SDIV_I8, "__aeabi_idiv");
    setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv");
    setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv");
    setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod");
    setLibcallName(RTLIB::UDIV_I8, "__aeabi_uidiv");
    setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv");
    setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv");
    setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod");
    setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);

    // Memory operations
    // RTABI chapter 4.3.4
    setLibcallName(RTLIB::MEMCPY, "__aeabi_memcpy");
    setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove");
    setLibcallName(RTLIB::MEMSET, "__aeabi_memset");
    setLibcallCallingConv(RTLIB::MEMCPY, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::MEMMOVE, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::MEMSET, CallingConv::ARM_AAPCS);
  }

  // Use divmod compiler-rt calls for iOS 5.0 and later.
  if (Subtarget->getTargetTriple().getOS() == Triple::IOS &&
      !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) {
    setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
    setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
  }

  if (Subtarget->isThumb1Only())
    addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
  else
    addRegisterClass(MVT::i32, &ARM::GPRRegClass);
  if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
      !Subtarget->isThumb1Only()) {
    addRegisterClass(MVT::f32, &ARM::SPRRegClass);
    if (!Subtarget->isFPOnlySP())
      addRegisterClass(MVT::f64, &ARM::DPRRegClass);

    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  }

  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
      setTruncStoreAction((MVT::SimpleValueType)VT,
                          (MVT::SimpleValueType)InnerVT, Expand);
    setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
    setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
    setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
  }

  setOperationAction(ISD::ConstantFP, MVT::f32, Custom);

  if (Subtarget->hasNEON()) {
    addDRTypeForNEON(MVT::v2f32);
    addDRTypeForNEON(MVT::v8i8);
    addDRTypeForNEON(MVT::v4i16);
    addDRTypeForNEON(MVT::v2i32);
    addDRTypeForNEON(MVT::v1i64);

    addQRTypeForNEON(MVT::v4f32);
    addQRTypeForNEON(MVT::v2f64);
    addQRTypeForNEON(MVT::v16i8);
    addQRTypeForNEON(MVT::v8i16);
    addQRTypeForNEON(MVT::v4i32);
    addQRTypeForNEON(MVT::v2i64);

    // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
    // neither Neon nor VFP support any arithmetic operations on it.
    // The same applies to v4f32, but keep in mind that vadd, vsub and vmul are
    // natively supported for v4f32.
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    // FIXME: Code duplication: FDIV and FREM are expanded always, see
    // ARMTargetLowering::addTypeForNEON method for details.
    setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
    setOperationAction(ISD::FREM, MVT::v2f64, Expand);
    // FIXME: Create unittest.
    // In other words, find a way when "copysign" appears in DAG with vector
    // operands.
    setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
    // FIXME: Code duplication: SETCC has custom operation action, see
    // ARMTargetLowering::addTypeForNEON method for details.
    setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
    // FIXME: Create unittest for FNEG and for FABS.
    setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
    setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
    setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
    // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
    setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
    setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
    setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
    setOperationAction(ISD::FMA, MVT::v2f64, Expand);

    setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
    setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
    setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
    setOperationAction(ISD::FPOWI, MVT::v4f32, Expand);
    setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);
    setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);

    // Mark v2f32 intrinsics.
    setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
    setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
    setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
    setOperationAction(ISD::FPOWI, MVT::v2f32, Expand);
    setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
    setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
    setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
    setOperationAction(ISD::FLOG10, MVT::v2f32, Expand);
    setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
    setOperationAction(ISD::FEXP2, MVT::v2f32, Expand);
    setOperationAction(ISD::FCEIL, MVT::v2f32, Expand);
    setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand);
    setOperationAction(ISD::FRINT, MVT::v2f32, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand);
    setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand);

    // Neon does not support some operations on v1i64 and v2i64 types.
    setOperationAction(ISD::MUL, MVT::v1i64, Expand);
    // Custom handling for some quad-vector types to detect VMULL.
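    // The custom lowering recognizes multiplies whose operands are sign- or
    // zero-extended from a narrower vector and selects the widening VMULL
    // instruction for them instead of extending and multiplying separately.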
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v2i64, Custom);
    // Custom handling for some vector types to avoid expensive expansions
    setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
    setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
    setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
    setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
    setOperationAction(ISD::SETCC, MVT::v1i64, Expand);
    setOperationAction(ISD::SETCC, MVT::v2i64, Expand);
    // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
    // a destination type that is wider than the source, nor does it have a
    // FP_TO_[SU]INT instruction with a narrower destination than source.
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);

    setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);

    // Custom expand long extensions to vectors.
    setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);

    // NEON does not have single instruction CTPOP for vectors with element
    // types wider than 8-bits. However, custom lowering can leverage the
    // v8i8/v16i8 vcnt instruction.
    setOperationAction(ISD::CTPOP, MVT::v2i32, Custom);
    setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
    setOperationAction(ISD::CTPOP, MVT::v4i16, Custom);
    setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);

    // NEON only has FMA instructions as of VFP4.
    if (!Subtarget->hasVFP4()) {
      setOperationAction(ISD::FMA, MVT::v2f32, Expand);
      setOperationAction(ISD::FMA, MVT::v4f32, Expand);
    }

    setTargetDAGCombine(ISD::INTRINSIC_VOID);
    setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
    setTargetDAGCombine(ISD::SHL);
    setTargetDAGCombine(ISD::SRL);
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::SIGN_EXTEND);
    setTargetDAGCombine(ISD::ZERO_EXTEND);
    setTargetDAGCombine(ISD::ANY_EXTEND);
    setTargetDAGCombine(ISD::SELECT_CC);
    setTargetDAGCombine(ISD::BUILD_VECTOR);
    setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
    setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
    setTargetDAGCombine(ISD::STORE);
    setTargetDAGCombine(ISD::FP_TO_SINT);
    setTargetDAGCombine(ISD::FP_TO_UINT);
    setTargetDAGCombine(ISD::FDIV);

    // It is legal to extload from v4i8 to v4i16 or v4i32.
    MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8,
                  MVT::v4i16, MVT::v2i16,
                  MVT::v2i32};
    for (unsigned i = 0; i < 6; ++i) {
      setLoadExtAction(ISD::EXTLOAD, Tys[i], Legal);
      setLoadExtAction(ISD::ZEXTLOAD, Tys[i], Legal);
      setLoadExtAction(ISD::SEXTLOAD, Tys[i], Legal);
    }
  }

  // ARM and Thumb2 support UMLAL/SMLAL.
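  // The ADDC combine below is used to recognize the 64-bit add-of-product
  // pattern (an ADDC/ADDE pair wrapping a 32x32->64 multiply) so that it can
  // be folded into a single UMLAL/SMLAL node.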
  if (!Subtarget->isThumb1Only())
    setTargetDAGCombine(ISD::ADDC);

  computeRegisterProperties();

  // ARM does not have f32 extending load.
  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);

  // ARM does not have i1 sign extending load.
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);

  // ARM supports all 4 flavors of integer indexed load / store.
  if (!Subtarget->isThumb1Only()) {
    for (unsigned im = (unsigned)ISD::PRE_INC;
         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
      setIndexedLoadAction(im, MVT::i1, Legal);
      setIndexedLoadAction(im, MVT::i8, Legal);
      setIndexedLoadAction(im, MVT::i16, Legal);
      setIndexedLoadAction(im, MVT::i32, Legal);
      setIndexedStoreAction(im, MVT::i1, Legal);
      setIndexedStoreAction(im, MVT::i8, Legal);
      setIndexedStoreAction(im, MVT::i16, Legal);
      setIndexedStoreAction(im, MVT::i32, Legal);
    }
  }

  // i64 operation support.
  setOperationAction(ISD::MUL, MVT::i64, Expand);
  setOperationAction(ISD::MULHU, MVT::i32, Expand);
  if (Subtarget->isThumb1Only()) {
    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  }
  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
      || (Subtarget->isThumb2() && !Subtarget->hasThumb2DSP()))
    setOperationAction(ISD::MULHS, MVT::i32, Expand);

  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL, MVT::i64, Custom);
  setOperationAction(ISD::SRA, MVT::i64, Custom);

  if (!Subtarget->isThumb1Only()) {
    // FIXME: We should do this for Thumb1 as well.
    setOperationAction(ISD::ADDC, MVT::i32, Custom);
    setOperationAction(ISD::ADDE, MVT::i32, Custom);
    setOperationAction(ISD::SUBC, MVT::i32, Custom);
    setOperationAction(ISD::SUBE, MVT::i32, Custom);
  }

  // ARM does not have ROTL.
  setOperationAction(ISD::ROTL, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
    setOperationAction(ISD::CTLZ, MVT::i32, Expand);

  // These just redirect to CTTZ and CTLZ on ARM.
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);

  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);

  // Only ARMv6 has BSWAP.
  if (!Subtarget->hasV6Ops())
    setOperationAction(ISD::BSWAP, MVT::i32, Expand);

  if (!(Subtarget->hasDivide() && Subtarget->isThumb2()) &&
      !(Subtarget->hasDivideInARMMode() && !Subtarget->isThumb())) {
    // These are expanded into libcalls if the CPU doesn't have a HW divider.
    setOperationAction(ISD::SDIV, MVT::i32, Expand);
    setOperationAction(ISD::UDIV, MVT::i32, Expand);
  }

  // FIXME: Also set divmod for SREM on EABI
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  // Register based DivRem for AEABI (RTABI 4.2)
  if (Subtarget->isTargetAEABI()) {
    setLibcallName(RTLIB::SDIVREM_I8, "__aeabi_idivmod");
    setLibcallName(RTLIB::SDIVREM_I16, "__aeabi_idivmod");
    setLibcallName(RTLIB::SDIVREM_I32, "__aeabi_idivmod");
    setLibcallName(RTLIB::SDIVREM_I64, "__aeabi_ldivmod");
    setLibcallName(RTLIB::UDIVREM_I8, "__aeabi_uidivmod");
    setLibcallName(RTLIB::UDIVREM_I16, "__aeabi_uidivmod");
    setLibcallName(RTLIB::UDIVREM_I32, "__aeabi_uidivmod");
    setLibcallName(RTLIB::UDIVREM_I64, "__aeabi_uldivmod");

    setLibcallCallingConv(RTLIB::SDIVREM_I8, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIVREM_I16, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIVREM_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIVREM_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIVREM_I8, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIVREM_I16, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIVREM_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIVREM_I64, CallingConv::ARM_AAPCS);

    setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
    setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
  } else {
    setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
    setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  }

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // Use the default implementation.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  if (!Subtarget->isTargetDarwin()) {
    // Non-Darwin platforms may return values in these registers via the
    // personality function.
    setExceptionPointerRegister(ARM::R0);
    setExceptionSelectorRegister(ARM::R1);
  }

  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
  // the default expansion.
  // FIXME: This should be checking for v6k, not just v6.
  if (Subtarget->hasDataBarrier() ||
      (Subtarget->hasV6Ops() && !Subtarget->isThumb())) {
    // membarrier needs custom lowering; the rest are legal and handled
    // normally.
    setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
    // Custom lowering for 64-bit ops
    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
    // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
    setInsertFencesForAtomic(true);
  } else {
    // Set them all for expansion, which will force libcalls.
    setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
    // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
    // Unordered/Monotonic case.
    setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
    setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
  }

  setOperationAction(ISD::PREFETCH, MVT::Other, Custom);

  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
  if (!Subtarget->hasV6Ops()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  }
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
      !Subtarget->isThumb1Only()) {
    // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
    // iff target supports vfp2.
    setOperationAction(ISD::BITCAST, MVT::i64, Custom);
    setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  if (Subtarget->isTargetDarwin()) {
    setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
    setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
    setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
  }

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::SETCC, MVT::f64, Expand);
  setOperationAction(ISD::SELECT, MVT::i32, Custom);
  setOperationAction(ISD::SELECT, MVT::f32, Custom);
  setOperationAction(ISD::SELECT, MVT::f64, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  setOperationAction(ISD::BRCOND, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::i32, Custom);
  setOperationAction(ISD::BR_CC, MVT::f32, Custom);
  setOperationAction(ISD::BR_CC, MVT::f64, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Custom);

  // We don't support sin/cos/fmod/copysign/pow
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);
  if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
      !Subtarget->isThumb1Only()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
  }
  setOperationAction(ISD::FPOW, MVT::f64, Expand);
  setOperationAction(ISD::FPOW, MVT::f32, Expand);

  if (!Subtarget->hasVFP4()) {
    setOperationAction(ISD::FMA, MVT::f64, Expand);
    setOperationAction(ISD::FMA, MVT::f32, Expand);
  }

  // Various VFP goodness
  if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) {
    // int <-> fp are custom expanded into bit_convert + ARMISD ops.
    if (Subtarget->hasVFP2()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
      setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    }
    // Special handling for half-precision FP.
    if (!Subtarget->hasFP16()) {
      setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
      setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
    }
  }

  // We have target-specific dag combine patterns for the following nodes:
  // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::SUB);
  setTargetDAGCombine(ISD::MUL);
  setTargetDAGCombine(ISD::AND);
  setTargetDAGCombine(ISD::OR);
  setTargetDAGCombine(ISD::XOR);

  if (Subtarget->hasV6Ops())
    setTargetDAGCombine(ISD::SRL);

  setStackPointerRegisterToSaveRestore(ARM::SP);

  if (TM.Options.UseSoftFloat || Subtarget->isThumb1Only() ||
      !Subtarget->hasVFP2())
    setSchedulingPreference(Sched::RegPressure);
  else
    setSchedulingPreference(Sched::Hybrid);

  //// temporary - rewrite interface to use type
  MaxStoresPerMemset = 8;
  MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
  MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
  MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
  MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
  MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2;

  // On ARM arguments smaller than 4 bytes are extended, so all arguments
  // are at least 4 bytes aligned.
  setMinStackArgumentAlignment(4);

  // Prefer likely predicted branches to selects on out-of-order cores.
  PredictableSelectIsExpensive = Subtarget->isLikeA9();

  setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}

// FIXME: It might make sense to define the representative register class as the
// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
// SPR's representative would be DPR_VFP2. This should work well if register
// pressure tracking were modified such that a register use would increment the
// pressure of the register class's representative and all of its super
// classes' representatives transitively. We have not implemented this because
// of the difficulty prior to coalescing of modeling operand register classes
// due to the common occurrence of cross class copies and subregister insertions
// and extractions.
std::pair<const TargetRegisterClass*, uint8_t>
ARMTargetLowering::findRepresentativeClass(MVT VT) const {
  const TargetRegisterClass *RRC = 0;
  uint8_t Cost = 1;
  switch (VT.SimpleTy) {
  default:
    return TargetLowering::findRepresentativeClass(VT);
  // Use DPR as representative register class for all floating point
  // and vector types. Since there are 32 SPR registers and 32 DPR registers,
  // the cost is 1 for both f32 and f64.
  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
    RRC = &ARM::DPRRegClass;
    // When NEON is used for SP, only half of the register file is available
    // because operations that define both SP and DP results will be constrained
    // to the VFP2 class (D0-D15). We currently model this constraint prior to
    // coalescing by double-counting the SP regs. See the FIXME above.
    if (Subtarget->useNEONForSinglePrecisionFP())
      Cost = 2;
    break;
  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
  case MVT::v4f32: case MVT::v2f64:
    RRC = &ARM::DPRRegClass;
    Cost = 2;
    break;
  case MVT::v4i64:
    RRC = &ARM::DPRRegClass;
    Cost = 4;
    break;
  case MVT::v8i64:
    RRC = &ARM::DPRRegClass;
    Cost = 8;
    break;
  }
  return std::make_pair(RRC, Cost);
}

const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case ARMISD::Wrapper: return "ARMISD::Wrapper";
  case ARMISD::WrapperDYN: return "ARMISD::WrapperDYN";
  case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
  case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
  case ARMISD::CALL: return "ARMISD::CALL";
  case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
  case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
  case ARMISD::tCALL: return "ARMISD::tCALL";
  case ARMISD::BRCOND: return "ARMISD::BRCOND";
  case ARMISD::BR_JT: return "ARMISD::BR_JT";
  case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
  case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
  case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
  case ARMISD::CMP: return "ARMISD::CMP";
  case ARMISD::CMN: return "ARMISD::CMN";
  case ARMISD::CMPZ: return "ARMISD::CMPZ";
  case ARMISD::CMPFP: return "ARMISD::CMPFP";
  case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
  case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
  case ARMISD::FMSTAT: return "ARMISD::FMSTAT";

  case ARMISD::CMOV: return "ARMISD::CMOV";

  case ARMISD::RBIT: return "ARMISD::RBIT";

  case ARMISD::FTOSI: return "ARMISD::FTOSI";
  case ARMISD::FTOUI: return "ARMISD::FTOUI";
  case ARMISD::SITOF: return "ARMISD::SITOF";
  case ARMISD::UITOF: return "ARMISD::UITOF";

  case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
  case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
  case ARMISD::RRX: return "ARMISD::RRX";

  case ARMISD::ADDC: return "ARMISD::ADDC";
  case ARMISD::ADDE: return "ARMISD::ADDE";
  case ARMISD::SUBC: return "ARMISD::SUBC";
  case ARMISD::SUBE: return "ARMISD::SUBE";

  case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
  case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";

  case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
  case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";

  case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";

  case ARMISD::THREAD_POINTER: return "ARMISD::THREAD_POINTER";

  case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";

  case ARMISD::MEMBARRIER: return "ARMISD::MEMBARRIER";
  case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";

  case ARMISD::PRELOAD: return "ARMISD::PRELOAD";

  case ARMISD::VCEQ: return "ARMISD::VCEQ";
  case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
  case ARMISD::VCGE: return "ARMISD::VCGE";
  case ARMISD::VCGEZ: return "ARMISD::VCGEZ";
  case ARMISD::VCLEZ: return "ARMISD::VCLEZ";
  case ARMISD::VCGEU: return "ARMISD::VCGEU";
  case ARMISD::VCGT: return "ARMISD::VCGT";
  case ARMISD::VCGTZ: return "ARMISD::VCGTZ";
  case ARMISD::VCLTZ: return "ARMISD::VCLTZ";
  case ARMISD::VCGTU: return "ARMISD::VCGTU";
  case ARMISD::VTST: return "ARMISD::VTST";

  case ARMISD::VSHL: return "ARMISD::VSHL";
  case ARMISD::VSHRs: return "ARMISD::VSHRs";
  case ARMISD::VSHRu: return "ARMISD::VSHRu";
  case ARMISD::VSHLLs: return "ARMISD::VSHLLs";
  case ARMISD::VSHLLu: return "ARMISD::VSHLLu";
  case ARMISD::VSHLLi: return "ARMISD::VSHLLi";
  case ARMISD::VSHRN: return "ARMISD::VSHRN";
  case ARMISD::VRSHRs: return "ARMISD::VRSHRs";
  case ARMISD::VRSHRu: return "ARMISD::VRSHRu";
  case ARMISD::VRSHRN: return "ARMISD::VRSHRN";
  case ARMISD::VQSHLs: return "ARMISD::VQSHLs";
  case ARMISD::VQSHLu: return "ARMISD::VQSHLu";
  case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu";
  case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs";
  case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu";
  case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu";
  case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs";
  case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu";
  case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu";
  case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
  case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
  case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
  case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
  case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM";
  case ARMISD::VDUP: return "ARMISD::VDUP";
  case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
  case ARMISD::VEXT: return "ARMISD::VEXT";
  case ARMISD::VREV64: return "ARMISD::VREV64";
  case ARMISD::VREV32: return "ARMISD::VREV32";
  case ARMISD::VREV16: return "ARMISD::VREV16";
  case ARMISD::VZIP: return "ARMISD::VZIP";
  case ARMISD::VUZP: return "ARMISD::VUZP";
  case ARMISD::VTRN: return "ARMISD::VTRN";
  case ARMISD::VTBL1: return "ARMISD::VTBL1";
  case ARMISD::VTBL2: return "ARMISD::VTBL2";
  case ARMISD::VMULLs: return "ARMISD::VMULLs";
  case ARMISD::VMULLu: return "ARMISD::VMULLu";
  case ARMISD::UMLAL: return "ARMISD::UMLAL";
  case ARMISD::SMLAL: return "ARMISD::SMLAL";
  case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
  case ARMISD::FMAX: return "ARMISD::FMAX";
  case ARMISD::FMIN: return "ARMISD::FMIN";
  case ARMISD::BFI: return "ARMISD::BFI";
  case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
  case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
  case ARMISD::VBSL: return "ARMISD::VBSL";
  case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
  case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
  case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
  case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD";
  case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD";
  case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD";
  case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD";
  case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
  case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
  case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
  case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
  case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
  case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
  case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";
  case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";
  case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";
  case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";
  case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
  case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
  case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";

  case ARMISD::ATOMADD64_DAG: return "ATOMADD64_DAG";
  case ARMISD::ATOMSUB64_DAG: return "ATOMSUB64_DAG";
  case ARMISD::ATOMOR64_DAG: return "ATOMOR64_DAG";
  case ARMISD::ATOMXOR64_DAG:
    return "ATOMXOR64_DAG";
  case ARMISD::ATOMAND64_DAG: return "ATOMAND64_DAG";
  case ARMISD::ATOMNAND64_DAG: return "ATOMNAND64_DAG";
  case ARMISD::ATOMSWAP64_DAG: return "ATOMSWAP64_DAG";
  case ARMISD::ATOMCMPXCHG64_DAG: return "ATOMCMPXCHG64_DAG";
  case ARMISD::ATOMMIN64_DAG: return "ATOMMIN64_DAG";
  case ARMISD::ATOMUMIN64_DAG: return "ATOMUMIN64_DAG";
  case ARMISD::ATOMMAX64_DAG: return "ATOMMAX64_DAG";
  case ARMISD::ATOMUMAX64_DAG: return "ATOMUMAX64_DAG";
  }
}

EVT ARMTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
  if (!VT.isVector()) return getPointerTy();
  return VT.changeVectorElementTypeToInteger();
}

/// getRegClassFor - Return the register class that should be used for the
/// specified value type.
const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
  // load / store 4 to 8 consecutive D registers.
  if (Subtarget->hasNEON()) {
    if (VT == MVT::v4i64)
      return &ARM::QQPRRegClass;
    if (VT == MVT::v8i64)
      return &ARM::QQQQPRRegClass;
  }
  return TargetLowering::getRegClassFor(VT);
}

// Create a fast isel object.
FastISel *
ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
                                  const TargetLibraryInfo *libInfo) const {
  return ARM::createFastISel(funcInfo, libInfo);
}

/// getMaximalGlobalOffset - Returns the maximal possible offset which can
/// be used for loads / stores from the global.
unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
  return (Subtarget->isThumb1Only() ? 127 : 4095);
}

Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
  unsigned NumVals = N->getNumValues();
  if (!NumVals)
    return Sched::RegPressure;

  for (unsigned i = 0; i != NumVals; ++i) {
    EVT VT = N->getValueType(i);
    if (VT == MVT::Glue || VT == MVT::Other)
      continue;
    if (VT.isFloatingPoint() || VT.isVector())
      return Sched::ILP;
  }

  if (!N->isMachineOpcode())
    return Sched::RegPressure;

  // Loads are scheduled for latency even if the instruction itinerary
  // is not available.
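  // A first-def operand latency greater than two cycles is treated as a
  // long-latency result (typically a load), so ILP scheduling is preferred to
  // help hide that latency.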
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());

  if (MCID.getNumDefs() == 0)
    return Sched::RegPressure;
  if (!Itins->isEmpty() &&
      Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
    return Sched::ILP;

  return Sched::RegPressure;
}

//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//

/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
  switch (CC) {
  default: llvm_unreachable("Unknown condition code!");
  case ISD::SETNE: return ARMCC::NE;
  case ISD::SETEQ: return ARMCC::EQ;
  case ISD::SETGT: return ARMCC::GT;
  case ISD::SETGE: return ARMCC::GE;
  case ISD::SETLT: return ARMCC::LT;
  case ISD::SETLE: return ARMCC::LE;
  case ISD::SETUGT: return ARMCC::HI;
  case ISD::SETUGE: return ARMCC::HS;
  case ISD::SETULT: return ARMCC::LO;
  case ISD::SETULE: return ARMCC::LS;
  }
}

/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
                        ARMCC::CondCodes &CondCode2) {
  CondCode2 = ARMCC::AL;
  switch (CC) {
  default: llvm_unreachable("Unknown FP condition!");
  case ISD::SETEQ:
  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
  case ISD::SETGT:
  case ISD::SETOGT: CondCode = ARMCC::GT; break;
  case ISD::SETGE:
  case ISD::SETOGE: CondCode = ARMCC::GE; break;
  case ISD::SETOLT: CondCode = ARMCC::MI; break;
  case ISD::SETOLE: CondCode = ARMCC::LS; break;
  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
  case ISD::SETO: CondCode = ARMCC::VC; break;
  case ISD::SETUO: CondCode = ARMCC::VS; break;
  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
  case ISD::SETUGT: CondCode = ARMCC::HI; break;
  case ISD::SETUGE: CondCode = ARMCC::PL; break;
  case ISD::SETLT:
  case ISD::SETULT: CondCode = ARMCC::LT; break;
  case ISD::SETLE:
  case ISD::SETULE: CondCode = ARMCC::LE; break;
  case ISD::SETNE:
  case ISD::SETUNE: CondCode = ARMCC::NE; break;
  }
}

//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "ARMGenCallingConv.inc"

/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
/// CallingConvention value.
CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
                                                 bool Return,
                                                 bool isVarArg) const {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported calling convention");
  case CallingConv::Fast:
    if (Subtarget->hasVFP2() && !isVarArg) {
      if (!Subtarget->isAAPCS_ABI())
        return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
      // For AAPCS ABI targets, just use the VFP variant of the calling
      // convention.
      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
    }
    // Fallthrough
  case CallingConv::C: {
    // Use target triple & subtarget features to do actual dispatch.
    if (!Subtarget->isAAPCS_ABI())
      return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
    else if (Subtarget->hasVFP2() &&
             getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
             !isVarArg)
      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
  }
  case CallingConv::ARM_AAPCS_VFP:
    if (!isVarArg)
      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
    // Fallthrough
  case CallingConv::ARM_AAPCS:
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
  case CallingConv::ARM_APCS:
    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
  case CallingConv::GHC:
    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
  }
}

/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue
ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
                                   CallingConv::ID CallConv, bool isVarArg,
                                   const SmallVectorImpl<ISD::InputArg> &Ins,
                                   SDLoc dl, SelectionDAG &DAG,
                                   SmallVectorImpl<SDValue> &InVals,
                                   bool isThisReturn, SDValue ThisVal) const {

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                    getTargetMachine(), RVLocs, *DAG.getContext(), Call);
  CCInfo.AnalyzeCallResult(Ins,
                           CCAssignFnForNode(CallConv, /* Return*/ true,
                                             isVarArg));

  // Copy all of the result registers out of their specified physregs.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign VA = RVLocs[i];

    // Pass the 'this' value directly from the argument to the return value,
    // to avoid register unit interference.
    if (i == 0 && isThisReturn) {
      assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
             "unexpected return calling convention register assignment");
      InVals.push_back(ThisVal);
      continue;
    }

    SDValue Val;
    if (VA.needsCustom()) {
      // Handle f64 or half of a v2f64.
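      // When returned in core registers, an f64 (or each half of a v2f64)
      // comes back split across two GPRs; copy both halves out and reassemble
      // them with VMOVDRR, inserting each rebuilt f64 into the vector for the
      // v2f64 case.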
1303 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, 1304 InFlag); 1305 Chain = Lo.getValue(1); 1306 InFlag = Lo.getValue(2); 1307 VA = RVLocs[++i]; // skip ahead to next loc 1308 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, 1309 InFlag); 1310 Chain = Hi.getValue(1); 1311 InFlag = Hi.getValue(2); 1312 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); 1313 1314 if (VA.getLocVT() == MVT::v2f64) { 1315 SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); 1316 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, 1317 DAG.getConstant(0, MVT::i32)); 1318 1319 VA = RVLocs[++i]; // skip ahead to next loc 1320 Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); 1321 Chain = Lo.getValue(1); 1322 InFlag = Lo.getValue(2); 1323 VA = RVLocs[++i]; // skip ahead to next loc 1324 Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); 1325 Chain = Hi.getValue(1); 1326 InFlag = Hi.getValue(2); 1327 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); 1328 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, 1329 DAG.getConstant(1, MVT::i32)); 1330 } 1331 } else { 1332 Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), 1333 InFlag); 1334 Chain = Val.getValue(1); 1335 InFlag = Val.getValue(2); 1336 } 1337 1338 switch (VA.getLocInfo()) { 1339 default: llvm_unreachable("Unknown loc info!"); 1340 case CCValAssign::Full: break; 1341 case CCValAssign::BCvt: 1342 Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val); 1343 break; 1344 } 1345 1346 InVals.push_back(Val); 1347 } 1348 1349 return Chain; 1350 } 1351 1352 /// LowerMemOpCallTo - Store the argument to the stack. 1353 SDValue 1354 ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, 1355 SDValue StackPtr, SDValue Arg, 1356 SDLoc dl, SelectionDAG &DAG, 1357 const CCValAssign &VA, 1358 ISD::ArgFlagsTy Flags) const { 1359 unsigned LocMemOffset = VA.getLocMemOffset(); 1360 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); 1361 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); 1362 return DAG.getStore(Chain, dl, Arg, PtrOff, 1363 MachinePointerInfo::getStack(LocMemOffset), 1364 false, false, 0); 1365 } 1366 1367 void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG, 1368 SDValue Chain, SDValue &Arg, 1369 RegsToPassVector &RegsToPass, 1370 CCValAssign &VA, CCValAssign &NextVA, 1371 SDValue &StackPtr, 1372 SmallVectorImpl<SDValue> &MemOpChains, 1373 ISD::ArgFlagsTy Flags) const { 1374 1375 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, 1376 DAG.getVTList(MVT::i32, MVT::i32), Arg); 1377 RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd)); 1378 1379 if (NextVA.isRegLoc()) 1380 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1))); 1381 else { 1382 assert(NextVA.isMemLoc()); 1383 if (StackPtr.getNode() == 0) 1384 StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); 1385 1386 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1), 1387 dl, DAG, NextVA, 1388 Flags)); 1389 } 1390 } 1391 1392 /// LowerCall - Lowering a call into a callseq_start <- 1393 /// ARMISD:CALL <- callseq_end chain. Also add input and output parameter 1394 /// nodes. 
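/// For a simple direct call, the emitted chain looks roughly like this
/// (a sketch; the actual registers depend on the calling convention):
///   ch = callseq_start ch, #NumBytes
///   ch, glue = CopyToReg ch, R0, <arg0>          ; one per register arg
///   ch, glue = ARMISD::CALL ch, <target>, R0, <regmask>, glue
///   ch, glue = callseq_end ch, #NumBytes, 0, glue
/// with the results then copied out of physregs by LowerCallResult.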
1395 SDValue 1396 ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, 1397 SmallVectorImpl<SDValue> &InVals) const { 1398 SelectionDAG &DAG = CLI.DAG; 1399 SDLoc &dl = CLI.DL; 1400 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 1401 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 1402 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 1403 SDValue Chain = CLI.Chain; 1404 SDValue Callee = CLI.Callee; 1405 bool &isTailCall = CLI.IsTailCall; 1406 CallingConv::ID CallConv = CLI.CallConv; 1407 bool doesNotRet = CLI.DoesNotReturn; 1408 bool isVarArg = CLI.IsVarArg; 1409 1410 MachineFunction &MF = DAG.getMachineFunction(); 1411 bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); 1412 bool isThisReturn = false; 1413 bool isSibCall = false; 1414 // Disable tail calls if they're not supported. 1415 if (!EnableARMTailCalls && !Subtarget->supportsTailCall()) 1416 isTailCall = false; 1417 if (isTailCall) { 1418 // Check if it's really possible to do a tail call. 1419 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, 1420 isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(), 1421 Outs, OutVals, Ins, DAG); 1422 // We don't support GuaranteedTailCallOpt for ARM, only automatically 1423 // detected sibcalls. 1424 if (isTailCall) { 1425 ++NumTailCalls; 1426 isSibCall = true; 1427 } 1428 } 1429 1430 // Analyze operands of the call, assigning locations to each operand. 1431 SmallVector<CCValAssign, 16> ArgLocs; 1432 ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 1433 getTargetMachine(), ArgLocs, *DAG.getContext(), Call); 1434 CCInfo.AnalyzeCallOperands(Outs, 1435 CCAssignFnForNode(CallConv, /* Return*/ false, 1436 isVarArg)); 1437 1438 // Get a count of how many bytes are to be pushed on the stack. 1439 unsigned NumBytes = CCInfo.getNextStackOffset(); 1440 1441 // For tail calls, memory operands are available in our caller's stack. 1442 if (isSibCall) 1443 NumBytes = 0; 1444 1445 // Adjust the stack pointer for the new arguments... 1446 // These operations are automatically eliminated by the prolog/epilog pass 1447 if (!isSibCall) 1448 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), 1449 dl); 1450 1451 SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); 1452 1453 RegsToPassVector RegsToPass; 1454 SmallVector<SDValue, 8> MemOpChains; 1455 1456 // Walk the register/memloc assignments, inserting copies/loads. In the case 1457 // of tail call optimization, arguments are handled later. 1458 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); 1459 i != e; 1460 ++i, ++realArgIdx) { 1461 CCValAssign &VA = ArgLocs[i]; 1462 SDValue Arg = OutVals[realArgIdx]; 1463 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; 1464 bool isByVal = Flags.isByVal(); 1465 1466 // Promote the value if needed. 
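// For example (a hypothetical signature): an i8 argument marked signext is
// assigned LocInfo == CCValAssign::SExt with an i32 LocVT, so it is widened
// with ISD::SIGN_EXTEND here before being placed in a GPR or a stack slot.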
1467 switch (VA.getLocInfo()) { 1468 default: llvm_unreachable("Unknown loc info!"); 1469 case CCValAssign::Full: break; 1470 case CCValAssign::SExt: 1471 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); 1472 break; 1473 case CCValAssign::ZExt: 1474 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); 1475 break; 1476 case CCValAssign::AExt: 1477 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); 1478 break; 1479 case CCValAssign::BCvt: 1480 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); 1481 break; 1482 } 1483 1484 // f64 and v2f64 might be passed in i32 pairs and must be split into pieces 1485 if (VA.needsCustom()) { 1486 if (VA.getLocVT() == MVT::v2f64) { 1487 SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 1488 DAG.getConstant(0, MVT::i32)); 1489 SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 1490 DAG.getConstant(1, MVT::i32)); 1491 1492 PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, 1493 VA, ArgLocs[++i], StackPtr, MemOpChains, Flags); 1494 1495 VA = ArgLocs[++i]; // skip ahead to next loc 1496 if (VA.isRegLoc()) { 1497 PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, 1498 VA, ArgLocs[++i], StackPtr, MemOpChains, Flags); 1499 } else { 1500 assert(VA.isMemLoc()); 1501 1502 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1, 1503 dl, DAG, VA, Flags)); 1504 } 1505 } else { 1506 PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i], 1507 StackPtr, MemOpChains, Flags); 1508 } 1509 } else if (VA.isRegLoc()) { 1510 if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i32) { 1511 assert(VA.getLocVT() == MVT::i32 && 1512 "unexpected calling convention register assignment"); 1513 assert(!Ins.empty() && Ins[0].VT == MVT::i32 && 1514 "unexpected use of 'returned'"); 1515 isThisReturn = true; 1516 } 1517 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); 1518 } else if (isByVal) { 1519 assert(VA.isMemLoc()); 1520 unsigned offset = 0; 1521 1522 // True if this byval aggregate will be split between registers 1523 // and memory. 1524 unsigned ByValArgsCount = CCInfo.getInRegsParamsCount(); 1525 unsigned CurByValIdx = CCInfo.getInRegsParamsProceed(); 1526 1527 if (CurByValIdx < ByValArgsCount) { 1528 1529 unsigned RegBegin, RegEnd; 1530 CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd); 1531 1532 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1533 unsigned int i, j; 1534 for (i = 0, j = RegBegin; j < RegEnd; i++, j++) { 1535 SDValue Const = DAG.getConstant(4*i, MVT::i32); 1536 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); 1537 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, 1538 MachinePointerInfo(), 1539 false, false, false, 0); 1540 MemOpChains.push_back(Load.getValue(1)); 1541 RegsToPass.push_back(std::make_pair(j, Load)); 1542 } 1543 1544 // If parameter size outsides register area, "offset" value 1545 // helps us to calculate stack slot for remained part properly. 
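// Worked example (hypothetical numbers): a 20-byte byval argument with
// RegBegin == R2 and RegEnd == R4 has its first 8 bytes loaded into r2/r3 by
// the loop above, "offset" becomes 2, and the ARMISD::COPY_STRUCT_BYVAL node
// created below copies the remaining 20 - 4*2 = 12 bytes from Arg+8 into the
// argument's stack slot.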
1546 offset = RegEnd - RegBegin; 1547 1548 CCInfo.nextInRegsParam(); 1549 } 1550 1551 if (Flags.getByValSize() > 4*offset) { 1552 unsigned LocMemOffset = VA.getLocMemOffset(); 1553 SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset); 1554 SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, 1555 StkPtrOff); 1556 SDValue SrcOffset = DAG.getIntPtrConstant(4*offset); 1557 SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset); 1558 SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, 1559 MVT::i32); 1560 SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), MVT::i32); 1561 1562 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); 1563 SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode}; 1564 MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs, 1565 Ops, array_lengthof(Ops))); 1566 } 1567 } else if (!isSibCall) { 1568 assert(VA.isMemLoc()); 1569 1570 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, 1571 dl, DAG, VA, Flags)); 1572 } 1573 } 1574 1575 if (!MemOpChains.empty()) 1576 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 1577 &MemOpChains[0], MemOpChains.size()); 1578 1579 // Build a sequence of copy-to-reg nodes chained together with token chain 1580 // and flag operands which copy the outgoing args into the appropriate regs. 1581 SDValue InFlag; 1582 // Tail call byval lowering might overwrite argument registers so in case of 1583 // tail call optimization the copies to registers are lowered later. 1584 if (!isTailCall) 1585 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 1586 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 1587 RegsToPass[i].second, InFlag); 1588 InFlag = Chain.getValue(1); 1589 } 1590 1591 // For tail calls lower the arguments to the 'real' stack slot. 1592 if (isTailCall) { 1593 // Force all the incoming stack arguments to be loaded from the stack 1594 // before any new outgoing arguments are stored to the stack, because the 1595 // outgoing stack slots may alias the incoming argument stack slots, and 1596 // the alias isn't otherwise explicit. This is slightly more conservative 1597 // than necessary, because it means that each store effectively depends 1598 // on every argument instead of just those arguments it would clobber. 1599 1600 // Do not flag preceding copytoreg stuff together with the following stuff. 1601 InFlag = SDValue(); 1602 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 1603 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 1604 RegsToPass[i].second, InFlag); 1605 InFlag = Chain.getValue(1); 1606 } 1607 InFlag = SDValue(); 1608 } 1609 1610 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every 1611 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol 1612 // node so that legalize doesn't hack it. 1613 bool isDirect = false; 1614 bool isARMFunc = false; 1615 bool isLocalARMFunc = false; 1616 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1617 1618 if (EnableARMLongCalls) { 1619 assert (getTargetMachine().getRelocationModel() == Reloc::Static 1620 && "long-calls with non-static relocation model!"); 1621 // Handle a global address or an external symbol. If it's not one of 1622 // those, the target's already in a register, so we don't need to do 1623 // anything extra. 
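// In that case the callee address is materialized via a constant-pool load
// (see below) and the call becomes an indirect call through a register,
// which avoids the limited range of a direct BL (roughly +/-32 MiB in ARM
// state).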
1624 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1625 const GlobalValue *GV = G->getGlobal(); 1626 // Create a constant pool entry for the callee address 1627 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1628 ARMConstantPoolValue *CPV = 1629 ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0); 1630 1631 // Get the address of the callee into a register 1632 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1633 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1634 Callee = DAG.getLoad(getPointerTy(), dl, 1635 DAG.getEntryNode(), CPAddr, 1636 MachinePointerInfo::getConstantPool(), 1637 false, false, false, 0); 1638 } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) { 1639 const char *Sym = S->getSymbol(); 1640 1641 // Create a constant pool entry for the callee address 1642 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1643 ARMConstantPoolValue *CPV = 1644 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym, 1645 ARMPCLabelIndex, 0); 1646 // Get the address of the callee into a register 1647 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1648 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1649 Callee = DAG.getLoad(getPointerTy(), dl, 1650 DAG.getEntryNode(), CPAddr, 1651 MachinePointerInfo::getConstantPool(), 1652 false, false, false, 0); 1653 } 1654 } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1655 const GlobalValue *GV = G->getGlobal(); 1656 isDirect = true; 1657 bool isExt = GV->isDeclaration() || GV->isWeakForLinker(); 1658 bool isStub = (isExt && Subtarget->isTargetDarwin()) && 1659 getTargetMachine().getRelocationModel() != Reloc::Static; 1660 isARMFunc = !Subtarget->isThumb() || isStub; 1661 // ARM call to a local ARM function is predicable. 1662 isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking); 1663 // tBX takes a register source operand. 1664 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { 1665 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1666 ARMConstantPoolValue *CPV = 1667 ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 4); 1668 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1669 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1670 Callee = DAG.getLoad(getPointerTy(), dl, 1671 DAG.getEntryNode(), CPAddr, 1672 MachinePointerInfo::getConstantPool(), 1673 false, false, false, 0); 1674 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1675 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, 1676 getPointerTy(), Callee, PICLabel); 1677 } else { 1678 // On ELF targets for PIC code, direct calls should go through the PLT 1679 unsigned OpFlags = 0; 1680 if (Subtarget->isTargetELF() && 1681 getTargetMachine().getRelocationModel() == Reloc::PIC_) 1682 OpFlags = ARMII::MO_PLT; 1683 Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags); 1684 } 1685 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 1686 isDirect = true; 1687 bool isStub = Subtarget->isTargetDarwin() && 1688 getTargetMachine().getRelocationModel() != Reloc::Static; 1689 isARMFunc = !Subtarget->isThumb() || isStub; 1690 // tBX takes a register source operand. 
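// So when a Thumb1 caller without v5T needs to call an ARM function, the
// callee address is first loaded from the constant pool (plus a PIC add when
// the relocation model needs it) into a register, mirroring the
// global-address case above.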
1691 const char *Sym = S->getSymbol(); 1692 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { 1693 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1694 ARMConstantPoolValue *CPV = 1695 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym, 1696 ARMPCLabelIndex, 4); 1697 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1698 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1699 Callee = DAG.getLoad(getPointerTy(), dl, 1700 DAG.getEntryNode(), CPAddr, 1701 MachinePointerInfo::getConstantPool(), 1702 false, false, false, 0); 1703 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1704 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, 1705 getPointerTy(), Callee, PICLabel); 1706 } else { 1707 unsigned OpFlags = 0; 1708 // On ELF targets for PIC code, direct calls should go through the PLT 1709 if (Subtarget->isTargetELF() && 1710 getTargetMachine().getRelocationModel() == Reloc::PIC_) 1711 OpFlags = ARMII::MO_PLT; 1712 Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags); 1713 } 1714 } 1715 1716 // FIXME: handle tail calls differently. 1717 unsigned CallOpc; 1718 bool HasMinSizeAttr = MF.getFunction()->getAttributes(). 1719 hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); 1720 if (Subtarget->isThumb()) { 1721 if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) 1722 CallOpc = ARMISD::CALL_NOLINK; 1723 else 1724 CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL; 1725 } else { 1726 if (!isDirect && !Subtarget->hasV5TOps()) 1727 CallOpc = ARMISD::CALL_NOLINK; 1728 else if (doesNotRet && isDirect && Subtarget->hasRAS() && 1729 // Emit regular call when code size is the priority 1730 !HasMinSizeAttr) 1731 // "mov lr, pc; b _foo" to avoid confusing the RSP 1732 CallOpc = ARMISD::CALL_NOLINK; 1733 else 1734 CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL; 1735 } 1736 1737 std::vector<SDValue> Ops; 1738 Ops.push_back(Chain); 1739 Ops.push_back(Callee); 1740 1741 // Add argument registers to the end of the list so that they are known live 1742 // into the call. 1743 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 1744 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 1745 RegsToPass[i].second.getValueType())); 1746 1747 // Add a register mask operand representing the call-preserved registers. 1748 const uint32_t *Mask; 1749 const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); 1750 const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI); 1751 if (isThisReturn) { 1752 // For 'this' returns, use the R0-preserving mask if applicable 1753 Mask = ARI->getThisReturnPreservedMask(CallConv); 1754 if (!Mask) { 1755 // Set isThisReturn to false if the calling convention is not one that 1756 // allows 'returned' to be modeled in this way, so LowerCallResult does 1757 // not try to pass 'this' straight through 1758 isThisReturn = false; 1759 Mask = ARI->getCallPreservedMask(CallConv); 1760 } 1761 } else 1762 Mask = ARI->getCallPreservedMask(CallConv); 1763 1764 assert(Mask && "Missing call preserved mask for calling convention"); 1765 Ops.push_back(DAG.getRegisterMask(Mask)); 1766 1767 if (InFlag.getNode()) 1768 Ops.push_back(InFlag); 1769 1770 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 1771 if (isTailCall) 1772 return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size()); 1773 1774 // Returns a chain and a flag for retval copy to use. 
1775 Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
1776 InFlag = Chain.getValue(1);
1777
1778 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
1779 DAG.getIntPtrConstant(0, true), InFlag, dl);
1780 if (!Ins.empty())
1781 InFlag = Chain.getValue(1);
1782
1783 // Handle result values, copying them out of physregs into vregs that we
1784 // return.
1785 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
1786 InVals, isThisReturn,
1787 isThisReturn ? OutVals[0] : SDValue());
1788 }
1789
1790 /// HandleByVal - Every parameter *after* a byval parameter is passed
1791 /// on the stack. Remember the next parameter register to allocate,
1792 /// and then confiscate the rest of the parameter registers to ensure
1793 /// this.
1794 void
1795 ARMTargetLowering::HandleByVal(
1796 CCState *State, unsigned &size, unsigned Align) const {
1797 unsigned reg = State->AllocateReg(GPRArgRegs, 4);
1798 assert((State->getCallOrPrologue() == Prologue ||
1799 State->getCallOrPrologue() == Call) &&
1800 "unhandled ParmContext");
1801
1802 // For in-prologue parameter handling, we also introduce a stack offset
1803 // for byval registers: see CallingConvLower.cpp, CCState::HandleByVal.
1804 // This behaviour falls outside the AAPCS rules (5.5 Parameter Passing) for
1805 // how the NSAA should be evaluated (NSAA means "next stacked argument address").
1806 // So: NextStackOffset = NSAAOffset + SizeOfByValParamsStoredInRegs.
1807 // Then: NSAAOffset = NextStackOffset - SizeOfByValParamsStoredInRegs.
1808 unsigned NSAAOffset = State->getNextStackOffset();
1809 if (State->getCallOrPrologue() != Call) {
1810 for (unsigned i = 0, e = State->getInRegsParamsCount(); i != e; ++i) {
1811 unsigned RB, RE;
1812 State->getInRegsParamInfo(i, RB, RE);
1813 assert(NSAAOffset >= (RE-RB)*4 &&
1814 "Stack offset for byval regs was not introduced?");
1815 NSAAOffset -= (RE-RB)*4;
1816 }
1817 }
1818 if ((ARM::R0 <= reg) && (reg <= ARM::R3)) {
1819 if (Subtarget->isAAPCS_ABI() && Align > 4) {
1820 unsigned AlignInRegs = Align / 4;
1821 unsigned Waste = (ARM::R4 - reg) % AlignInRegs;
1822 for (unsigned i = 0; i < Waste; ++i)
1823 reg = State->AllocateReg(GPRArgRegs, 4);
1824 }
1825 if (reg != 0) {
1826 unsigned excess = 4 * (ARM::R4 - reg);
1827
1828 // Special case when NSAA != SP and the parameter size is greater than
1829 // the size of all remaining GPR regs. In that case we can't split the
1830 // parameter; it must all go to the stack. We also must set the NCRN to
1831 // R4, so waste all remaining registers.
1832 if (Subtarget->isAAPCS_ABI() && NSAAOffset != 0 && size > excess) {
1833 while (State->AllocateReg(GPRArgRegs, 4))
1834 ;
1835 return;
1836 }
1837
1838 // The first register for the byval parameter is the first register that
1839 // wasn't allocated before this method call, i.e. "reg".
1840 // If the parameter is small enough to fit in the range [reg, r4), then the
1841 // end (one past the last) register would be reg + param-size-in-regs,
1842 // else the parameter is split between registers and the stack, and the
1843 // end register would be r4 in this case.
1844 unsigned ByValRegBegin = reg;
1845 unsigned ByValRegEnd = (size < excess) ? reg + size/4 : (unsigned)ARM::R4;
1846 State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
1847 // Note: the first register was already allocated at the beginning of the
1848 // function; allocate the remaining registers we need.
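// Worked example (hypothetical numbers, assuming the NSAA special case above
// did not fire): for a 24-byte byval parameter with reg == R1 we get
// excess == 4 * (R4 - R1) == 12, so the parameter is split; ByValRegBegin and
// ByValRegEnd become R1 and R4 (12 bytes in r1-r3), and at a call site "size"
// is reduced to 24 - 12 == 12 bytes that still go on the stack.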
1849 for (unsigned i = reg+1; i != ByValRegEnd; ++i) 1850 State->AllocateReg(GPRArgRegs, 4); 1851 // At a call site, a byval parameter that is split between 1852 // registers and memory needs its size truncated here. In a 1853 // function prologue, such byval parameters are reassembled in 1854 // memory, and are not truncated. 1855 if (State->getCallOrPrologue() == Call) { 1856 // Make remained size equal to 0 in case, when 1857 // the whole structure may be stored into registers. 1858 if (size < excess) 1859 size = 0; 1860 else 1861 size -= excess; 1862 } 1863 } 1864 } 1865 } 1866 1867 /// MatchingStackOffset - Return true if the given stack call argument is 1868 /// already available in the same position (relatively) of the caller's 1869 /// incoming argument stack. 1870 static 1871 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, 1872 MachineFrameInfo *MFI, const MachineRegisterInfo *MRI, 1873 const TargetInstrInfo *TII) { 1874 unsigned Bytes = Arg.getValueType().getSizeInBits() / 8; 1875 int FI = INT_MAX; 1876 if (Arg.getOpcode() == ISD::CopyFromReg) { 1877 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); 1878 if (!TargetRegisterInfo::isVirtualRegister(VR)) 1879 return false; 1880 MachineInstr *Def = MRI->getVRegDef(VR); 1881 if (!Def) 1882 return false; 1883 if (!Flags.isByVal()) { 1884 if (!TII->isLoadFromStackSlot(Def, FI)) 1885 return false; 1886 } else { 1887 return false; 1888 } 1889 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) { 1890 if (Flags.isByVal()) 1891 // ByVal argument is passed in as a pointer but it's now being 1892 // dereferenced. e.g. 1893 // define @foo(%struct.X* %A) { 1894 // tail call @bar(%struct.X* byval %A) 1895 // } 1896 return false; 1897 SDValue Ptr = Ld->getBasePtr(); 1898 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr); 1899 if (!FINode) 1900 return false; 1901 FI = FINode->getIndex(); 1902 } else 1903 return false; 1904 1905 assert(FI != INT_MAX); 1906 if (!MFI->isFixedObjectIndex(FI)) 1907 return false; 1908 return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI); 1909 } 1910 1911 /// IsEligibleForTailCallOptimization - Check whether the call is eligible 1912 /// for tail call optimization. Targets which want to do tail call 1913 /// optimization should implement this function. 1914 bool 1915 ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, 1916 CallingConv::ID CalleeCC, 1917 bool isVarArg, 1918 bool isCalleeStructRet, 1919 bool isCallerStructRet, 1920 const SmallVectorImpl<ISD::OutputArg> &Outs, 1921 const SmallVectorImpl<SDValue> &OutVals, 1922 const SmallVectorImpl<ISD::InputArg> &Ins, 1923 SelectionDAG& DAG) const { 1924 const Function *CallerF = DAG.getMachineFunction().getFunction(); 1925 CallingConv::ID CallerCC = CallerF->getCallingConv(); 1926 bool CCMatch = CallerCC == CalleeCC; 1927 1928 // Look for obvious safe cases to perform tail call optimization that do not 1929 // require ABI changes. This is what gcc calls sibcall. 1930 1931 // Do not sibcall optimize vararg calls unless the call site is not passing 1932 // any arguments. 1933 if (isVarArg && !Outs.empty()) 1934 return false; 1935 1936 // Also avoid sibcall optimization if either caller or callee uses struct 1937 // return semantics. 1938 if (isCalleeStructRet || isCallerStructRet) 1939 return false; 1940 1941 // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo:: 1942 // emitEpilogue is not ready for them. 
Thumb tail calls also use t2B, as 1943 // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation 1944 // support in the assembler and linker to be used. This would need to be 1945 // fixed to fully support tail calls in Thumb1. 1946 // 1947 // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take 1948 // LR. This means if we need to reload LR, it takes an extra instructions, 1949 // which outweighs the value of the tail call; but here we don't know yet 1950 // whether LR is going to be used. Probably the right approach is to 1951 // generate the tail call here and turn it back into CALL/RET in 1952 // emitEpilogue if LR is used. 1953 1954 // Thumb1 PIC calls to external symbols use BX, so they can be tail calls, 1955 // but we need to make sure there are enough registers; the only valid 1956 // registers are the 4 used for parameters. We don't currently do this 1957 // case. 1958 if (Subtarget->isThumb1Only()) 1959 return false; 1960 1961 // If the calling conventions do not match, then we'd better make sure the 1962 // results are returned in the same way as what the caller expects. 1963 if (!CCMatch) { 1964 SmallVector<CCValAssign, 16> RVLocs1; 1965 ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), 1966 getTargetMachine(), RVLocs1, *DAG.getContext(), Call); 1967 CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg)); 1968 1969 SmallVector<CCValAssign, 16> RVLocs2; 1970 ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), 1971 getTargetMachine(), RVLocs2, *DAG.getContext(), Call); 1972 CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg)); 1973 1974 if (RVLocs1.size() != RVLocs2.size()) 1975 return false; 1976 for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) { 1977 if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc()) 1978 return false; 1979 if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo()) 1980 return false; 1981 if (RVLocs1[i].isRegLoc()) { 1982 if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg()) 1983 return false; 1984 } else { 1985 if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset()) 1986 return false; 1987 } 1988 } 1989 } 1990 1991 // If Caller's vararg or byval argument has been split between registers and 1992 // stack, do not perform tail call, since part of the argument is in caller's 1993 // local frame. 1994 const ARMFunctionInfo *AFI_Caller = DAG.getMachineFunction(). 1995 getInfo<ARMFunctionInfo>(); 1996 if (AFI_Caller->getArgRegsSaveSize()) 1997 return false; 1998 1999 // If the callee takes no arguments then go on to check the results of the 2000 // call. 2001 if (!Outs.empty()) { 2002 // Check if stack adjustment is needed. For now, do not do this if any 2003 // argument is passed on the stack. 2004 SmallVector<CCValAssign, 16> ArgLocs; 2005 ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), 2006 getTargetMachine(), ArgLocs, *DAG.getContext(), Call); 2007 CCInfo.AnalyzeCallOperands(Outs, 2008 CCAssignFnForNode(CalleeCC, false, isVarArg)); 2009 if (CCInfo.getNextStackOffset()) { 2010 MachineFunction &MF = DAG.getMachineFunction(); 2011 2012 // Check if the arguments are already laid out in the right way as 2013 // the caller's fixed stack objects. 
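// That is, every stack-assigned outgoing argument must already live in the
// caller's incoming argument area at the same offset and with the same size
// (see MatchingStackOffset above), e.g. an incoming stack argument that is
// simply forwarded to the callee unchanged.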
2014 MachineFrameInfo *MFI = MF.getFrameInfo(); 2015 const MachineRegisterInfo *MRI = &MF.getRegInfo(); 2016 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 2017 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); 2018 i != e; 2019 ++i, ++realArgIdx) { 2020 CCValAssign &VA = ArgLocs[i]; 2021 EVT RegVT = VA.getLocVT(); 2022 SDValue Arg = OutVals[realArgIdx]; 2023 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; 2024 if (VA.getLocInfo() == CCValAssign::Indirect) 2025 return false; 2026 if (VA.needsCustom()) { 2027 // f64 and vector types are split into multiple registers or 2028 // register/stack-slot combinations. The types will not match 2029 // the registers; give up on memory f64 refs until we figure 2030 // out what to do about this. 2031 if (!VA.isRegLoc()) 2032 return false; 2033 if (!ArgLocs[++i].isRegLoc()) 2034 return false; 2035 if (RegVT == MVT::v2f64) { 2036 if (!ArgLocs[++i].isRegLoc()) 2037 return false; 2038 if (!ArgLocs[++i].isRegLoc()) 2039 return false; 2040 } 2041 } else if (!VA.isRegLoc()) { 2042 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, 2043 MFI, MRI, TII)) 2044 return false; 2045 } 2046 } 2047 } 2048 } 2049 2050 return true; 2051 } 2052 2053 bool 2054 ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv, 2055 MachineFunction &MF, bool isVarArg, 2056 const SmallVectorImpl<ISD::OutputArg> &Outs, 2057 LLVMContext &Context) const { 2058 SmallVector<CCValAssign, 16> RVLocs; 2059 CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context); 2060 return CCInfo.CheckReturn(Outs, CCAssignFnForNode(CallConv, /*Return=*/true, 2061 isVarArg)); 2062 } 2063 2064 SDValue 2065 ARMTargetLowering::LowerReturn(SDValue Chain, 2066 CallingConv::ID CallConv, bool isVarArg, 2067 const SmallVectorImpl<ISD::OutputArg> &Outs, 2068 const SmallVectorImpl<SDValue> &OutVals, 2069 SDLoc dl, SelectionDAG &DAG) const { 2070 2071 // CCValAssign - represent the assignment of the return value to a location. 2072 SmallVector<CCValAssign, 16> RVLocs; 2073 2074 // CCState - Info about the registers and stack slots. 2075 ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 2076 getTargetMachine(), RVLocs, *DAG.getContext(), Call); 2077 2078 // Analyze outgoing return values. 2079 CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true, 2080 isVarArg)); 2081 2082 SDValue Flag; 2083 SmallVector<SDValue, 4> RetOps; 2084 RetOps.push_back(Chain); // Operand #0 = Chain (updated below) 2085 2086 // Copy the result values into the output registers. 2087 for (unsigned i = 0, realRVLocIdx = 0; 2088 i != RVLocs.size(); 2089 ++i, ++realRVLocIdx) { 2090 CCValAssign &VA = RVLocs[i]; 2091 assert(VA.isRegLoc() && "Can only return in registers!"); 2092 2093 SDValue Arg = OutVals[realRVLocIdx]; 2094 2095 switch (VA.getLocInfo()) { 2096 default: llvm_unreachable("Unknown loc info!"); 2097 case CCValAssign::Full: break; 2098 case CCValAssign::BCvt: 2099 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); 2100 break; 2101 } 2102 2103 if (VA.needsCustom()) { 2104 if (VA.getLocVT() == MVT::v2f64) { 2105 // Extract the first half and return it in two registers. 
2106 SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 2107 DAG.getConstant(0, MVT::i32)); 2108 SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl, 2109 DAG.getVTList(MVT::i32, MVT::i32), Half); 2110 2111 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag); 2112 Flag = Chain.getValue(1); 2113 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 2114 VA = RVLocs[++i]; // skip ahead to next loc 2115 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), 2116 HalfGPRs.getValue(1), Flag); 2117 Flag = Chain.getValue(1); 2118 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 2119 VA = RVLocs[++i]; // skip ahead to next loc 2120 2121 // Extract the 2nd half and fall through to handle it as an f64 value. 2122 Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 2123 DAG.getConstant(1, MVT::i32)); 2124 } 2125 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is 2126 // available. 2127 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, 2128 DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1); 2129 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag); 2130 Flag = Chain.getValue(1); 2131 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 2132 VA = RVLocs[++i]; // skip ahead to next loc 2133 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1), 2134 Flag); 2135 } else 2136 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); 2137 2138 // Guarantee that all emitted copies are 2139 // stuck together, avoiding something bad. 2140 Flag = Chain.getValue(1); 2141 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 2142 } 2143 2144 // Update chain and glue. 2145 RetOps[0] = Chain; 2146 if (Flag.getNode()) 2147 RetOps.push_back(Flag); 2148 2149 return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, 2150 RetOps.data(), RetOps.size()); 2151 } 2152 2153 bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { 2154 if (N->getNumValues() != 1) 2155 return false; 2156 if (!N->hasNUsesOfValue(1, 0)) 2157 return false; 2158 2159 SDValue TCChain = Chain; 2160 SDNode *Copy = *N->use_begin(); 2161 if (Copy->getOpcode() == ISD::CopyToReg) { 2162 // If the copy has a glue operand, we conservatively assume it isn't safe to 2163 // perform a tail call. 2164 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue) 2165 return false; 2166 TCChain = Copy->getOperand(0); 2167 } else if (Copy->getOpcode() == ARMISD::VMOVRRD) { 2168 SDNode *VMov = Copy; 2169 // f64 returned in a pair of GPRs. 2170 SmallPtrSet<SDNode*, 2> Copies; 2171 for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end(); 2172 UI != UE; ++UI) { 2173 if (UI->getOpcode() != ISD::CopyToReg) 2174 return false; 2175 Copies.insert(*UI); 2176 } 2177 if (Copies.size() > 2) 2178 return false; 2179 2180 for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end(); 2181 UI != UE; ++UI) { 2182 SDValue UseChain = UI->getOperand(0); 2183 if (Copies.count(UseChain.getNode())) 2184 // Second CopyToReg 2185 Copy = *UI; 2186 else 2187 // First CopyToReg 2188 TCChain = UseChain; 2189 } 2190 } else if (Copy->getOpcode() == ISD::BITCAST) { 2191 // f32 returned in a single GPR. 
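// The pattern being matched is roughly:
//   t0: i32 = bitcast f32 <val>
//   CopyToReg <chain>, <ret reg>, t0
//   ARMISD::RET_FLAG ...
// so look through the bitcast to its single CopyToReg user, then require
// below that every user of that copy is the return node.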
2192 if (!Copy->hasOneUse()) 2193 return false; 2194 Copy = *Copy->use_begin(); 2195 if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0)) 2196 return false; 2197 TCChain = Copy->getOperand(0); 2198 } else { 2199 return false; 2200 } 2201 2202 bool HasRet = false; 2203 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end(); 2204 UI != UE; ++UI) { 2205 if (UI->getOpcode() != ARMISD::RET_FLAG) 2206 return false; 2207 HasRet = true; 2208 } 2209 2210 if (!HasRet) 2211 return false; 2212 2213 Chain = TCChain; 2214 return true; 2215 } 2216 2217 bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { 2218 if (!EnableARMTailCalls && !Subtarget->supportsTailCall()) 2219 return false; 2220 2221 if (!CI->isTailCall()) 2222 return false; 2223 2224 return !Subtarget->isThumb1Only(); 2225 } 2226 2227 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as 2228 // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is 2229 // one of the above mentioned nodes. It has to be wrapped because otherwise 2230 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only 2231 // be used to form addressing mode. These wrapped nodes will be selected 2232 // into MOVi. 2233 static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) { 2234 EVT PtrVT = Op.getValueType(); 2235 // FIXME there is no actual debug info here 2236 SDLoc dl(Op); 2237 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 2238 SDValue Res; 2239 if (CP->isMachineConstantPoolEntry()) 2240 Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, 2241 CP->getAlignment()); 2242 else 2243 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, 2244 CP->getAlignment()); 2245 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res); 2246 } 2247 2248 unsigned ARMTargetLowering::getJumpTableEncoding() const { 2249 return MachineJumpTableInfo::EK_Inline; 2250 } 2251 2252 SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, 2253 SelectionDAG &DAG) const { 2254 MachineFunction &MF = DAG.getMachineFunction(); 2255 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2256 unsigned ARMPCLabelIndex = 0; 2257 SDLoc DL(Op); 2258 EVT PtrVT = getPointerTy(); 2259 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); 2260 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 2261 SDValue CPAddr; 2262 if (RelocM == Reloc::Static) { 2263 CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4); 2264 } else { 2265 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; 2266 ARMPCLabelIndex = AFI->createPICLabelUId(); 2267 ARMConstantPoolValue *CPV = 2268 ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex, 2269 ARMCP::CPBlockAddress, PCAdj); 2270 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2271 } 2272 CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr); 2273 SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr, 2274 MachinePointerInfo::getConstantPool(), 2275 false, false, false, 0); 2276 if (RelocM == Reloc::Static) 2277 return Result; 2278 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2279 return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel); 2280 } 2281 2282 // Lower ISD::GlobalTLSAddress using the "general dynamic" model 2283 SDValue 2284 ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, 2285 SelectionDAG &DAG) const { 2286 SDLoc dl(GA); 2287 EVT PtrVT = getPointerTy(); 2288 unsigned char PCAdj = Subtarget->isThumb() ? 
4 : 8; 2289 MachineFunction &MF = DAG.getMachineFunction(); 2290 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2291 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 2292 ARMConstantPoolValue *CPV = 2293 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, 2294 ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true); 2295 SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2296 Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); 2297 Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, 2298 MachinePointerInfo::getConstantPool(), 2299 false, false, false, 0); 2300 SDValue Chain = Argument.getValue(1); 2301 2302 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2303 Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel); 2304 2305 // call __tls_get_addr. 2306 ArgListTy Args; 2307 ArgListEntry Entry; 2308 Entry.Node = Argument; 2309 Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext()); 2310 Args.push_back(Entry); 2311 // FIXME: is there useful debug info available here? 2312 TargetLowering::CallLoweringInfo CLI(Chain, 2313 (Type *) Type::getInt32Ty(*DAG.getContext()), 2314 false, false, false, false, 2315 0, CallingConv::C, /*isTailCall=*/false, 2316 /*doesNotRet=*/false, /*isReturnValueUsed=*/true, 2317 DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); 2318 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); 2319 return CallResult.first; 2320 } 2321 2322 // Lower ISD::GlobalTLSAddress using the "initial exec" or 2323 // "local exec" model. 2324 SDValue 2325 ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, 2326 SelectionDAG &DAG, 2327 TLSModel::Model model) const { 2328 const GlobalValue *GV = GA->getGlobal(); 2329 SDLoc dl(GA); 2330 SDValue Offset; 2331 SDValue Chain = DAG.getEntryNode(); 2332 EVT PtrVT = getPointerTy(); 2333 // Get the Thread Pointer 2334 SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); 2335 2336 if (model == TLSModel::InitialExec) { 2337 MachineFunction &MF = DAG.getMachineFunction(); 2338 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2339 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 2340 // Initial exec model. 2341 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; 2342 ARMConstantPoolValue *CPV = 2343 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, 2344 ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF, 2345 true); 2346 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2347 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); 2348 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 2349 MachinePointerInfo::getConstantPool(), 2350 false, false, false, 0); 2351 Chain = Offset.getValue(1); 2352 2353 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2354 Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); 2355 2356 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 2357 MachinePointerInfo::getConstantPool(), 2358 false, false, false, 0); 2359 } else { 2360 // local exec model 2361 assert(model == TLSModel::LocalExec); 2362 ARMConstantPoolValue *CPV = 2363 ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF); 2364 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2365 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); 2366 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 2367 MachinePointerInfo::getConstantPool(), 2368 false, false, false, 0); 2369 } 2370 2371 // The address of the thread local variable is the add of the thread 2372 // pointer with the offset of the variable. 
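// i.e. result = ThreadPointer + Offset. For initial-exec the TP-relative
// offset is itself loaded through a GOT entry (hence the second load above);
// for local-exec it is a link-time constant (TPOFF) loaded straight from the
// constant pool.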
2373 return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset); 2374 } 2375 2376 SDValue 2377 ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { 2378 // TODO: implement the "local dynamic" model 2379 assert(Subtarget->isTargetELF() && 2380 "TLS not implemented for non-ELF targets"); 2381 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); 2382 2383 TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal()); 2384 2385 switch (model) { 2386 case TLSModel::GeneralDynamic: 2387 case TLSModel::LocalDynamic: 2388 return LowerToTLSGeneralDynamicModel(GA, DAG); 2389 case TLSModel::InitialExec: 2390 case TLSModel::LocalExec: 2391 return LowerToTLSExecModels(GA, DAG, model); 2392 } 2393 llvm_unreachable("bogus TLS model"); 2394 } 2395 2396 SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, 2397 SelectionDAG &DAG) const { 2398 EVT PtrVT = getPointerTy(); 2399 SDLoc dl(Op); 2400 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 2401 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { 2402 bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); 2403 ARMConstantPoolValue *CPV = 2404 ARMConstantPoolConstant::Create(GV, 2405 UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT); 2406 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2407 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2408 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), 2409 CPAddr, 2410 MachinePointerInfo::getConstantPool(), 2411 false, false, false, 0); 2412 SDValue Chain = Result.getValue(1); 2413 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); 2414 Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT); 2415 if (!UseGOTOFF) 2416 Result = DAG.getLoad(PtrVT, dl, Chain, Result, 2417 MachinePointerInfo::getGOT(), 2418 false, false, false, 0); 2419 return Result; 2420 } 2421 2422 // If we have T2 ops, we can materialize the address directly via movt/movw 2423 // pair. This is always cheaper. 2424 if (Subtarget->useMovt()) { 2425 ++NumMovwMovt; 2426 // FIXME: Once remat is capable of dealing with instructions with register 2427 // operands, expand this into two nodes. 2428 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, 2429 DAG.getTargetGlobalAddress(GV, dl, PtrVT)); 2430 } else { 2431 SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); 2432 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2433 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2434 MachinePointerInfo::getConstantPool(), 2435 false, false, false, 0); 2436 } 2437 } 2438 2439 SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, 2440 SelectionDAG &DAG) const { 2441 EVT PtrVT = getPointerTy(); 2442 SDLoc dl(Op); 2443 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 2444 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 2445 2446 // FIXME: Enable this for static codegen when tool issues are fixed. Also 2447 // update ARMFastISel::ARMMaterializeGV. 2448 if (Subtarget->useMovt() && RelocM != Reloc::Static) { 2449 ++NumMovwMovt; 2450 // FIXME: Once remat is capable of dealing with instructions with register 2451 // operands, expand this into two nodes. 2452 if (RelocM == Reloc::Static) 2453 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, 2454 DAG.getTargetGlobalAddress(GV, dl, PtrVT)); 2455 2456 unsigned Wrapper = (RelocM == Reloc::PIC_) 2457 ? 
ARMISD::WrapperPIC : ARMISD::WrapperDYN; 2458 SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, 2459 DAG.getTargetGlobalAddress(GV, dl, PtrVT)); 2460 if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) 2461 Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, 2462 MachinePointerInfo::getGOT(), 2463 false, false, false, 0); 2464 return Result; 2465 } 2466 2467 unsigned ARMPCLabelIndex = 0; 2468 SDValue CPAddr; 2469 if (RelocM == Reloc::Static) { 2470 CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); 2471 } else { 2472 ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>(); 2473 ARMPCLabelIndex = AFI->createPICLabelUId(); 2474 unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8); 2475 ARMConstantPoolValue *CPV = 2476 ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 2477 PCAdj); 2478 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2479 } 2480 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2481 2482 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2483 MachinePointerInfo::getConstantPool(), 2484 false, false, false, 0); 2485 SDValue Chain = Result.getValue(1); 2486 2487 if (RelocM == Reloc::PIC_) { 2488 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2489 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 2490 } 2491 2492 if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) 2493 Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(), 2494 false, false, false, 0); 2495 2496 return Result; 2497 } 2498 2499 SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, 2500 SelectionDAG &DAG) const { 2501 assert(Subtarget->isTargetELF() && 2502 "GLOBAL OFFSET TABLE not implemented for non-ELF targets"); 2503 MachineFunction &MF = DAG.getMachineFunction(); 2504 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2505 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 2506 EVT PtrVT = getPointerTy(); 2507 SDLoc dl(Op); 2508 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; 2509 ARMConstantPoolValue *CPV = 2510 ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_", 2511 ARMPCLabelIndex, PCAdj); 2512 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2513 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2514 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2515 MachinePointerInfo::getConstantPool(), 2516 false, false, false, 0); 2517 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2518 return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 2519 } 2520 2521 SDValue 2522 ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { 2523 SDLoc dl(Op); 2524 SDValue Val = DAG.getConstant(0, MVT::i32); 2525 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, 2526 DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0), 2527 Op.getOperand(1), Val); 2528 } 2529 2530 SDValue 2531 ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const { 2532 SDLoc dl(Op); 2533 return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0), 2534 Op.getOperand(1), DAG.getConstant(0, MVT::i32)); 2535 } 2536 2537 SDValue 2538 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, 2539 const ARMSubtarget *Subtarget) const { 2540 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 2541 SDLoc dl(Op); 2542 switch (IntNo) { 2543 default: return SDValue(); // Don't custom lower most intrinsics. 
2544 case Intrinsic::arm_thread_pointer: { 2545 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2546 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); 2547 } 2548 case Intrinsic::eh_sjlj_lsda: { 2549 MachineFunction &MF = DAG.getMachineFunction(); 2550 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2551 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 2552 EVT PtrVT = getPointerTy(); 2553 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 2554 SDValue CPAddr; 2555 unsigned PCAdj = (RelocM != Reloc::PIC_) 2556 ? 0 : (Subtarget->isThumb() ? 4 : 8); 2557 ARMConstantPoolValue *CPV = 2558 ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex, 2559 ARMCP::CPLSDA, PCAdj); 2560 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2561 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2562 SDValue Result = 2563 DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2564 MachinePointerInfo::getConstantPool(), 2565 false, false, false, 0); 2566 2567 if (RelocM == Reloc::PIC_) { 2568 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2569 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 2570 } 2571 return Result; 2572 } 2573 case Intrinsic::arm_neon_vmulls: 2574 case Intrinsic::arm_neon_vmullu: { 2575 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls) 2576 ? ARMISD::VMULLs : ARMISD::VMULLu; 2577 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), 2578 Op.getOperand(1), Op.getOperand(2)); 2579 } 2580 } 2581 } 2582 2583 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, 2584 const ARMSubtarget *Subtarget) { 2585 // FIXME: handle "fence singlethread" more efficiently. 2586 SDLoc dl(Op); 2587 if (!Subtarget->hasDataBarrier()) { 2588 // Some ARMv6 cpus can support data barriers with an mcr instruction. 2589 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get 2590 // here. 2591 assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() && 2592 "Unexpected ISD::MEMBARRIER encountered. Should be libcall!"); 2593 return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0), 2594 DAG.getConstant(0, MVT::i32)); 2595 } 2596 2597 ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1)); 2598 AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue()); 2599 unsigned Domain = ARM_MB::ISH; 2600 if (Subtarget->isSwift() && Ord == Release) { 2601 // Swift happens to implement ISHST barriers in a way that's compatible with 2602 // Release semantics but weaker than ISH so we'd be fools not to use 2603 // it. Beware: other processors probably don't! 2604 Domain = ARM_MB::ISHST; 2605 } 2606 2607 return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0), 2608 DAG.getConstant(Domain, MVT::i32)); 2609 } 2610 2611 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG, 2612 const ARMSubtarget *Subtarget) { 2613 // ARM pre v5TE and Thumb1 does not have preload instructions. 2614 if (!(Subtarget->isThumb2() || 2615 (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps()))) 2616 // Just preserve the chain. 2617 return Op.getOperand(0); 2618 2619 SDLoc dl(Op); 2620 unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1; 2621 if (!isRead && 2622 (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension())) 2623 // ARMv7 with MP extension has PLDW. 2624 return Op.getOperand(0); 2625 2626 unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); 2627 if (Subtarget->isThumb()) { 2628 // Invert the bits. 
2629 isRead = ~isRead & 1; 2630 isData = ~isData & 1; 2631 } 2632 2633 return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0), 2634 Op.getOperand(1), DAG.getConstant(isRead, MVT::i32), 2635 DAG.getConstant(isData, MVT::i32)); 2636 } 2637 2638 static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { 2639 MachineFunction &MF = DAG.getMachineFunction(); 2640 ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>(); 2641 2642 // vastart just stores the address of the VarArgsFrameIndex slot into the 2643 // memory location argument. 2644 SDLoc dl(Op); 2645 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2646 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 2647 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 2648 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), 2649 MachinePointerInfo(SV), false, false, 0); 2650 } 2651 2652 SDValue 2653 ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, 2654 SDValue &Root, SelectionDAG &DAG, 2655 SDLoc dl) const { 2656 MachineFunction &MF = DAG.getMachineFunction(); 2657 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2658 2659 const TargetRegisterClass *RC; 2660 if (AFI->isThumb1OnlyFunction()) 2661 RC = &ARM::tGPRRegClass; 2662 else 2663 RC = &ARM::GPRRegClass; 2664 2665 // Transform the arguments stored in physical registers into virtual ones. 2666 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 2667 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); 2668 2669 SDValue ArgValue2; 2670 if (NextVA.isMemLoc()) { 2671 MachineFrameInfo *MFI = MF.getFrameInfo(); 2672 int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true); 2673 2674 // Create load node to retrieve arguments from the stack. 2675 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 2676 ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, 2677 MachinePointerInfo::getFixedStack(FI), 2678 false, false, false, 0); 2679 } else { 2680 Reg = MF.addLiveIn(NextVA.getLocReg(), RC); 2681 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); 2682 } 2683 2684 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2); 2685 } 2686 2687 void 2688 ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, 2689 unsigned InRegsParamRecordIdx, 2690 unsigned ArgSize, 2691 unsigned &ArgRegsSize, 2692 unsigned &ArgRegsSaveSize) 2693 const { 2694 unsigned NumGPRs; 2695 if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) { 2696 unsigned RBegin, REnd; 2697 CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd); 2698 NumGPRs = REnd - RBegin; 2699 } else { 2700 unsigned int firstUnalloced; 2701 firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs, 2702 sizeof(GPRArgRegs) / 2703 sizeof(GPRArgRegs[0])); 2704 NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0; 2705 } 2706 2707 unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); 2708 ArgRegsSize = NumGPRs * 4; 2709 2710 // If parameter is split between stack and GPRs... 2711 if (NumGPRs && Align == 8 && 2712 (ArgRegsSize < ArgSize || 2713 InRegsParamRecordIdx >= CCInfo.getInRegsParamsCount())) { 2714 // Add padding for part of param recovered from GPRs, so 2715 // its last byte must be at address K*8 - 1. 2716 // We need to do it, since remained (stack) part of parameter has 2717 // stack alignment, and we need to "attach" "GPRs head" without gaps 2718 // to it: 2719 // Stack: 2720 // |---- 8 bytes block ----| |---- 8 bytes block ----| |---- 8 bytes... 
2721 // [ [padding] [GPRs head] ] [ Tail passed via stack ....
2722 //
2723 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2724 unsigned Padding =
2725 ((ArgRegsSize + AFI->getArgRegsSaveSize() + Align - 1) & ~(Align-1)) -
2726 (ArgRegsSize + AFI->getArgRegsSaveSize());
2727 ArgRegsSaveSize = ArgRegsSize + Padding;
2728 } else
2729 // We don't need to extend the regs save size for byval parameters if
2730 // they are passed via GPRs only.
2731 ArgRegsSaveSize = ArgRegsSize;
2732 }
2733
2734 // The remaining GPRs hold either the beginning of variable-argument
2735 // data, or the beginning of an aggregate passed by value (usually
2736 // byval). Either way, we allocate stack slots adjacent to the data
2737 // provided by our caller, and store the unallocated registers there.
2738 // If this is a variadic function, the va_list pointer will begin with
2739 // these values; otherwise, this reassembles a (byval) structure that
2740 // was split between registers and memory.
2741 // Return: the frame index that the registers were stored into.
2742 int
2743 ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
2744 SDLoc dl, SDValue &Chain,
2745 const Value *OrigArg,
2746 unsigned InRegsParamRecordIdx,
2747 unsigned OffsetFromOrigArg,
2748 unsigned ArgOffset,
2749 unsigned ArgSize,
2750 bool ForceMutable) const {
2751
2752 // Currently, two use cases are possible:
2753 // Case #1. A non-vararg function, and we meet the first byval parameter.
2754 // Set up the first unallocated register as the first byval register
2755 // and eat all remaining registers
2756 // (these two actions are performed by the HandleByVal method).
2757 // Then, here, we initialize the stack frame with
2758 // "store-reg" instructions.
2759 // Case #2. A vararg function that doesn't contain byval parameters.
2760 // The same: eat all remaining unallocated registers and
2761 // initialize the stack frame.
2762
2763 MachineFunction &MF = DAG.getMachineFunction();
2764 MachineFrameInfo *MFI = MF.getFrameInfo();
2765 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2766 unsigned firstRegToSaveIndex, lastRegToSaveIndex;
2767 unsigned RBegin, REnd;
2768 if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
2769 CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
2770 firstRegToSaveIndex = RBegin - ARM::R0;
2771 lastRegToSaveIndex = REnd - ARM::R0;
2772 } else {
2773 firstRegToSaveIndex = CCInfo.getFirstUnallocated
2774 (GPRArgRegs, array_lengthof(GPRArgRegs));
2775 lastRegToSaveIndex = 4;
2776 }
2777
2778 unsigned ArgRegsSize, ArgRegsSaveSize;
2779 computeRegArea(CCInfo, MF, InRegsParamRecordIdx, ArgSize,
2780 ArgRegsSize, ArgRegsSaveSize);
2781
2782 // Store any byval regs to their spots on the stack so that they may be
2783 // loaded by dereferencing the result of the formal parameter pointer or
2784 // va_next. Note: once the stack area for byval/varargs registers has been
2785 // initialized, it can't be initialized again.
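// Worked example (hypothetical numbers): for a byval parameter split between
// registers and the stack, with r1-r3 to be saved (ArgRegsSize == 12), an
// 8-byte stack alignment and nothing saved so far, computeRegArea rounds 12
// up to 16, so ArgRegsSaveSize == 16 and the Padding computed below is 4; the
// registers are then stored after those 4 bytes of padding so that they butt
// up against the portion of the argument that was passed on the stack (see
// the picture above computeRegArea).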
  if (ArgRegsSaveSize) {

    unsigned Padding = ArgRegsSaveSize - ArgRegsSize;

    if (Padding) {
      assert(AFI->getStoredByValParamsPadding() == 0 &&
             "The only parameter may be padded.");
      AFI->setStoredByValParamsPadding(Padding);
    }

    int FrameIndex = MFI->CreateFixedObject(
                      ArgRegsSaveSize,
                      Padding + ArgOffset,
                      false);
    SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy());

    SmallVector<SDValue, 4> MemOps;
    for (unsigned i = 0; firstRegToSaveIndex < lastRegToSaveIndex;
         ++firstRegToSaveIndex, ++i) {
      const TargetRegisterClass *RC;
      if (AFI->isThumb1OnlyFunction())
        RC = &ARM::tGPRRegClass;
      else
        RC = &ARM::GPRRegClass;

      unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC);
      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
      SDValue Store =
        DAG.getStore(Val.getValue(1), dl, Val, FIN,
                     MachinePointerInfo(OrigArg, OffsetFromOrigArg + 4*i),
                     false, false, 0);
      MemOps.push_back(Store);
      FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
                        DAG.getConstant(4, getPointerTy()));
    }

    AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize());

    if (!MemOps.empty())
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                          &MemOps[0], MemOps.size());
    return FrameIndex;
  } else
    // This will point to the next argument passed via stack.
    return MFI->CreateFixedObject(
      4, AFI->getStoredByValParamsPadding() + ArgOffset, !ForceMutable);
}

// Set up the stack frame that the va_list pointer will start from.
void
ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
                                        SDLoc dl, SDValue &Chain,
                                        unsigned ArgOffset,
                                        bool ForceMutable) const {
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  // Try to store any remaining integer argument regs to their spots on the
  // stack so that they may be loaded by dereferencing the result of va_next.
  // If there are no regs to be stored, just point the address past the last
  // argument passed via the stack.
  int FrameIndex =
    StoreByValRegs(CCInfo, DAG, dl, Chain, 0, CCInfo.getInRegsParamsCount(),
                   0, ArgOffset, 0, ForceMutable);

  AFI->setVarArgsFrameIndex(FrameIndex);
}

SDValue
ARMTargetLowering::LowerFormalArguments(SDValue Chain,
                                        CallingConv::ID CallConv, bool isVarArg,
                                        const SmallVectorImpl<ISD::InputArg>
                                          &Ins,
                                        SDLoc dl, SelectionDAG &DAG,
                                        SmallVectorImpl<SDValue> &InVals)
                                          const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();

  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                    getTargetMachine(), ArgLocs, *DAG.getContext(), Prologue);
  CCInfo.AnalyzeFormalArguments(Ins,
                                CCAssignFnForNode(CallConv, /* Return*/ false,
                                                  isVarArg));

  SmallVector<SDValue, 16> ArgValues;
  int lastInsIndex = -1;
  SDValue ArgValue;
  Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
  unsigned CurArgIdx = 0;

  // Initially ArgRegsSaveSize is zero.
  // Then we increase this value each time we meet a byval parameter.
2884 // We also increase this value in case of varargs function. 2885 AFI->setArgRegsSaveSize(0); 2886 2887 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 2888 CCValAssign &VA = ArgLocs[i]; 2889 std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx); 2890 CurArgIdx = Ins[VA.getValNo()].OrigArgIndex; 2891 // Arguments stored in registers. 2892 if (VA.isRegLoc()) { 2893 EVT RegVT = VA.getLocVT(); 2894 2895 if (VA.needsCustom()) { 2896 // f64 and vector types are split up into multiple registers or 2897 // combinations of registers and stack slots. 2898 if (VA.getLocVT() == MVT::v2f64) { 2899 SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i], 2900 Chain, DAG, dl); 2901 VA = ArgLocs[++i]; // skip ahead to next loc 2902 SDValue ArgValue2; 2903 if (VA.isMemLoc()) { 2904 int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true); 2905 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 2906 ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN, 2907 MachinePointerInfo::getFixedStack(FI), 2908 false, false, false, 0); 2909 } else { 2910 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], 2911 Chain, DAG, dl); 2912 } 2913 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); 2914 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, 2915 ArgValue, ArgValue1, DAG.getIntPtrConstant(0)); 2916 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, 2917 ArgValue, ArgValue2, DAG.getIntPtrConstant(1)); 2918 } else 2919 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); 2920 2921 } else { 2922 const TargetRegisterClass *RC; 2923 2924 if (RegVT == MVT::f32) 2925 RC = &ARM::SPRRegClass; 2926 else if (RegVT == MVT::f64) 2927 RC = &ARM::DPRRegClass; 2928 else if (RegVT == MVT::v2f64) 2929 RC = &ARM::QPRRegClass; 2930 else if (RegVT == MVT::i32) 2931 RC = AFI->isThumb1OnlyFunction() ? 2932 (const TargetRegisterClass*)&ARM::tGPRRegClass : 2933 (const TargetRegisterClass*)&ARM::GPRRegClass; 2934 else 2935 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering"); 2936 2937 // Transform the arguments in physical registers into virtual ones. 2938 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 2939 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); 2940 } 2941 2942 // If this is an 8 or 16-bit value, it is really passed promoted 2943 // to 32 bits. Insert an assert[sz]ext to capture this, then 2944 // truncate to the right size. 2945 switch (VA.getLocInfo()) { 2946 default: llvm_unreachable("Unknown loc info!"); 2947 case CCValAssign::Full: break; 2948 case CCValAssign::BCvt: 2949 ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue); 2950 break; 2951 case CCValAssign::SExt: 2952 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, 2953 DAG.getValueType(VA.getValVT())); 2954 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 2955 break; 2956 case CCValAssign::ZExt: 2957 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, 2958 DAG.getValueType(VA.getValVT())); 2959 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 2960 break; 2961 } 2962 2963 InVals.push_back(ArgValue); 2964 2965 } else { // VA.isRegLoc() 2966 2967 // sanity check 2968 assert(VA.isMemLoc()); 2969 assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered"); 2970 2971 int index = ArgLocs[i].getValNo(); 2972 2973 // Some Ins[] entries become multiple ArgLoc[] entries. 2974 // Process them only once. 
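      // Below, a byval argument has its register-resident head spilled via
      // StoreByValRegs and is then represented by the resulting frame index;
      // every other stack argument becomes a fixed frame object whose value
      // is reloaded from the stack.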
      if (index != lastInsIndex) {
        ISD::ArgFlagsTy Flags = Ins[index].Flags;
        // FIXME: For now, all byval parameter objects are marked mutable.
        // This can be changed with more analysis. In case of tail call
        // optimization, mark all arguments mutable, since they could be
        // overwritten by the lowering of arguments in case of a tail call.
        if (Flags.isByVal()) {
          unsigned CurByValIndex = CCInfo.getInRegsParamsProceed();
          int FrameIndex = StoreByValRegs(
                            CCInfo, DAG, dl, Chain, CurOrigArg,
                            CurByValIndex,
                            Ins[VA.getValNo()].PartOffset,
                            VA.getLocMemOffset(),
                            Flags.getByValSize(),
                            true /*force mutable frames*/);
          InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy()));
          CCInfo.nextInRegsParam();
        } else {
          unsigned FIOffset = VA.getLocMemOffset() +
                              AFI->getStoredByValParamsPadding();
          int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
                                          FIOffset, true);

          // Create load nodes to retrieve arguments from the stack.
          SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
          InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
                                       MachinePointerInfo::getFixedStack(FI),
                                       false, false, false, 0));
        }
        lastInsIndex = index;
      }
    }
  }

  // varargs
  if (isVarArg)
    VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
                         CCInfo.getNextStackOffset());

  return Chain;
}

/// isFloatingPointZero - Return true if this is +0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isPosZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
      SDValue WrapperOp = Op.getOperand(1).getOperand(0);
      if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
        if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
          return CFP->getValueAPF().isPosZero();
    }
  }
  return false;
}

/// Returns the appropriate ARM CMP (cmp) and corresponding condition code for
/// the given operands.
SDValue
ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                             SDValue &ARMcc, SelectionDAG &DAG,
                             SDLoc dl) const {
  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
    unsigned C = RHSC->getZExtValue();
    if (!isLegalICmpImmediate(C)) {
      // Constant does not fit, try adjusting it by one?
      switch (CC) {
      default: break;
      case ISD::SETLT:
      case ISD::SETGE:
        if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
          CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
          RHS = DAG.getConstant(C-1, MVT::i32);
        }
        break;
      case ISD::SETULT:
      case ISD::SETUGE:
        if (C != 0 && isLegalICmpImmediate(C-1)) {
          CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
          RHS = DAG.getConstant(C-1, MVT::i32);
        }
        break;
      case ISD::SETLE:
      case ISD::SETGT:
        if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
          CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
          RHS = DAG.getConstant(C+1, MVT::i32);
        }
        break;
      case ISD::SETULE:
      case ISD::SETUGT:
        if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
          CC = (CC == ISD::SETULE) ?
            ISD::SETULT : ISD::SETUGE;
          RHS = DAG.getConstant(C+1, MVT::i32);
        }
        break;
      }
    }
  }

  ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
  ARMISD::NodeType CompareType;
  switch (CondCode) {
  default:
    CompareType = ARMISD::CMP;
    break;
  case ARMCC::EQ:
  case ARMCC::NE:
    // Uses only Z Flag
    CompareType = ARMISD::CMPZ;
    break;
  }
  ARMcc = DAG.getConstant(CondCode, MVT::i32);
  return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
}

/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
SDValue
ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
                             SDLoc dl) const {
  SDValue Cmp;
  if (!isFloatingPointZero(RHS))
    Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
  else
    Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS);
  return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
}

/// duplicateCmp - Glue values can have only one use, so this function
/// duplicates a comparison node.
SDValue
ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
  unsigned Opc = Cmp.getOpcode();
  SDLoc DL(Cmp);
  if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
    return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));

  assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
  Cmp = Cmp.getOperand(0);
  Opc = Cmp.getOpcode();
  if (Opc == ARMISD::CMPFP)
    Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
  else {
    assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
    Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
  }
  return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
}

SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  SDValue Cond = Op.getOperand(0);
  SDValue SelectTrue = Op.getOperand(1);
  SDValue SelectFalse = Op.getOperand(2);
  SDLoc dl(Op);

  // Convert:
  //
  // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
  // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
  //
  if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
    const ConstantSDNode *CMOVTrue =
      dyn_cast<ConstantSDNode>(Cond.getOperand(0));
    const ConstantSDNode *CMOVFalse =
      dyn_cast<ConstantSDNode>(Cond.getOperand(1));

    if (CMOVTrue && CMOVFalse) {
      unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
      unsigned CMOVFalseVal = CMOVFalse->getZExtValue();

      SDValue True;
      SDValue False;
      if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
        True = SelectTrue;
        False = SelectFalse;
      } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
        True = SelectFalse;
        False = SelectTrue;
      }

      if (True.getNode() && False.getNode()) {
        EVT VT = Op.getValueType();
        SDValue ARMcc = Cond.getOperand(2);
        SDValue CCR = Cond.getOperand(3);
        SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
        assert(True.getValueType() == VT);
        return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp);
      }
    }
  }

  // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
  // undefined bits before doing a full-word comparison with zero.
3172 Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond, 3173 DAG.getConstant(1, Cond.getValueType())); 3174 3175 return DAG.getSelectCC(dl, Cond, 3176 DAG.getConstant(0, Cond.getValueType()), 3177 SelectTrue, SelectFalse, ISD::SETNE); 3178 } 3179 3180 SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { 3181 EVT VT = Op.getValueType(); 3182 SDValue LHS = Op.getOperand(0); 3183 SDValue RHS = Op.getOperand(1); 3184 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 3185 SDValue TrueVal = Op.getOperand(2); 3186 SDValue FalseVal = Op.getOperand(3); 3187 SDLoc dl(Op); 3188 3189 if (LHS.getValueType() == MVT::i32) { 3190 SDValue ARMcc; 3191 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3192 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 3193 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp); 3194 } 3195 3196 ARMCC::CondCodes CondCode, CondCode2; 3197 FPCCToARMCC(CC, CondCode, CondCode2); 3198 3199 SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32); 3200 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); 3201 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3202 SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, 3203 ARMcc, CCR, Cmp); 3204 if (CondCode2 != ARMCC::AL) { 3205 SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32); 3206 // FIXME: Needs another CMP because flag can have but one use. 3207 SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); 3208 Result = DAG.getNode(ARMISD::CMOV, dl, VT, 3209 Result, TrueVal, ARMcc2, CCR, Cmp2); 3210 } 3211 return Result; 3212 } 3213 3214 /// canChangeToInt - Given the fp compare operand, return true if it is suitable 3215 /// to morph to an integer compare sequence. 3216 static bool canChangeToInt(SDValue Op, bool &SeenZero, 3217 const ARMSubtarget *Subtarget) { 3218 SDNode *N = Op.getNode(); 3219 if (!N->hasOneUse()) 3220 // Otherwise it requires moving the value from fp to integer registers. 3221 return false; 3222 if (!N->getNumValues()) 3223 return false; 3224 EVT VT = Op.getValueType(); 3225 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow()) 3226 // f32 case is generally profitable. f64 case only makes sense when vcmpe + 3227 // vmrs are very slow, e.g. cortex-a8. 
3228 return false; 3229 3230 if (isFloatingPointZero(Op)) { 3231 SeenZero = true; 3232 return true; 3233 } 3234 return ISD::isNormalLoad(N); 3235 } 3236 3237 static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) { 3238 if (isFloatingPointZero(Op)) 3239 return DAG.getConstant(0, MVT::i32); 3240 3241 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) 3242 return DAG.getLoad(MVT::i32, SDLoc(Op), 3243 Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), 3244 Ld->isVolatile(), Ld->isNonTemporal(), 3245 Ld->isInvariant(), Ld->getAlignment()); 3246 3247 llvm_unreachable("Unknown VFP cmp argument!"); 3248 } 3249 3250 static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, 3251 SDValue &RetVal1, SDValue &RetVal2) { 3252 if (isFloatingPointZero(Op)) { 3253 RetVal1 = DAG.getConstant(0, MVT::i32); 3254 RetVal2 = DAG.getConstant(0, MVT::i32); 3255 return; 3256 } 3257 3258 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) { 3259 SDValue Ptr = Ld->getBasePtr(); 3260 RetVal1 = DAG.getLoad(MVT::i32, SDLoc(Op), 3261 Ld->getChain(), Ptr, 3262 Ld->getPointerInfo(), 3263 Ld->isVolatile(), Ld->isNonTemporal(), 3264 Ld->isInvariant(), Ld->getAlignment()); 3265 3266 EVT PtrType = Ptr.getValueType(); 3267 unsigned NewAlign = MinAlign(Ld->getAlignment(), 4); 3268 SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(Op), 3269 PtrType, Ptr, DAG.getConstant(4, PtrType)); 3270 RetVal2 = DAG.getLoad(MVT::i32, SDLoc(Op), 3271 Ld->getChain(), NewPtr, 3272 Ld->getPointerInfo().getWithOffset(4), 3273 Ld->isVolatile(), Ld->isNonTemporal(), 3274 Ld->isInvariant(), NewAlign); 3275 return; 3276 } 3277 3278 llvm_unreachable("Unknown VFP cmp argument!"); 3279 } 3280 3281 /// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some 3282 /// f32 and even f64 comparisons to integer ones. 3283 SDValue 3284 ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { 3285 SDValue Chain = Op.getOperand(0); 3286 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); 3287 SDValue LHS = Op.getOperand(2); 3288 SDValue RHS = Op.getOperand(3); 3289 SDValue Dest = Op.getOperand(4); 3290 SDLoc dl(Op); 3291 3292 bool LHSSeenZero = false; 3293 bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget); 3294 bool RHSSeenZero = false; 3295 bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget); 3296 if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) { 3297 // If unsafe fp math optimization is enabled and there are no other uses of 3298 // the CMP operands, and the condition code is EQ or NE, we can optimize it 3299 // to an integer comparison. 
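    // Illustrative example: with RHS known to be +0.0, the f32 path below
    // turns "x ==oeq 0.0" into "(bitcast(x) & 0x7fffffff) == 0", which holds
    // exactly for +0.0 and -0.0.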
3300 if (CC == ISD::SETOEQ) 3301 CC = ISD::SETEQ; 3302 else if (CC == ISD::SETUNE) 3303 CC = ISD::SETNE; 3304 3305 SDValue Mask = DAG.getConstant(0x7fffffff, MVT::i32); 3306 SDValue ARMcc; 3307 if (LHS.getValueType() == MVT::f32) { 3308 LHS = DAG.getNode(ISD::AND, dl, MVT::i32, 3309 bitcastf32Toi32(LHS, DAG), Mask); 3310 RHS = DAG.getNode(ISD::AND, dl, MVT::i32, 3311 bitcastf32Toi32(RHS, DAG), Mask); 3312 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 3313 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3314 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, 3315 Chain, Dest, ARMcc, CCR, Cmp); 3316 } 3317 3318 SDValue LHS1, LHS2; 3319 SDValue RHS1, RHS2; 3320 expandf64Toi32(LHS, DAG, LHS1, LHS2); 3321 expandf64Toi32(RHS, DAG, RHS1, RHS2); 3322 LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask); 3323 RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask); 3324 ARMCC::CondCodes CondCode = IntCCToARMCC(CC); 3325 ARMcc = DAG.getConstant(CondCode, MVT::i32); 3326 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); 3327 SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest }; 3328 return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7); 3329 } 3330 3331 return SDValue(); 3332 } 3333 3334 SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { 3335 SDValue Chain = Op.getOperand(0); 3336 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); 3337 SDValue LHS = Op.getOperand(2); 3338 SDValue RHS = Op.getOperand(3); 3339 SDValue Dest = Op.getOperand(4); 3340 SDLoc dl(Op); 3341 3342 if (LHS.getValueType() == MVT::i32) { 3343 SDValue ARMcc; 3344 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 3345 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3346 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, 3347 Chain, Dest, ARMcc, CCR, Cmp); 3348 } 3349 3350 assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); 3351 3352 if (getTargetMachine().Options.UnsafeFPMath && 3353 (CC == ISD::SETEQ || CC == ISD::SETOEQ || 3354 CC == ISD::SETNE || CC == ISD::SETUNE)) { 3355 SDValue Result = OptimizeVFPBrcond(Op, DAG); 3356 if (Result.getNode()) 3357 return Result; 3358 } 3359 3360 ARMCC::CondCodes CondCode, CondCode2; 3361 FPCCToARMCC(CC, CondCode, CondCode2); 3362 3363 SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32); 3364 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); 3365 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3366 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); 3367 SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp }; 3368 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); 3369 if (CondCode2 != ARMCC::AL) { 3370 ARMcc = DAG.getConstant(CondCode2, MVT::i32); 3371 SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) }; 3372 Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); 3373 } 3374 return Res; 3375 } 3376 3377 SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { 3378 SDValue Chain = Op.getOperand(0); 3379 SDValue Table = Op.getOperand(1); 3380 SDValue Index = Op.getOperand(2); 3381 SDLoc dl(Op); 3382 3383 EVT PTy = getPointerTy(); 3384 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table); 3385 ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>(); 3386 SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy); 3387 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); 3388 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId); 3389 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy)); 3390 SDValue Addr = 
DAG.getNode(ISD::ADD, dl, PTy, Index, Table); 3391 if (Subtarget->isThumb2()) { 3392 // Thumb2 uses a two-level jump. That is, it jumps into the jump table 3393 // which does another jump to the destination. This also makes it easier 3394 // to translate it to TBB / TBH later. 3395 // FIXME: This might not work if the function is extremely large. 3396 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain, 3397 Addr, Op.getOperand(2), JTI, UId); 3398 } 3399 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { 3400 Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, 3401 MachinePointerInfo::getJumpTable(), 3402 false, false, false, 0); 3403 Chain = Addr.getValue(1); 3404 Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); 3405 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); 3406 } else { 3407 Addr = DAG.getLoad(PTy, dl, Chain, Addr, 3408 MachinePointerInfo::getJumpTable(), 3409 false, false, false, 0); 3410 Chain = Addr.getValue(1); 3411 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); 3412 } 3413 } 3414 3415 static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { 3416 EVT VT = Op.getValueType(); 3417 SDLoc dl(Op); 3418 3419 if (Op.getValueType().getVectorElementType() == MVT::i32) { 3420 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32) 3421 return Op; 3422 return DAG.UnrollVectorOp(Op.getNode()); 3423 } 3424 3425 assert(Op.getOperand(0).getValueType() == MVT::v4f32 && 3426 "Invalid type for custom lowering!"); 3427 if (VT != MVT::v4i16) 3428 return DAG.UnrollVectorOp(Op.getNode()); 3429 3430 Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0)); 3431 return DAG.getNode(ISD::TRUNCATE, dl, VT, Op); 3432 } 3433 3434 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { 3435 EVT VT = Op.getValueType(); 3436 if (VT.isVector()) 3437 return LowerVectorFP_TO_INT(Op, DAG); 3438 3439 SDLoc dl(Op); 3440 unsigned Opc; 3441 3442 switch (Op.getOpcode()) { 3443 default: llvm_unreachable("Invalid opcode!"); 3444 case ISD::FP_TO_SINT: 3445 Opc = ARMISD::FTOSI; 3446 break; 3447 case ISD::FP_TO_UINT: 3448 Opc = ARMISD::FTOUI; 3449 break; 3450 } 3451 Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0)); 3452 return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); 3453 } 3454 3455 static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { 3456 EVT VT = Op.getValueType(); 3457 SDLoc dl(Op); 3458 3459 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) { 3460 if (VT.getVectorElementType() == MVT::f32) 3461 return Op; 3462 return DAG.UnrollVectorOp(Op.getNode()); 3463 } 3464 3465 assert(Op.getOperand(0).getValueType() == MVT::v4i16 && 3466 "Invalid type for custom lowering!"); 3467 if (VT != MVT::v4f32) 3468 return DAG.UnrollVectorOp(Op.getNode()); 3469 3470 unsigned CastOpc; 3471 unsigned Opc; 3472 switch (Op.getOpcode()) { 3473 default: llvm_unreachable("Invalid opcode!"); 3474 case ISD::SINT_TO_FP: 3475 CastOpc = ISD::SIGN_EXTEND; 3476 Opc = ISD::SINT_TO_FP; 3477 break; 3478 case ISD::UINT_TO_FP: 3479 CastOpc = ISD::ZERO_EXTEND; 3480 Opc = ISD::UINT_TO_FP; 3481 break; 3482 } 3483 3484 Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0)); 3485 return DAG.getNode(Opc, dl, VT, Op); 3486 } 3487 3488 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { 3489 EVT VT = Op.getValueType(); 3490 if (VT.isVector()) 3491 return LowerVectorINT_TO_FP(Op, DAG); 3492 3493 SDLoc dl(Op); 3494 unsigned Opc; 3495 3496 switch (Op.getOpcode()) { 3497 default: 
llvm_unreachable("Invalid opcode!"); 3498 case ISD::SINT_TO_FP: 3499 Opc = ARMISD::SITOF; 3500 break; 3501 case ISD::UINT_TO_FP: 3502 Opc = ARMISD::UITOF; 3503 break; 3504 } 3505 3506 Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0)); 3507 return DAG.getNode(Opc, dl, VT, Op); 3508 } 3509 3510 SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { 3511 // Implement fcopysign with a fabs and a conditional fneg. 3512 SDValue Tmp0 = Op.getOperand(0); 3513 SDValue Tmp1 = Op.getOperand(1); 3514 SDLoc dl(Op); 3515 EVT VT = Op.getValueType(); 3516 EVT SrcVT = Tmp1.getValueType(); 3517 bool InGPR = Tmp0.getOpcode() == ISD::BITCAST || 3518 Tmp0.getOpcode() == ARMISD::VMOVDRR; 3519 bool UseNEON = !InGPR && Subtarget->hasNEON(); 3520 3521 if (UseNEON) { 3522 // Use VBSL to copy the sign bit. 3523 unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80); 3524 SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32, 3525 DAG.getTargetConstant(EncodedVal, MVT::i32)); 3526 EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64; 3527 if (VT == MVT::f64) 3528 Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT, 3529 DAG.getNode(ISD::BITCAST, dl, OpVT, Mask), 3530 DAG.getConstant(32, MVT::i32)); 3531 else /*if (VT == MVT::f32)*/ 3532 Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0); 3533 if (SrcVT == MVT::f32) { 3534 Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1); 3535 if (VT == MVT::f64) 3536 Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT, 3537 DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1), 3538 DAG.getConstant(32, MVT::i32)); 3539 } else if (VT == MVT::f32) 3540 Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64, 3541 DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1), 3542 DAG.getConstant(32, MVT::i32)); 3543 Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0); 3544 Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1); 3545 3546 SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff), 3547 MVT::i32); 3548 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes); 3549 SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask, 3550 DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes)); 3551 3552 SDValue Res = DAG.getNode(ISD::OR, dl, OpVT, 3553 DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask), 3554 DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot)); 3555 if (VT == MVT::f32) { 3556 Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res); 3557 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res, 3558 DAG.getConstant(0, MVT::i32)); 3559 } else { 3560 Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res); 3561 } 3562 3563 return Res; 3564 } 3565 3566 // Bitcast operand 1 to i32. 3567 if (SrcVT == MVT::f64) 3568 Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), 3569 &Tmp1, 1).getValue(1); 3570 Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1); 3571 3572 // Or in the signbit with integer operations. 3573 SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32); 3574 SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32); 3575 Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1); 3576 if (VT == MVT::f32) { 3577 Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32, 3578 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2); 3579 return DAG.getNode(ISD::BITCAST, dl, MVT::f32, 3580 DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1)); 3581 } 3582 3583 // f64: Or the high part with signbit and then combine two parts. 
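  // (VMOVRRD below splits the f64 into two i32 halves; for an IEEE double
  // the sign bit sits in bit 31 of the high word, Tmp0.getValue(1).)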
3584 Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), 3585 &Tmp0, 1); 3586 SDValue Lo = Tmp0.getValue(0); 3587 SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2); 3588 Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1); 3589 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); 3590 } 3591 3592 SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ 3593 MachineFunction &MF = DAG.getMachineFunction(); 3594 MachineFrameInfo *MFI = MF.getFrameInfo(); 3595 MFI->setReturnAddressIsTaken(true); 3596 3597 EVT VT = Op.getValueType(); 3598 SDLoc dl(Op); 3599 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 3600 if (Depth) { 3601 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); 3602 SDValue Offset = DAG.getConstant(4, MVT::i32); 3603 return DAG.getLoad(VT, dl, DAG.getEntryNode(), 3604 DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset), 3605 MachinePointerInfo(), false, false, false, 0); 3606 } 3607 3608 // Return LR, which contains the return address. Mark it an implicit live-in. 3609 unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32)); 3610 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); 3611 } 3612 3613 SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { 3614 MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 3615 MFI->setFrameAddressIsTaken(true); 3616 3617 EVT VT = Op.getValueType(); 3618 SDLoc dl(Op); // FIXME probably not meaningful 3619 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 3620 unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin()) 3621 ? ARM::R7 : ARM::R11; 3622 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); 3623 while (Depth--) 3624 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, 3625 MachinePointerInfo(), 3626 false, false, false, 0); 3627 return FrameAddr; 3628 } 3629 3630 /// Custom Expand long vector extensions, where size(DestVec) > 2*size(SrcVec), 3631 /// and size(DestVec) > 128-bits. 3632 /// This is achieved by doing the one extension from the SrcVec, splitting the 3633 /// result, extending these parts, and then concatenating these into the 3634 /// destination. 3635 static SDValue ExpandVectorExtension(SDNode *N, SelectionDAG &DAG) { 3636 SDValue Op = N->getOperand(0); 3637 EVT SrcVT = Op.getValueType(); 3638 EVT DestVT = N->getValueType(0); 3639 3640 assert(DestVT.getSizeInBits() > 128 && 3641 "Custom sext/zext expansion needs >128-bit vector."); 3642 // If this is a normal length extension, use the default expansion. 
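  // For instance, v8i8 -> v8i32 is handled below as: extend once to v8i16,
  // split that into two v4i16 halves, extend each half to v4i32, and
  // concatenate the two results.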
3643 if (SrcVT.getSizeInBits()*4 != DestVT.getSizeInBits() && 3644 SrcVT.getSizeInBits()*8 != DestVT.getSizeInBits()) 3645 return SDValue(); 3646 3647 SDLoc dl(N); 3648 unsigned SrcEltSize = SrcVT.getVectorElementType().getSizeInBits(); 3649 unsigned DestEltSize = DestVT.getVectorElementType().getSizeInBits(); 3650 unsigned NumElts = SrcVT.getVectorNumElements(); 3651 LLVMContext &Ctx = *DAG.getContext(); 3652 SDValue Mid, SplitLo, SplitHi, ExtLo, ExtHi; 3653 3654 EVT MidVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2), 3655 NumElts); 3656 EVT SplitVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2), 3657 NumElts/2); 3658 EVT ExtVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, DestEltSize), 3659 NumElts/2); 3660 3661 Mid = DAG.getNode(N->getOpcode(), dl, MidVT, Op); 3662 SplitLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid, 3663 DAG.getIntPtrConstant(0)); 3664 SplitHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid, 3665 DAG.getIntPtrConstant(NumElts/2)); 3666 ExtLo = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitLo); 3667 ExtHi = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitHi); 3668 return DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, ExtLo, ExtHi); 3669 } 3670 3671 /// ExpandBITCAST - If the target supports VFP, this function is called to 3672 /// expand a bit convert where either the source or destination type is i64 to 3673 /// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 3674 /// operand type is illegal (e.g., v2f32 for a target that doesn't support 3675 /// vectors), since the legalizer won't know what to do with that. 3676 static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) { 3677 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 3678 SDLoc dl(N); 3679 SDValue Op = N->getOperand(0); 3680 3681 // This function is only supposed to be called for i64 types, either as the 3682 // source or destination of the bit convert. 3683 EVT SrcVT = Op.getValueType(); 3684 EVT DstVT = N->getValueType(0); 3685 assert((SrcVT == MVT::i64 || DstVT == MVT::i64) && 3686 "ExpandBITCAST called for non-i64 type"); 3687 3688 // Turn i64->f64 into VMOVDRR. 3689 if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) { 3690 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, 3691 DAG.getConstant(0, MVT::i32)); 3692 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, 3693 DAG.getConstant(1, MVT::i32)); 3694 return DAG.getNode(ISD::BITCAST, dl, DstVT, 3695 DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi)); 3696 } 3697 3698 // Turn f64->i64 into VMOVRRD. 3699 if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) { 3700 SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, 3701 DAG.getVTList(MVT::i32, MVT::i32), &Op, 1); 3702 // Merge the pieces into a single i64 value. 3703 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1)); 3704 } 3705 3706 return SDValue(); 3707 } 3708 3709 /// getZeroVector - Returns a vector of specified type with all zero elements. 3710 /// Zero vectors are used to represent vector negation and in those cases 3711 /// will be implemented with the NEON VNEG instruction. However, VNEG does 3712 /// not support i64 elements, so sometimes the zero vectors will need to be 3713 /// explicitly constructed. Regardless, use a canonical VMOV to create the 3714 /// zero vector. 3715 static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, SDLoc dl) { 3716 assert(VT.isVector() && "Expected a vector type"); 3717 // The canonical modified immediate encoding of a zero vector is....0! 
3718 SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32); 3719 EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32; 3720 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal); 3721 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); 3722 } 3723 3724 /// LowerShiftRightParts - Lower SRA_PARTS, which returns two 3725 /// i32 values and take a 2 x i32 value to shift plus a shift amount. 3726 SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, 3727 SelectionDAG &DAG) const { 3728 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 3729 EVT VT = Op.getValueType(); 3730 unsigned VTBits = VT.getSizeInBits(); 3731 SDLoc dl(Op); 3732 SDValue ShOpLo = Op.getOperand(0); 3733 SDValue ShOpHi = Op.getOperand(1); 3734 SDValue ShAmt = Op.getOperand(2); 3735 SDValue ARMcc; 3736 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; 3737 3738 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); 3739 3740 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, 3741 DAG.getConstant(VTBits, MVT::i32), ShAmt); 3742 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); 3743 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, 3744 DAG.getConstant(VTBits, MVT::i32)); 3745 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); 3746 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); 3747 SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); 3748 3749 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3750 SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE, 3751 ARMcc, DAG, dl); 3752 SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); 3753 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, 3754 CCR, Cmp); 3755 3756 SDValue Ops[2] = { Lo, Hi }; 3757 return DAG.getMergeValues(Ops, 2, dl); 3758 } 3759 3760 /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two 3761 /// i32 values and take a 2 x i32 value to shift plus a shift amount. 3762 SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, 3763 SelectionDAG &DAG) const { 3764 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 3765 EVT VT = Op.getValueType(); 3766 unsigned VTBits = VT.getSizeInBits(); 3767 SDLoc dl(Op); 3768 SDValue ShOpLo = Op.getOperand(0); 3769 SDValue ShOpHi = Op.getOperand(1); 3770 SDValue ShAmt = Op.getOperand(2); 3771 SDValue ARMcc; 3772 3773 assert(Op.getOpcode() == ISD::SHL_PARTS); 3774 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, 3775 DAG.getConstant(VTBits, MVT::i32), ShAmt); 3776 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); 3777 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, 3778 DAG.getConstant(VTBits, MVT::i32)); 3779 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); 3780 SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); 3781 3782 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); 3783 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3784 SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE, 3785 ARMcc, DAG, dl); 3786 SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); 3787 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc, 3788 CCR, Cmp); 3789 3790 SDValue Ops[2] = { Lo, Hi }; 3791 return DAG.getMergeValues(Ops, 2, dl); 3792 } 3793 3794 SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op, 3795 SelectionDAG &DAG) const { 3796 // The rounding mode is in bits 23:22 of the FPSCR. 
3797 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0 3798 // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3) 3799 // so that the shift + and get folded into a bitfield extract. 3800 SDLoc dl(Op); 3801 SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32, 3802 DAG.getConstant(Intrinsic::arm_get_fpscr, 3803 MVT::i32)); 3804 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR, 3805 DAG.getConstant(1U << 22, MVT::i32)); 3806 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds, 3807 DAG.getConstant(22, MVT::i32)); 3808 return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE, 3809 DAG.getConstant(3, MVT::i32)); 3810 } 3811 3812 static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, 3813 const ARMSubtarget *ST) { 3814 EVT VT = N->getValueType(0); 3815 SDLoc dl(N); 3816 3817 if (!ST->hasV6T2Ops()) 3818 return SDValue(); 3819 3820 SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0)); 3821 return DAG.getNode(ISD::CTLZ, dl, VT, rbit); 3822 } 3823 3824 /// getCTPOP16BitCounts - Returns a v8i8/v16i8 vector containing the bit-count 3825 /// for each 16-bit element from operand, repeated. The basic idea is to 3826 /// leverage vcnt to get the 8-bit counts, gather and add the results. 3827 /// 3828 /// Trace for v4i16: 3829 /// input = [v0 v1 v2 v3 ] (vi 16-bit element) 3830 /// cast: N0 = [w0 w1 w2 w3 w4 w5 w6 w7] (v0 = [w0 w1], wi 8-bit element) 3831 /// vcnt: N1 = [b0 b1 b2 b3 b4 b5 b6 b7] (bi = bit-count of 8-bit element wi) 3832 /// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6] 3833 /// [b0 b1 b2 b3 b4 b5 b6 b7] 3834 /// +[b1 b0 b3 b2 b5 b4 b7 b6] 3835 /// N3=N1+N2 = [k0 k0 k1 k1 k2 k2 k3 k3] (k0 = b0+b1 = bit-count of 16-bit v0, 3836 /// vuzp: = [k0 k1 k2 k3 k0 k1 k2 k3] each ki is 8-bits) 3837 static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) { 3838 EVT VT = N->getValueType(0); 3839 SDLoc DL(N); 3840 3841 EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8; 3842 SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0)); 3843 SDValue N1 = DAG.getNode(ISD::CTPOP, DL, VT8Bit, N0); 3844 SDValue N2 = DAG.getNode(ARMISD::VREV16, DL, VT8Bit, N1); 3845 SDValue N3 = DAG.getNode(ISD::ADD, DL, VT8Bit, N1, N2); 3846 return DAG.getNode(ARMISD::VUZP, DL, VT8Bit, N3, N3); 3847 } 3848 3849 /// lowerCTPOP16BitElements - Returns a v4i16/v8i16 vector containing the 3850 /// bit-count for each 16-bit element from the operand. We need slightly 3851 /// different sequencing for v4i16 and v8i16 to stay within NEON's available 3852 /// 64/128-bit registers. 
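/// (Zero-extending the full v16i8 count vector in the v8i16 case would
/// produce a 256-bit value, which is why that path extracts a v8i8 half
/// first and only then zero-extends.)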
3853 /// 3854 /// Trace for v4i16: 3855 /// input = [v0 v1 v2 v3 ] (vi 16-bit element) 3856 /// v8i8: BitCounts = [k0 k1 k2 k3 k0 k1 k2 k3 ] (ki is the bit-count of vi) 3857 /// v8i16:Extended = [k0 k1 k2 k3 k0 k1 k2 k3 ] 3858 /// v4i16:Extracted = [k0 k1 k2 k3 ] 3859 static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) { 3860 EVT VT = N->getValueType(0); 3861 SDLoc DL(N); 3862 3863 SDValue BitCounts = getCTPOP16BitCounts(N, DAG); 3864 if (VT.is64BitVector()) { 3865 SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, BitCounts); 3866 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Extended, 3867 DAG.getIntPtrConstant(0)); 3868 } else { 3869 SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, 3870 BitCounts, DAG.getIntPtrConstant(0)); 3871 return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, Extracted); 3872 } 3873 } 3874 3875 /// lowerCTPOP32BitElements - Returns a v2i32/v4i32 vector containing the 3876 /// bit-count for each 32-bit element from the operand. The idea here is 3877 /// to split the vector into 16-bit elements, leverage the 16-bit count 3878 /// routine, and then combine the results. 3879 /// 3880 /// Trace for v2i32 (v4i32 similar with Extracted/Extended exchanged): 3881 /// input = [v0 v1 ] (vi: 32-bit elements) 3882 /// Bitcast = [w0 w1 w2 w3 ] (wi: 16-bit elements, v0 = [w0 w1]) 3883 /// Counts16 = [k0 k1 k2 k3 ] (ki: 16-bit elements, bit-count of wi) 3884 /// vrev: N0 = [k1 k0 k3 k2 ] 3885 /// [k0 k1 k2 k3 ] 3886 /// N1 =+[k1 k0 k3 k2 ] 3887 /// [k0 k2 k1 k3 ] 3888 /// N2 =+[k1 k3 k0 k2 ] 3889 /// [k0 k2 k1 k3 ] 3890 /// Extended =+[k1 k3 k0 k2 ] 3891 /// [k0 k2 ] 3892 /// Extracted=+[k1 k3 ] 3893 /// 3894 static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) { 3895 EVT VT = N->getValueType(0); 3896 SDLoc DL(N); 3897 3898 EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16; 3899 3900 SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT16Bit, N->getOperand(0)); 3901 SDValue Counts16 = lowerCTPOP16BitElements(Bitcast.getNode(), DAG); 3902 SDValue N0 = DAG.getNode(ARMISD::VREV32, DL, VT16Bit, Counts16); 3903 SDValue N1 = DAG.getNode(ISD::ADD, DL, VT16Bit, Counts16, N0); 3904 SDValue N2 = DAG.getNode(ARMISD::VUZP, DL, VT16Bit, N1, N1); 3905 3906 if (VT.is64BitVector()) { 3907 SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, N2); 3908 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Extended, 3909 DAG.getIntPtrConstant(0)); 3910 } else { 3911 SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, N2, 3912 DAG.getIntPtrConstant(0)); 3913 return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, Extracted); 3914 } 3915 } 3916 3917 static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG, 3918 const ARMSubtarget *ST) { 3919 EVT VT = N->getValueType(0); 3920 3921 assert(ST->hasNEON() && "Custom ctpop lowering requires NEON."); 3922 assert((VT == MVT::v2i32 || VT == MVT::v4i32 || 3923 VT == MVT::v4i16 || VT == MVT::v8i16) && 3924 "Unexpected type for custom ctpop lowering"); 3925 3926 if (VT.getVectorElementType() == MVT::i32) 3927 return lowerCTPOP32BitElements(N, DAG); 3928 else 3929 return lowerCTPOP16BitElements(N, DAG); 3930 } 3931 3932 static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, 3933 const ARMSubtarget *ST) { 3934 EVT VT = N->getValueType(0); 3935 SDLoc dl(N); 3936 3937 if (!VT.isVector()) 3938 return SDValue(); 3939 3940 // Lower vector shifts on NEON to use VSHL. 
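  // (For instance, a v4i32 logical shift right is emitted as a vshiftu
  // intrinsic whose per-element shift amounts have been negated, since NEON
  // encodes right shifts as left shifts by negative counts.)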
3941 assert(ST->hasNEON() && "unexpected vector shift"); 3942 3943 // Left shifts translate directly to the vshiftu intrinsic. 3944 if (N->getOpcode() == ISD::SHL) 3945 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, 3946 DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32), 3947 N->getOperand(0), N->getOperand(1)); 3948 3949 assert((N->getOpcode() == ISD::SRA || 3950 N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode"); 3951 3952 // NEON uses the same intrinsics for both left and right shifts. For 3953 // right shifts, the shift amounts are negative, so negate the vector of 3954 // shift amounts. 3955 EVT ShiftVT = N->getOperand(1).getValueType(); 3956 SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT, 3957 getZeroVector(ShiftVT, DAG, dl), 3958 N->getOperand(1)); 3959 Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ? 3960 Intrinsic::arm_neon_vshifts : 3961 Intrinsic::arm_neon_vshiftu); 3962 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, 3963 DAG.getConstant(vshiftInt, MVT::i32), 3964 N->getOperand(0), NegatedCount); 3965 } 3966 3967 static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, 3968 const ARMSubtarget *ST) { 3969 EVT VT = N->getValueType(0); 3970 SDLoc dl(N); 3971 3972 // We can get here for a node like i32 = ISD::SHL i32, i64 3973 if (VT != MVT::i64) 3974 return SDValue(); 3975 3976 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) && 3977 "Unknown shift to lower!"); 3978 3979 // We only lower SRA, SRL of 1 here, all others use generic lowering. 3980 if (!isa<ConstantSDNode>(N->getOperand(1)) || 3981 cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1) 3982 return SDValue(); 3983 3984 // If we are in thumb mode, we don't have RRX. 3985 if (ST->isThumb1Only()) return SDValue(); 3986 3987 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr. 3988 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), 3989 DAG.getConstant(0, MVT::i32)); 3990 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), 3991 DAG.getConstant(1, MVT::i32)); 3992 3993 // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and 3994 // captures the result into a carry flag. 3995 unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG; 3996 Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), &Hi, 1); 3997 3998 // The low part is an ARMISD::RRX operand, which shifts the carry in. 3999 Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1)); 4000 4001 // Merge the pieces into a single i64 value. 4002 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); 4003 } 4004 4005 static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { 4006 SDValue TmpOp0, TmpOp1; 4007 bool Invert = false; 4008 bool Swap = false; 4009 unsigned Opc = 0; 4010 4011 SDValue Op0 = Op.getOperand(0); 4012 SDValue Op1 = Op.getOperand(1); 4013 SDValue CC = Op.getOperand(2); 4014 EVT VT = Op.getValueType(); 4015 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 4016 SDLoc dl(Op); 4017 4018 if (Op.getOperand(1).