1 //===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines the interfaces that ARM uses to lower LLVM code into a 11 // selection DAG. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #define DEBUG_TYPE "arm-isel" 16 #include "ARM.h" 17 #include "ARMAddressingModes.h" 18 #include "ARMCallingConv.h" 19 #include "ARMConstantPoolValue.h" 20 #include "ARMISelLowering.h" 21 #include "ARMMachineFunctionInfo.h" 22 #include "ARMPerfectShuffle.h" 23 #include "ARMRegisterInfo.h" 24 #include "ARMSubtarget.h" 25 #include "ARMTargetMachine.h" 26 #include "ARMTargetObjectFile.h" 27 #include "llvm/CallingConv.h" 28 #include "llvm/Constants.h" 29 #include "llvm/Function.h" 30 #include "llvm/GlobalValue.h" 31 #include "llvm/Instruction.h" 32 #include "llvm/Instructions.h" 33 #include "llvm/Intrinsics.h" 34 #include "llvm/Type.h" 35 #include "llvm/CodeGen/CallingConvLower.h" 36 #include "llvm/CodeGen/IntrinsicLowering.h" 37 #include "llvm/CodeGen/MachineBasicBlock.h" 38 #include "llvm/CodeGen/MachineFrameInfo.h" 39 #include "llvm/CodeGen/MachineFunction.h" 40 #include "llvm/CodeGen/MachineInstrBuilder.h" 41 #include "llvm/CodeGen/MachineRegisterInfo.h" 42 #include "llvm/CodeGen/PseudoSourceValue.h" 43 #include "llvm/CodeGen/SelectionDAG.h" 44 #include "llvm/MC/MCSectionMachO.h" 45 #include "llvm/Target/TargetOptions.h" 46 #include "llvm/ADT/VectorExtras.h" 47 #include "llvm/ADT/StringExtras.h" 48 #include "llvm/ADT/Statistic.h" 49 #include "llvm/Support/CommandLine.h" 50 #include "llvm/Support/ErrorHandling.h" 51 #include "llvm/Support/MathExtras.h" 52 #include "llvm/Support/raw_ostream.h" 53 #include <sstream> 54 using namespace llvm; 55 56 STATISTIC(NumTailCalls, "Number of tail calls"); 57 STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt"); 58 59 // This option should go away when tail calls fully work. 60 static cl::opt<bool> 61 EnableARMTailCalls("arm-tail-calls", cl::Hidden, 62 cl::desc("Generate tail calls (TEMPORARY OPTION)."), 63 cl::init(false)); 64 65 cl::opt<bool> 66 EnableARMLongCalls("arm-long-calls", cl::Hidden, 67 cl::desc("Generate calls via indirect call instructions"), 68 cl::init(false)); 69 70 static cl::opt<bool> 71 ARMInterworking("arm-interworking", cl::Hidden, 72 cl::desc("Enable / disable ARM interworking (for debugging only)"), 73 cl::init(true)); 74 75 namespace llvm { 76 class ARMCCState : public CCState { 77 public: 78 ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF, 79 const TargetMachine &TM, SmallVector<CCValAssign, 16> &locs, 80 LLVMContext &C, ParmContext PC) 81 : CCState(CC, isVarArg, MF, TM, locs, C) { 82 assert(((PC == Call) || (PC == Prologue)) && 83 "ARMCCState users must specify whether their context is call" 84 "or prologue generation."); 85 CallOrPrologue = PC; 86 } 87 }; 88 } 89 90 // The APCS parameter registers. 
91 static const unsigned GPRArgRegs[] = { 92 ARM::R0, ARM::R1, ARM::R2, ARM::R3 93 }; 94 95 void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, 96 EVT PromotedBitwiseVT) { 97 if (VT != PromotedLdStVT) { 98 setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote); 99 AddPromotedToType (ISD::LOAD, VT.getSimpleVT(), 100 PromotedLdStVT.getSimpleVT()); 101 102 setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote); 103 AddPromotedToType (ISD::STORE, VT.getSimpleVT(), 104 PromotedLdStVT.getSimpleVT()); 105 } 106 107 EVT ElemTy = VT.getVectorElementType(); 108 if (ElemTy != MVT::i64 && ElemTy != MVT::f64) 109 setOperationAction(ISD::VSETCC, VT.getSimpleVT(), Custom); 110 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom); 111 if (ElemTy != MVT::i32) { 112 setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand); 113 setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand); 114 setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand); 115 setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand); 116 } 117 setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom); 118 setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom); 119 setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal); 120 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Legal); 121 setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand); 122 setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand); 123 if (VT.isInteger()) { 124 setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom); 125 setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom); 126 setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom); 127 setLoadExtAction(ISD::SEXTLOAD, VT.getSimpleVT(), Expand); 128 setLoadExtAction(ISD::ZEXTLOAD, VT.getSimpleVT(), Expand); 129 for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; 130 InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT) 131 setTruncStoreAction(VT.getSimpleVT(), 132 (MVT::SimpleValueType)InnerVT, Expand); 133 } 134 setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand); 135 136 // Promote all bit-wise operations. 137 if (VT.isInteger() && VT != PromotedBitwiseVT) { 138 setOperationAction(ISD::AND, VT.getSimpleVT(), Promote); 139 AddPromotedToType (ISD::AND, VT.getSimpleVT(), 140 PromotedBitwiseVT.getSimpleVT()); 141 setOperationAction(ISD::OR, VT.getSimpleVT(), Promote); 142 AddPromotedToType (ISD::OR, VT.getSimpleVT(), 143 PromotedBitwiseVT.getSimpleVT()); 144 setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote); 145 AddPromotedToType (ISD::XOR, VT.getSimpleVT(), 146 PromotedBitwiseVT.getSimpleVT()); 147 } 148 149 // Neon does not support vector divide/remainder operations. 
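  // For illustration: with these marked Expand, a node such as
  // (sdiv v4i16 %a, %b) is scalarized by the legalizer into per-element
  // integer divisions, which on targets without a hardware divider become
  // __aeabi_idiv / __divsi3 libcalls; a few narrow vector types are re-marked
  // Custom in the constructor to avoid that cost.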
150 setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand); 151 setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand); 152 setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand); 153 setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand); 154 setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand); 155 setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand); 156 } 157 158 void ARMTargetLowering::addDRTypeForNEON(EVT VT) { 159 addRegisterClass(VT, ARM::DPRRegisterClass); 160 addTypeForNEON(VT, MVT::f64, MVT::v2i32); 161 } 162 163 void ARMTargetLowering::addQRTypeForNEON(EVT VT) { 164 addRegisterClass(VT, ARM::QPRRegisterClass); 165 addTypeForNEON(VT, MVT::v2f64, MVT::v4i32); 166 } 167 168 static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) { 169 if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin()) 170 return new TargetLoweringObjectFileMachO(); 171 172 return new ARMElfTargetObjectFile(); 173 } 174 175 ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) 176 : TargetLowering(TM, createTLOF(TM)) { 177 Subtarget = &TM.getSubtarget<ARMSubtarget>(); 178 RegInfo = TM.getRegisterInfo(); 179 Itins = TM.getInstrItineraryData(); 180 181 if (Subtarget->isTargetDarwin()) { 182 // Uses VFP for Thumb libfuncs if available. 183 if (Subtarget->isThumb() && Subtarget->hasVFP2()) { 184 // Single-precision floating-point arithmetic. 185 setLibcallName(RTLIB::ADD_F32, "__addsf3vfp"); 186 setLibcallName(RTLIB::SUB_F32, "__subsf3vfp"); 187 setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp"); 188 setLibcallName(RTLIB::DIV_F32, "__divsf3vfp"); 189 190 // Double-precision floating-point arithmetic. 191 setLibcallName(RTLIB::ADD_F64, "__adddf3vfp"); 192 setLibcallName(RTLIB::SUB_F64, "__subdf3vfp"); 193 setLibcallName(RTLIB::MUL_F64, "__muldf3vfp"); 194 setLibcallName(RTLIB::DIV_F64, "__divdf3vfp"); 195 196 // Single-precision comparisons. 197 setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp"); 198 setLibcallName(RTLIB::UNE_F32, "__nesf2vfp"); 199 setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp"); 200 setLibcallName(RTLIB::OLE_F32, "__lesf2vfp"); 201 setLibcallName(RTLIB::OGE_F32, "__gesf2vfp"); 202 setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp"); 203 setLibcallName(RTLIB::UO_F32, "__unordsf2vfp"); 204 setLibcallName(RTLIB::O_F32, "__unordsf2vfp"); 205 206 setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE); 207 setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE); 208 setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE); 209 setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE); 210 setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE); 211 setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE); 212 setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE); 213 setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ); 214 215 // Double-precision comparisons. 216 setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp"); 217 setLibcallName(RTLIB::UNE_F64, "__nedf2vfp"); 218 setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp"); 219 setLibcallName(RTLIB::OLE_F64, "__ledf2vfp"); 220 setLibcallName(RTLIB::OGE_F64, "__gedf2vfp"); 221 setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp"); 222 setLibcallName(RTLIB::UO_F64, "__unorddf2vfp"); 223 setLibcallName(RTLIB::O_F64, "__unorddf2vfp"); 224 225 setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE); 226 setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE); 227 setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE); 228 setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE); 229 setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE); 230 setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE); 231 setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE); 232 setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ); 233 234 // Floating-point to integer conversions. 
235 // i64 conversions are done via library routines even when generating VFP 236 // instructions, so use the same ones. 237 setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp"); 238 setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp"); 239 setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp"); 240 setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp"); 241 242 // Conversions between floating types. 243 setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp"); 244 setLibcallName(RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp"); 245 246 // Integer to floating-point conversions. 247 // i64 conversions are done via library routines even when generating VFP 248 // instructions, so use the same ones. 249 // FIXME: There appears to be some naming inconsistency in ARM libgcc: 250 // e.g., __floatunsidf vs. __floatunssidfvfp. 251 setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp"); 252 setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp"); 253 setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp"); 254 setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp"); 255 } 256 } 257 258 // These libcalls are not available in 32-bit. 259 setLibcallName(RTLIB::SHL_I128, 0); 260 setLibcallName(RTLIB::SRL_I128, 0); 261 setLibcallName(RTLIB::SRA_I128, 0); 262 263 if (Subtarget->isAAPCS_ABI()) { 264 // Double-precision floating-point arithmetic helper functions 265 // RTABI chapter 4.1.2, Table 2 266 setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd"); 267 setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv"); 268 setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul"); 269 setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub"); 270 setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS); 271 setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS); 272 setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS); 273 setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS); 274 275 // Double-precision floating-point comparison helper functions 276 // RTABI chapter 4.1.2, Table 3 277 setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq"); 278 setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE); 279 setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq"); 280 setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ); 281 setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt"); 282 setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE); 283 setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple"); 284 setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE); 285 setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge"); 286 setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE); 287 setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt"); 288 setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE); 289 setLibcallName(RTLIB::UO_F64, "__aeabi_dcmpun"); 290 setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE); 291 setLibcallName(RTLIB::O_F64, "__aeabi_dcmpun"); 292 setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ); 293 setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS); 294 setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS); 295 setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS); 296 setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS); 297 setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS); 298 setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS); 299 setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS); 300 setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS); 301 302 // Single-precision floating-point arithmetic helper functions 303 // RTABI chapter 4.1.2, Table 4 304 setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd"); 305 setLibcallName(RTLIB::DIV_F32, 
"__aeabi_fdiv"); 306 setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul"); 307 setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub"); 308 setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS); 309 setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS); 310 setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS); 311 setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS); 312 313 // Single-precision floating-point comparison helper functions 314 // RTABI chapter 4.1.2, Table 5 315 setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq"); 316 setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE); 317 setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq"); 318 setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ); 319 setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt"); 320 setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE); 321 setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple"); 322 setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE); 323 setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge"); 324 setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE); 325 setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt"); 326 setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE); 327 setLibcallName(RTLIB::UO_F32, "__aeabi_fcmpun"); 328 setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE); 329 setLibcallName(RTLIB::O_F32, "__aeabi_fcmpun"); 330 setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ); 331 setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS); 332 setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS); 333 setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS); 334 setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS); 335 setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS); 336 setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS); 337 setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS); 338 setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS); 339 340 // Floating-point to integer conversions. 341 // RTABI chapter 4.1.2, Table 6 342 setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz"); 343 setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz"); 344 setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz"); 345 setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz"); 346 setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz"); 347 setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz"); 348 setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz"); 349 setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz"); 350 setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS); 351 setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS); 352 setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS); 353 setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS); 354 setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS); 355 setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS); 356 setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS); 357 setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS); 358 359 // Conversions between floating types. 360 // RTABI chapter 4.1.2, Table 7 361 setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f"); 362 setLibcallName(RTLIB::FPEXT_F32_F64, "__aeabi_f2d"); 363 setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS); 364 setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS); 365 366 // Integer to floating-point conversions. 
367 // RTABI chapter 4.1.2, Table 8 368 setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d"); 369 setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d"); 370 setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d"); 371 setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d"); 372 setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f"); 373 setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f"); 374 setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f"); 375 setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f"); 376 setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS); 377 setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS); 378 setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS); 379 setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS); 380 setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS); 381 setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS); 382 setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS); 383 setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS); 384 385 // Long long helper functions 386 // RTABI chapter 4.2, Table 9 387 setLibcallName(RTLIB::MUL_I64, "__aeabi_lmul"); 388 setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod"); 389 setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod"); 390 setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl"); 391 setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr"); 392 setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr"); 393 setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS); 394 setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS); 395 setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS); 396 setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS); 397 setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS); 398 setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS); 399 400 // Integer division functions 401 // RTABI chapter 4.3.1 402 setLibcallName(RTLIB::SDIV_I8, "__aeabi_idiv"); 403 setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv"); 404 setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv"); 405 setLibcallName(RTLIB::UDIV_I8, "__aeabi_uidiv"); 406 setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv"); 407 setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv"); 408 setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS); 409 setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS); 410 setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS); 411 setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS); 412 setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS); 413 setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS); 414 415 // Memory operations 416 // RTABI chapter 4.3.4 417 setLibcallName(RTLIB::MEMCPY, "__aeabi_memcpy"); 418 setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove"); 419 setLibcallName(RTLIB::MEMSET, "__aeabi_memset"); 420 } 421 422 if (Subtarget->isThumb1Only()) 423 addRegisterClass(MVT::i32, ARM::tGPRRegisterClass); 424 else 425 addRegisterClass(MVT::i32, ARM::GPRRegisterClass); 426 if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { 427 addRegisterClass(MVT::f32, ARM::SPRRegisterClass); 428 if (!Subtarget->isFPOnlySP()) 429 addRegisterClass(MVT::f64, ARM::DPRRegisterClass); 430 431 setTruncStoreAction(MVT::f64, MVT::f32, Expand); 432 } 433 434 if (Subtarget->hasNEON()) { 435 addDRTypeForNEON(MVT::v2f32); 436 addDRTypeForNEON(MVT::v8i8); 437 addDRTypeForNEON(MVT::v4i16); 438 addDRTypeForNEON(MVT::v2i32); 
439 addDRTypeForNEON(MVT::v1i64); 440 441 addQRTypeForNEON(MVT::v4f32); 442 addQRTypeForNEON(MVT::v2f64); 443 addQRTypeForNEON(MVT::v16i8); 444 addQRTypeForNEON(MVT::v8i16); 445 addQRTypeForNEON(MVT::v4i32); 446 addQRTypeForNEON(MVT::v2i64); 447 448 // v2f64 is legal so that QR subregs can be extracted as f64 elements, but 449 // neither Neon nor VFP support any arithmetic operations on it. 450 setOperationAction(ISD::FADD, MVT::v2f64, Expand); 451 setOperationAction(ISD::FSUB, MVT::v2f64, Expand); 452 setOperationAction(ISD::FMUL, MVT::v2f64, Expand); 453 setOperationAction(ISD::FDIV, MVT::v2f64, Expand); 454 setOperationAction(ISD::FREM, MVT::v2f64, Expand); 455 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand); 456 setOperationAction(ISD::VSETCC, MVT::v2f64, Expand); 457 setOperationAction(ISD::FNEG, MVT::v2f64, Expand); 458 setOperationAction(ISD::FABS, MVT::v2f64, Expand); 459 setOperationAction(ISD::FSQRT, MVT::v2f64, Expand); 460 setOperationAction(ISD::FSIN, MVT::v2f64, Expand); 461 setOperationAction(ISD::FCOS, MVT::v2f64, Expand); 462 setOperationAction(ISD::FPOWI, MVT::v2f64, Expand); 463 setOperationAction(ISD::FPOW, MVT::v2f64, Expand); 464 setOperationAction(ISD::FLOG, MVT::v2f64, Expand); 465 setOperationAction(ISD::FLOG2, MVT::v2f64, Expand); 466 setOperationAction(ISD::FLOG10, MVT::v2f64, Expand); 467 setOperationAction(ISD::FEXP, MVT::v2f64, Expand); 468 setOperationAction(ISD::FEXP2, MVT::v2f64, Expand); 469 setOperationAction(ISD::FCEIL, MVT::v2f64, Expand); 470 setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand); 471 setOperationAction(ISD::FRINT, MVT::v2f64, Expand); 472 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand); 473 setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand); 474 475 setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand); 476 477 // Neon does not support some operations on v1i64 and v2i64 types. 478 setOperationAction(ISD::MUL, MVT::v1i64, Expand); 479 // Custom handling for some quad-vector types to detect VMULL. 480 setOperationAction(ISD::MUL, MVT::v8i16, Custom); 481 setOperationAction(ISD::MUL, MVT::v4i32, Custom); 482 setOperationAction(ISD::MUL, MVT::v2i64, Custom); 483 // Custom handling for some vector types to avoid expensive expansions 484 setOperationAction(ISD::SDIV, MVT::v4i16, Custom); 485 setOperationAction(ISD::SDIV, MVT::v8i8, Custom); 486 setOperationAction(ISD::UDIV, MVT::v4i16, Custom); 487 setOperationAction(ISD::UDIV, MVT::v8i8, Custom); 488 setOperationAction(ISD::VSETCC, MVT::v1i64, Expand); 489 setOperationAction(ISD::VSETCC, MVT::v2i64, Expand); 490 // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with 491 // a destination type that is wider than the source. 
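    // For example, (v4f32 (sint_to_fp (v4i16 %x))) cannot be selected
    // directly; the custom lowering is expected to widen the operand first
    // (roughly VMOVL.S16 to v4i32, then VCVT.F32.S32). Illustrative sketch,
    // not a literal instruction sequence.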
492 setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom); 493 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom); 494 495 setTargetDAGCombine(ISD::INTRINSIC_VOID); 496 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); 497 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); 498 setTargetDAGCombine(ISD::SHL); 499 setTargetDAGCombine(ISD::SRL); 500 setTargetDAGCombine(ISD::SRA); 501 setTargetDAGCombine(ISD::SIGN_EXTEND); 502 setTargetDAGCombine(ISD::ZERO_EXTEND); 503 setTargetDAGCombine(ISD::ANY_EXTEND); 504 setTargetDAGCombine(ISD::SELECT_CC); 505 setTargetDAGCombine(ISD::BUILD_VECTOR); 506 setTargetDAGCombine(ISD::VECTOR_SHUFFLE); 507 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); 508 setTargetDAGCombine(ISD::STORE); 509 setTargetDAGCombine(ISD::FP_TO_SINT); 510 setTargetDAGCombine(ISD::FP_TO_UINT); 511 setTargetDAGCombine(ISD::FDIV); 512 } 513 514 computeRegisterProperties(); 515 516 // ARM does not have f32 extending load. 517 setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); 518 519 // ARM does not have i1 sign extending load. 520 setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); 521 522 // ARM supports all 4 flavors of integer indexed load / store. 523 if (!Subtarget->isThumb1Only()) { 524 for (unsigned im = (unsigned)ISD::PRE_INC; 525 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { 526 setIndexedLoadAction(im, MVT::i1, Legal); 527 setIndexedLoadAction(im, MVT::i8, Legal); 528 setIndexedLoadAction(im, MVT::i16, Legal); 529 setIndexedLoadAction(im, MVT::i32, Legal); 530 setIndexedStoreAction(im, MVT::i1, Legal); 531 setIndexedStoreAction(im, MVT::i8, Legal); 532 setIndexedStoreAction(im, MVT::i16, Legal); 533 setIndexedStoreAction(im, MVT::i32, Legal); 534 } 535 } 536 537 // i64 operation support. 538 setOperationAction(ISD::MUL, MVT::i64, Expand); 539 setOperationAction(ISD::MULHU, MVT::i32, Expand); 540 if (Subtarget->isThumb1Only()) { 541 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); 542 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); 543 } 544 if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops() 545 || (Subtarget->isThumb2() && !Subtarget->hasThumb2DSP())) 546 setOperationAction(ISD::MULHS, MVT::i32, Expand); 547 548 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); 549 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); 550 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); 551 setOperationAction(ISD::SRL, MVT::i64, Custom); 552 setOperationAction(ISD::SRA, MVT::i64, Custom); 553 554 // ARM does not have ROTL. 555 setOperationAction(ISD::ROTL, MVT::i32, Expand); 556 setOperationAction(ISD::CTTZ, MVT::i32, Custom); 557 setOperationAction(ISD::CTPOP, MVT::i32, Expand); 558 if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) 559 setOperationAction(ISD::CTLZ, MVT::i32, Expand); 560 561 // Only ARMv6 has BSWAP. 562 if (!Subtarget->hasV6Ops()) 563 setOperationAction(ISD::BSWAP, MVT::i32, Expand); 564 565 // These are expanded into libcalls. 
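  // For example, (sdiv i32 %a, %b) becomes a call to __aeabi_idiv under the
  // AAPCS ABI (set up above) or __divsi3 otherwise, with the quotient
  // returned in r0 (illustrative; the exact routine depends on the ABI).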
566 if (!Subtarget->hasDivide() || !Subtarget->isThumb2()) { 567 // v7M has a hardware divider 568 setOperationAction(ISD::SDIV, MVT::i32, Expand); 569 setOperationAction(ISD::UDIV, MVT::i32, Expand); 570 } 571 setOperationAction(ISD::SREM, MVT::i32, Expand); 572 setOperationAction(ISD::UREM, MVT::i32, Expand); 573 setOperationAction(ISD::SDIVREM, MVT::i32, Expand); 574 setOperationAction(ISD::UDIVREM, MVT::i32, Expand); 575 576 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); 577 setOperationAction(ISD::ConstantPool, MVT::i32, Custom); 578 setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom); 579 setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); 580 setOperationAction(ISD::BlockAddress, MVT::i32, Custom); 581 582 setOperationAction(ISD::TRAP, MVT::Other, Legal); 583 584 // Use the default implementation. 585 setOperationAction(ISD::VASTART, MVT::Other, Custom); 586 setOperationAction(ISD::VAARG, MVT::Other, Expand); 587 setOperationAction(ISD::VACOPY, MVT::Other, Expand); 588 setOperationAction(ISD::VAEND, MVT::Other, Expand); 589 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); 590 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); 591 setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); 592 setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); 593 setExceptionPointerRegister(ARM::R0); 594 setExceptionSelectorRegister(ARM::R1); 595 596 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); 597 // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use 598 // the default expansion. 599 if (Subtarget->hasDataBarrier() || 600 (Subtarget->hasV6Ops() && !Subtarget->isThumb())) { 601 // membarrier needs custom lowering; the rest are legal and handled 602 // normally. 603 setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); 604 } else { 605 // Set them all for expansion, which will force libcalls. 
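    // Illustrative example: an expanded i32 ATOMIC_LOAD_ADD is emitted as a
    // call to __sync_fetch_and_add_4. Those routines perform their own
    // synchronization, which is why the fences around them can be folded
    // (see setShouldFoldAtomicFences below).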
606 setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); 607 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, Expand); 608 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, Expand); 609 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); 610 setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, Expand); 611 setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, Expand); 612 setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); 613 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, Expand); 614 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, Expand); 615 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand); 616 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i8, Expand); 617 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i16, Expand); 618 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand); 619 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i8, Expand); 620 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i16, Expand); 621 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand); 622 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, Expand); 623 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, Expand); 624 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand); 625 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, Expand); 626 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, Expand); 627 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand); 628 setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8, Expand); 629 setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, Expand); 630 setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand); 631 setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i8, Expand); 632 setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i16, Expand); 633 setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand); 634 setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i8, Expand); 635 setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i16, Expand); 636 setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand); 637 setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i8, Expand); 638 setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i16, Expand); 639 setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand); 640 setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i8, Expand); 641 setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i16, Expand); 642 setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand); 643 // Since the libcalls include locking, fold in the fences 644 setShouldFoldAtomicFences(true); 645 } 646 // 64-bit versions are always libcalls (for now) 647 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Expand); 648 setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Expand); 649 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Expand); 650 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Expand); 651 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Expand); 652 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Expand); 653 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Expand); 654 setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Expand); 655 656 setOperationAction(ISD::PREFETCH, MVT::Other, Custom); 657 658 // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes. 
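  // For example, sign_extend_inreg of an i8 held in an i32 register is
  // otherwise expanded to a left shift by 24 followed by an arithmetic right
  // shift by 24; with v6 a single SXTB (or SXTH for i16) is used instead
  // (illustrative summary).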
659 if (!Subtarget->hasV6Ops()) { 660 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); 661 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); 662 } 663 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 664 665 if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { 666 // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR 667 // iff target supports vfp2. 668 setOperationAction(ISD::BITCAST, MVT::i64, Custom); 669 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); 670 } 671 672 // We want to custom lower some of our intrinsics. 673 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 674 if (Subtarget->isTargetDarwin()) { 675 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); 676 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); 677 setOperationAction(ISD::EH_SJLJ_DISPATCHSETUP, MVT::Other, Custom); 678 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume"); 679 } 680 681 setOperationAction(ISD::SETCC, MVT::i32, Expand); 682 setOperationAction(ISD::SETCC, MVT::f32, Expand); 683 setOperationAction(ISD::SETCC, MVT::f64, Expand); 684 setOperationAction(ISD::SELECT, MVT::i32, Custom); 685 setOperationAction(ISD::SELECT, MVT::f32, Custom); 686 setOperationAction(ISD::SELECT, MVT::f64, Custom); 687 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); 688 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); 689 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); 690 691 setOperationAction(ISD::BRCOND, MVT::Other, Expand); 692 setOperationAction(ISD::BR_CC, MVT::i32, Custom); 693 setOperationAction(ISD::BR_CC, MVT::f32, Custom); 694 setOperationAction(ISD::BR_CC, MVT::f64, Custom); 695 setOperationAction(ISD::BR_JT, MVT::Other, Custom); 696 697 // We don't support sin/cos/fmod/copysign/pow 698 setOperationAction(ISD::FSIN, MVT::f64, Expand); 699 setOperationAction(ISD::FSIN, MVT::f32, Expand); 700 setOperationAction(ISD::FCOS, MVT::f32, Expand); 701 setOperationAction(ISD::FCOS, MVT::f64, Expand); 702 setOperationAction(ISD::FREM, MVT::f64, Expand); 703 setOperationAction(ISD::FREM, MVT::f32, Expand); 704 if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { 705 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); 706 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); 707 } 708 setOperationAction(ISD::FPOW, MVT::f64, Expand); 709 setOperationAction(ISD::FPOW, MVT::f32, Expand); 710 711 setOperationAction(ISD::FMA, MVT::f64, Expand); 712 setOperationAction(ISD::FMA, MVT::f32, Expand); 713 714 // Various VFP goodness 715 if (!UseSoftFloat && !Subtarget->isThumb1Only()) { 716 // int <-> fp are custom expanded into bit_convert + ARMISD ops. 717 if (Subtarget->hasVFP2()) { 718 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); 719 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); 720 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); 721 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); 722 } 723 // Special handling for half-precision FP. 
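    // Illustrative note: without the FP16 extension these expand to the
    // default __gnu_h2f_ieee / __gnu_f2h_ieee libcalls; with it, VCVTB.F32.F16
    // and VCVTB.F16.F32 handle the conversion in a single instruction.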
    if (!Subtarget->hasFP16()) {
      setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
      setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
    }
  }

  // We have target-specific dag combine patterns for the following nodes:
  // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::SUB);
  setTargetDAGCombine(ISD::MUL);

  if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON())
    setTargetDAGCombine(ISD::OR);
  if (Subtarget->hasNEON())
    setTargetDAGCombine(ISD::AND);

  setStackPointerRegisterToSaveRestore(ARM::SP);

  if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
    setSchedulingPreference(Sched::RegPressure);
  else
    setSchedulingPreference(Sched::Hybrid);

  //// temporary - rewrite interface to use type
  maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 1;

  // On ARM arguments smaller than 4 bytes are extended, so all arguments
  // are at least 4 bytes aligned.
  setMinStackArgumentAlignment(4);

  benefitFromCodePlacementOpt = true;

  setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}

// FIXME: It might make sense to define the representative register class as
// the nearest super-register that has a non-null superset. For example,
// DPR_VFP2 is a super-register of SPR, and DPR is a superset of DPR_VFP2.
// Consequently, SPR's representative would be DPR_VFP2. This would work well
// if register pressure tracking were modified such that a register use would
// increment the pressure of the register class's representative and all of
// its super classes' representatives transitively. We have not implemented
// this because of the difficulty, prior to coalescing, of modeling operand
// register classes due to the common occurrence of cross-class copies and
// subregister insertions and extractions.
std::pair<const TargetRegisterClass*, uint8_t>
ARMTargetLowering::findRepresentativeClass(EVT VT) const {
  const TargetRegisterClass *RRC = 0;
  uint8_t Cost = 1;
  switch (VT.getSimpleVT().SimpleTy) {
  default:
    return TargetLowering::findRepresentativeClass(VT);
  // Use DPR as the representative register class for all floating-point and
  // vector types. Since there are 32 SPR registers and 32 DPR registers, the
  // cost is 1 for both f32 and f64.
  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
    RRC = ARM::DPRRegisterClass;
    // When NEON is used for SP, only half of the register file is available
    // because operations that define both SP and DP results will be
    // constrained to the VFP2 class (D0-D15). We currently model this
    // constraint prior to coalescing by double-counting the SP regs. See the
    // FIXME above.
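    // Illustrative consequence: with cost 2, eight live f32 values are
    // modeled as occupying 16 of the 32 DPR pressure units, approximating
    // the restriction of single-precision code to D0-D15 / S0-S31.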
787 if (Subtarget->useNEONForSinglePrecisionFP()) 788 Cost = 2; 789 break; 790 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64: 791 case MVT::v4f32: case MVT::v2f64: 792 RRC = ARM::DPRRegisterClass; 793 Cost = 2; 794 break; 795 case MVT::v4i64: 796 RRC = ARM::DPRRegisterClass; 797 Cost = 4; 798 break; 799 case MVT::v8i64: 800 RRC = ARM::DPRRegisterClass; 801 Cost = 8; 802 break; 803 } 804 return std::make_pair(RRC, Cost); 805 } 806 807 const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { 808 switch (Opcode) { 809 default: return 0; 810 case ARMISD::Wrapper: return "ARMISD::Wrapper"; 811 case ARMISD::WrapperDYN: return "ARMISD::WrapperDYN"; 812 case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC"; 813 case ARMISD::WrapperJT: return "ARMISD::WrapperJT"; 814 case ARMISD::CALL: return "ARMISD::CALL"; 815 case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED"; 816 case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK"; 817 case ARMISD::tCALL: return "ARMISD::tCALL"; 818 case ARMISD::BRCOND: return "ARMISD::BRCOND"; 819 case ARMISD::BR_JT: return "ARMISD::BR_JT"; 820 case ARMISD::BR2_JT: return "ARMISD::BR2_JT"; 821 case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG"; 822 case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD"; 823 case ARMISD::CMP: return "ARMISD::CMP"; 824 case ARMISD::CMPZ: return "ARMISD::CMPZ"; 825 case ARMISD::CMPFP: return "ARMISD::CMPFP"; 826 case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0"; 827 case ARMISD::BCC_i64: return "ARMISD::BCC_i64"; 828 case ARMISD::FMSTAT: return "ARMISD::FMSTAT"; 829 case ARMISD::CMOV: return "ARMISD::CMOV"; 830 831 case ARMISD::RBIT: return "ARMISD::RBIT"; 832 833 case ARMISD::FTOSI: return "ARMISD::FTOSI"; 834 case ARMISD::FTOUI: return "ARMISD::FTOUI"; 835 case ARMISD::SITOF: return "ARMISD::SITOF"; 836 case ARMISD::UITOF: return "ARMISD::UITOF"; 837 838 case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG"; 839 case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG"; 840 case ARMISD::RRX: return "ARMISD::RRX"; 841 842 case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD"; 843 case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR"; 844 845 case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP"; 846 case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP"; 847 case ARMISD::EH_SJLJ_DISPATCHSETUP:return "ARMISD::EH_SJLJ_DISPATCHSETUP"; 848 849 case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN"; 850 851 case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER"; 852 853 case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC"; 854 855 case ARMISD::MEMBARRIER: return "ARMISD::MEMBARRIER"; 856 case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR"; 857 858 case ARMISD::PRELOAD: return "ARMISD::PRELOAD"; 859 860 case ARMISD::VCEQ: return "ARMISD::VCEQ"; 861 case ARMISD::VCEQZ: return "ARMISD::VCEQZ"; 862 case ARMISD::VCGE: return "ARMISD::VCGE"; 863 case ARMISD::VCGEZ: return "ARMISD::VCGEZ"; 864 case ARMISD::VCLEZ: return "ARMISD::VCLEZ"; 865 case ARMISD::VCGEU: return "ARMISD::VCGEU"; 866 case ARMISD::VCGT: return "ARMISD::VCGT"; 867 case ARMISD::VCGTZ: return "ARMISD::VCGTZ"; 868 case ARMISD::VCLTZ: return "ARMISD::VCLTZ"; 869 case ARMISD::VCGTU: return "ARMISD::VCGTU"; 870 case ARMISD::VTST: return "ARMISD::VTST"; 871 872 case ARMISD::VSHL: return "ARMISD::VSHL"; 873 case ARMISD::VSHRs: return "ARMISD::VSHRs"; 874 case ARMISD::VSHRu: return "ARMISD::VSHRu"; 875 case ARMISD::VSHLLs: return "ARMISD::VSHLLs"; 876 case ARMISD::VSHLLu: return "ARMISD::VSHLLu"; 877 case ARMISD::VSHLLi: return "ARMISD::VSHLLi"; 878 case ARMISD::VSHRN: return 
"ARMISD::VSHRN"; 879 case ARMISD::VRSHRs: return "ARMISD::VRSHRs"; 880 case ARMISD::VRSHRu: return "ARMISD::VRSHRu"; 881 case ARMISD::VRSHRN: return "ARMISD::VRSHRN"; 882 case ARMISD::VQSHLs: return "ARMISD::VQSHLs"; 883 case ARMISD::VQSHLu: return "ARMISD::VQSHLu"; 884 case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu"; 885 case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs"; 886 case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu"; 887 case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu"; 888 case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs"; 889 case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu"; 890 case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu"; 891 case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu"; 892 case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs"; 893 case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM"; 894 case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM"; 895 case ARMISD::VDUP: return "ARMISD::VDUP"; 896 case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE"; 897 case ARMISD::VEXT: return "ARMISD::VEXT"; 898 case ARMISD::VREV64: return "ARMISD::VREV64"; 899 case ARMISD::VREV32: return "ARMISD::VREV32"; 900 case ARMISD::VREV16: return "ARMISD::VREV16"; 901 case ARMISD::VZIP: return "ARMISD::VZIP"; 902 case ARMISD::VUZP: return "ARMISD::VUZP"; 903 case ARMISD::VTRN: return "ARMISD::VTRN"; 904 case ARMISD::VTBL1: return "ARMISD::VTBL1"; 905 case ARMISD::VTBL2: return "ARMISD::VTBL2"; 906 case ARMISD::VMULLs: return "ARMISD::VMULLs"; 907 case ARMISD::VMULLu: return "ARMISD::VMULLu"; 908 case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; 909 case ARMISD::FMAX: return "ARMISD::FMAX"; 910 case ARMISD::FMIN: return "ARMISD::FMIN"; 911 case ARMISD::BFI: return "ARMISD::BFI"; 912 case ARMISD::VORRIMM: return "ARMISD::VORRIMM"; 913 case ARMISD::VBICIMM: return "ARMISD::VBICIMM"; 914 case ARMISD::VBSL: return "ARMISD::VBSL"; 915 case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP"; 916 case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP"; 917 case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP"; 918 case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD"; 919 case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD"; 920 case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD"; 921 case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD"; 922 case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD"; 923 case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD"; 924 case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD"; 925 case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD"; 926 case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD"; 927 case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD"; 928 case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD"; 929 case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD"; 930 case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD"; 931 case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD"; 932 case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD"; 933 case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD"; 934 case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD"; 935 } 936 } 937 938 /// getRegClassFor - Return the register class that should be used for the 939 /// specified value type. 940 TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const { 941 // Map v4i64 to QQ registers but do not make the type legal. Similarly map 942 // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to 943 // load / store 4 to 8 consecutive D registers. 
  if (Subtarget->hasNEON()) {
    if (VT == MVT::v4i64)
      return ARM::QQPRRegisterClass;
    else if (VT == MVT::v8i64)
      return ARM::QQQQPRRegisterClass;
  }
  return TargetLowering::getRegClassFor(VT);
}

// Create a fast isel object.
FastISel *
ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
  return ARM::createFastISel(funcInfo);
}

/// getMaximalGlobalOffset - Returns the maximal possible offset which can
/// be used for loads / stores from the global.
unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
  return (Subtarget->isThumb1Only() ? 127 : 4095);
}

Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
  unsigned NumVals = N->getNumValues();
  if (!NumVals)
    return Sched::RegPressure;

  for (unsigned i = 0; i != NumVals; ++i) {
    EVT VT = N->getValueType(i);
    if (VT == MVT::Glue || VT == MVT::Other)
      continue;
    if (VT.isFloatingPoint() || VT.isVector())
      return Sched::Latency;
  }

  if (!N->isMachineOpcode())
    return Sched::RegPressure;

  // Loads are scheduled for latency even if the instruction itinerary
  // is not available.
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());

  if (MCID.getNumDefs() == 0)
    return Sched::RegPressure;
  if (!Itins->isEmpty() &&
      Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
    return Sched::Latency;

  return Sched::RegPressure;
}

//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//

/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
  switch (CC) {
  default: llvm_unreachable("Unknown condition code!");
  case ISD::SETNE: return ARMCC::NE;
  case ISD::SETEQ: return ARMCC::EQ;
  case ISD::SETGT: return ARMCC::GT;
  case ISD::SETGE: return ARMCC::GE;
  case ISD::SETLT: return ARMCC::LT;
  case ISD::SETLE: return ARMCC::LE;
  case ISD::SETUGT: return ARMCC::HI;
  case ISD::SETUGE: return ARMCC::HS;
  case ISD::SETULT: return ARMCC::LO;
  case ISD::SETULE: return ARMCC::LS;
  }
}

/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
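/// Some predicates need two ARM conditions: for example SETONE maps to MI
/// with a second check on GT, and SETUEQ maps to EQ with a second check on
/// VS. Callers are expected to test whether CondCode2 != ARMCC::AL and, if
/// so, emit a second predicated compare or branch.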
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
                        ARMCC::CondCodes &CondCode2) {
  CondCode2 = ARMCC::AL;
  switch (CC) {
  default: llvm_unreachable("Unknown FP condition!");
  case ISD::SETEQ:
  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
  case ISD::SETGT:
  case ISD::SETOGT: CondCode = ARMCC::GT; break;
  case ISD::SETGE:
  case ISD::SETOGE: CondCode = ARMCC::GE; break;
  case ISD::SETOLT: CondCode = ARMCC::MI; break;
  case ISD::SETOLE: CondCode = ARMCC::LS; break;
  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
  case ISD::SETO:   CondCode = ARMCC::VC; break;
  case ISD::SETUO:  CondCode = ARMCC::VS; break;
  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
  case ISD::SETUGT: CondCode = ARMCC::HI; break;
  case ISD::SETUGE: CondCode = ARMCC::PL; break;
  case ISD::SETLT:
  case ISD::SETULT: CondCode = ARMCC::LT; break;
  case ISD::SETLE:
  case ISD::SETULE: CondCode = ARMCC::LE; break;
  case ISD::SETNE:
  case ISD::SETUNE: CondCode = ARMCC::NE; break;
  }
}

//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "ARMGenCallingConv.inc"

/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
/// CallingConvention value.
CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
                                                 bool Return,
                                                 bool isVarArg) const {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported calling convention");
  case CallingConv::Fast:
    if (Subtarget->hasVFP2() && !isVarArg) {
      if (!Subtarget->isAAPCS_ABI())
        return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
      // For AAPCS ABI targets, just use the VFP variant of the calling
      // convention.
      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
    }
    // Fallthrough
  case CallingConv::C: {
    // Use target triple & subtarget features to do actual dispatch.
    if (!Subtarget->isAAPCS_ABI())
      return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
    else if (Subtarget->hasVFP2() &&
             FloatABIType == FloatABI::Hard && !isVarArg)
      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
  }
  case CallingConv::ARM_AAPCS_VFP:
    return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
  case CallingConv::ARM_AAPCS:
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
  case CallingConv::ARM_APCS:
    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
  }
}

/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue
ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
                                   CallingConv::ID CallConv, bool isVarArg,
                                   const SmallVectorImpl<ISD::InputArg> &Ins,
                                   DebugLoc dl, SelectionDAG &DAG,
                                   SmallVectorImpl<SDValue> &InVals) const {

  // Assign locations to each value returned by this call.
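  // For example, an f64 returned under the soft-float AAPCS comes back in
  // r0/r1: the loop below emits two CopyFromReg nodes and recombines them
  // with ARMISD::VMOVDRR; a v2f64 result repeats this for two register pairs
  // and inserts each f64 half with INSERT_VECTOR_ELT (illustrative summary
  // of the code that follows).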
1095 SmallVector<CCValAssign, 16> RVLocs; 1096 ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 1097 getTargetMachine(), RVLocs, *DAG.getContext(), Call); 1098 CCInfo.AnalyzeCallResult(Ins, 1099 CCAssignFnForNode(CallConv, /* Return*/ true, 1100 isVarArg)); 1101 1102 // Copy all of the result registers out of their specified physreg. 1103 for (unsigned i = 0; i != RVLocs.size(); ++i) { 1104 CCValAssign VA = RVLocs[i]; 1105 1106 SDValue Val; 1107 if (VA.needsCustom()) { 1108 // Handle f64 or half of a v2f64. 1109 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, 1110 InFlag); 1111 Chain = Lo.getValue(1); 1112 InFlag = Lo.getValue(2); 1113 VA = RVLocs[++i]; // skip ahead to next loc 1114 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, 1115 InFlag); 1116 Chain = Hi.getValue(1); 1117 InFlag = Hi.getValue(2); 1118 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); 1119 1120 if (VA.getLocVT() == MVT::v2f64) { 1121 SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); 1122 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, 1123 DAG.getConstant(0, MVT::i32)); 1124 1125 VA = RVLocs[++i]; // skip ahead to next loc 1126 Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); 1127 Chain = Lo.getValue(1); 1128 InFlag = Lo.getValue(2); 1129 VA = RVLocs[++i]; // skip ahead to next loc 1130 Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); 1131 Chain = Hi.getValue(1); 1132 InFlag = Hi.getValue(2); 1133 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); 1134 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, 1135 DAG.getConstant(1, MVT::i32)); 1136 } 1137 } else { 1138 Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), 1139 InFlag); 1140 Chain = Val.getValue(1); 1141 InFlag = Val.getValue(2); 1142 } 1143 1144 switch (VA.getLocInfo()) { 1145 default: llvm_unreachable("Unknown loc info!"); 1146 case CCValAssign::Full: break; 1147 case CCValAssign::BCvt: 1148 Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val); 1149 break; 1150 } 1151 1152 InVals.push_back(Val); 1153 } 1154 1155 return Chain; 1156 } 1157 1158 /// LowerMemOpCallTo - Store the argument to the stack. 
1159 SDValue 1160 ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, 1161 SDValue StackPtr, SDValue Arg, 1162 DebugLoc dl, SelectionDAG &DAG, 1163 const CCValAssign &VA, 1164 ISD::ArgFlagsTy Flags) const { 1165 unsigned LocMemOffset = VA.getLocMemOffset(); 1166 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); 1167 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); 1168 return DAG.getStore(Chain, dl, Arg, PtrOff, 1169 MachinePointerInfo::getStack(LocMemOffset), 1170 false, false, 0); 1171 } 1172 1173 void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG, 1174 SDValue Chain, SDValue &Arg, 1175 RegsToPassVector &RegsToPass, 1176 CCValAssign &VA, CCValAssign &NextVA, 1177 SDValue &StackPtr, 1178 SmallVector<SDValue, 8> &MemOpChains, 1179 ISD::ArgFlagsTy Flags) const { 1180 1181 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, 1182 DAG.getVTList(MVT::i32, MVT::i32), Arg); 1183 RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd)); 1184 1185 if (NextVA.isRegLoc()) 1186 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1))); 1187 else { 1188 assert(NextVA.isMemLoc()); 1189 if (StackPtr.getNode() == 0) 1190 StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); 1191 1192 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1), 1193 dl, DAG, NextVA, 1194 Flags)); 1195 } 1196 } 1197 1198 /// LowerCall - Lowering a call into a callseq_start <- 1199 /// ARMISD:CALL <- callseq_end chain. Also add input and output parameter 1200 /// nodes. 1201 SDValue 1202 ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, 1203 CallingConv::ID CallConv, bool isVarArg, 1204 bool &isTailCall, 1205 const SmallVectorImpl<ISD::OutputArg> &Outs, 1206 const SmallVectorImpl<SDValue> &OutVals, 1207 const SmallVectorImpl<ISD::InputArg> &Ins, 1208 DebugLoc dl, SelectionDAG &DAG, 1209 SmallVectorImpl<SDValue> &InVals) const { 1210 MachineFunction &MF = DAG.getMachineFunction(); 1211 bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); 1212 bool IsSibCall = false; 1213 // Temporarily disable tail calls so things don't break. 1214 if (!EnableARMTailCalls) 1215 isTailCall = false; 1216 if (isTailCall) { 1217 // Check if it's really possible to do a tail call. 1218 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, 1219 isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(), 1220 Outs, OutVals, Ins, DAG); 1221 // We don't support GuaranteedTailCallOpt for ARM, only automatically 1222 // detected sibcalls. 1223 if (isTailCall) { 1224 ++NumTailCalls; 1225 IsSibCall = true; 1226 } 1227 } 1228 1229 // Analyze operands of the call, assigning locations to each operand. 1230 SmallVector<CCValAssign, 16> ArgLocs; 1231 ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 1232 getTargetMachine(), ArgLocs, *DAG.getContext(), Call); 1233 CCInfo.AnalyzeCallOperands(Outs, 1234 CCAssignFnForNode(CallConv, /* Return*/ false, 1235 isVarArg)); 1236 1237 // Get a count of how many bytes are to be pushed on the stack. 1238 unsigned NumBytes = CCInfo.getNextStackOffset(); 1239 1240 // For tail calls, memory operands are available in our caller's stack. 1241 if (IsSibCall) 1242 NumBytes = 0; 1243 1244 // Adjust the stack pointer for the new arguments... 
1245 // These operations are automatically eliminated by the prolog/epilog pass 1246 if (!IsSibCall) 1247 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); 1248 1249 SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); 1250 1251 RegsToPassVector RegsToPass; 1252 SmallVector<SDValue, 8> MemOpChains; 1253 1254 // Walk the register/memloc assignments, inserting copies/loads. In the case 1255 // of tail call optimization, arguments are handled later. 1256 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); 1257 i != e; 1258 ++i, ++realArgIdx) { 1259 CCValAssign &VA = ArgLocs[i]; 1260 SDValue Arg = OutVals[realArgIdx]; 1261 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; 1262 bool isByVal = Flags.isByVal(); 1263 1264 // Promote the value if needed. 1265 switch (VA.getLocInfo()) { 1266 default: llvm_unreachable("Unknown loc info!"); 1267 case CCValAssign::Full: break; 1268 case CCValAssign::SExt: 1269 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); 1270 break; 1271 case CCValAssign::ZExt: 1272 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); 1273 break; 1274 case CCValAssign::AExt: 1275 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); 1276 break; 1277 case CCValAssign::BCvt: 1278 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); 1279 break; 1280 } 1281 1282 // f64 and v2f64 might be passed in i32 pairs and must be split into pieces 1283 if (VA.needsCustom()) { 1284 if (VA.getLocVT() == MVT::v2f64) { 1285 SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 1286 DAG.getConstant(0, MVT::i32)); 1287 SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 1288 DAG.getConstant(1, MVT::i32)); 1289 1290 PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, 1291 VA, ArgLocs[++i], StackPtr, MemOpChains, Flags); 1292 1293 VA = ArgLocs[++i]; // skip ahead to next loc 1294 if (VA.isRegLoc()) { 1295 PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, 1296 VA, ArgLocs[++i], StackPtr, MemOpChains, Flags); 1297 } else { 1298 assert(VA.isMemLoc()); 1299 1300 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1, 1301 dl, DAG, VA, Flags)); 1302 } 1303 } else { 1304 PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i], 1305 StackPtr, MemOpChains, Flags); 1306 } 1307 } else if (VA.isRegLoc()) { 1308 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); 1309 } else if (isByVal) { 1310 assert(VA.isMemLoc()); 1311 unsigned offset = 0; 1312 1313 // True if this byval aggregate will be split between registers 1314 // and memory. 
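      // Illustrative example: for a 16-byte byval argument whose first byval
      // register is R2, the loop below loads the first 8 bytes into R2/R3 and
      // the remaining 8 bytes are copied with a memcpy into the outgoing
      // argument area at a source offset of 8.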
1315 if (CCInfo.isFirstByValRegValid()) { 1316 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1317 unsigned int i, j; 1318 for (i = 0, j = CCInfo.getFirstByValReg(); j < ARM::R4; i++, j++) { 1319 SDValue Const = DAG.getConstant(4*i, MVT::i32); 1320 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); 1321 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, 1322 MachinePointerInfo(), 1323 false, false, 0); 1324 MemOpChains.push_back(Load.getValue(1)); 1325 RegsToPass.push_back(std::make_pair(j, Load)); 1326 } 1327 offset = ARM::R4 - CCInfo.getFirstByValReg(); 1328 CCInfo.clearFirstByValReg(); 1329 } 1330 1331 unsigned LocMemOffset = VA.getLocMemOffset(); 1332 SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset); 1333 SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, 1334 StkPtrOff); 1335 SDValue SrcOffset = DAG.getIntPtrConstant(4*offset); 1336 SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset); 1337 SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, 1338 MVT::i32); 1339 MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, 1340 Flags.getByValAlign(), 1341 /*isVolatile=*/false, 1342 /*AlwaysInline=*/false, 1343 MachinePointerInfo(0), 1344 MachinePointerInfo(0))); 1345 1346 } else if (!IsSibCall) { 1347 assert(VA.isMemLoc()); 1348 1349 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, 1350 dl, DAG, VA, Flags)); 1351 } 1352 } 1353 1354 if (!MemOpChains.empty()) 1355 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 1356 &MemOpChains[0], MemOpChains.size()); 1357 1358 // Build a sequence of copy-to-reg nodes chained together with token chain 1359 // and flag operands which copy the outgoing args into the appropriate regs. 1360 SDValue InFlag; 1361 // Tail call byval lowering might overwrite argument registers so in case of 1362 // tail call optimization the copies to registers are lowered later. 1363 if (!isTailCall) 1364 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 1365 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 1366 RegsToPass[i].second, InFlag); 1367 InFlag = Chain.getValue(1); 1368 } 1369 1370 // For tail calls lower the arguments to the 'real' stack slot. 1371 if (isTailCall) { 1372 // Force all the incoming stack arguments to be loaded from the stack 1373 // before any new outgoing arguments are stored to the stack, because the 1374 // outgoing stack slots may alias the incoming argument stack slots, and 1375 // the alias isn't otherwise explicit. This is slightly more conservative 1376 // than necessary, because it means that each store effectively depends 1377 // on every argument instead of just those arguments it would clobber. 1378 1379 // Do not flag preceding copytoreg stuff together with the following stuff. 1380 InFlag = SDValue(); 1381 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 1382 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 1383 RegsToPass[i].second, InFlag); 1384 InFlag = Chain.getValue(1); 1385 } 1386 InFlag =SDValue(); 1387 } 1388 1389 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every 1390 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol 1391 // node so that legalize doesn't hack it. 
1392 bool isDirect = false; 1393 bool isARMFunc = false; 1394 bool isLocalARMFunc = false; 1395 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1396 1397 if (EnableARMLongCalls) { 1398 assert (getTargetMachine().getRelocationModel() == Reloc::Static 1399 && "long-calls with non-static relocation model!"); 1400 // Handle a global address or an external symbol. If it's not one of 1401 // those, the target's already in a register, so we don't need to do 1402 // anything extra. 1403 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1404 const GlobalValue *GV = G->getGlobal(); 1405 // Create a constant pool entry for the callee address 1406 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1407 ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, 1408 ARMPCLabelIndex, 1409 ARMCP::CPValue, 0); 1410 // Get the address of the callee into a register 1411 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1412 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1413 Callee = DAG.getLoad(getPointerTy(), dl, 1414 DAG.getEntryNode(), CPAddr, 1415 MachinePointerInfo::getConstantPool(), 1416 false, false, 0); 1417 } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) { 1418 const char *Sym = S->getSymbol(); 1419 1420 // Create a constant pool entry for the callee address 1421 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1422 ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(), 1423 Sym, ARMPCLabelIndex, 0); 1424 // Get the address of the callee into a register 1425 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1426 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1427 Callee = DAG.getLoad(getPointerTy(), dl, 1428 DAG.getEntryNode(), CPAddr, 1429 MachinePointerInfo::getConstantPool(), 1430 false, false, 0); 1431 } 1432 } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1433 const GlobalValue *GV = G->getGlobal(); 1434 isDirect = true; 1435 bool isExt = GV->isDeclaration() || GV->isWeakForLinker(); 1436 bool isStub = (isExt && Subtarget->isTargetDarwin()) && 1437 getTargetMachine().getRelocationModel() != Reloc::Static; 1438 isARMFunc = !Subtarget->isThumb() || isStub; 1439 // ARM call to a local ARM function is predicable. 1440 isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking); 1441 // tBX takes a register source operand. 
1442 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { 1443 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1444 ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, 1445 ARMPCLabelIndex, 1446 ARMCP::CPValue, 4); 1447 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1448 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1449 Callee = DAG.getLoad(getPointerTy(), dl, 1450 DAG.getEntryNode(), CPAddr, 1451 MachinePointerInfo::getConstantPool(), 1452 false, false, 0); 1453 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1454 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, 1455 getPointerTy(), Callee, PICLabel); 1456 } else { 1457 // On ELF targets for PIC code, direct calls should go through the PLT 1458 unsigned OpFlags = 0; 1459 if (Subtarget->isTargetELF() && 1460 getTargetMachine().getRelocationModel() == Reloc::PIC_) 1461 OpFlags = ARMII::MO_PLT; 1462 Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags); 1463 } 1464 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 1465 isDirect = true; 1466 bool isStub = Subtarget->isTargetDarwin() && 1467 getTargetMachine().getRelocationModel() != Reloc::Static; 1468 isARMFunc = !Subtarget->isThumb() || isStub; 1469 // tBX takes a register source operand. 1470 const char *Sym = S->getSymbol(); 1471 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { 1472 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1473 ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(), 1474 Sym, ARMPCLabelIndex, 4); 1475 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1476 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1477 Callee = DAG.getLoad(getPointerTy(), dl, 1478 DAG.getEntryNode(), CPAddr, 1479 MachinePointerInfo::getConstantPool(), 1480 false, false, 0); 1481 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1482 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, 1483 getPointerTy(), Callee, PICLabel); 1484 } else { 1485 unsigned OpFlags = 0; 1486 // On ELF targets for PIC code, direct calls should go through the PLT 1487 if (Subtarget->isTargetELF() && 1488 getTargetMachine().getRelocationModel() == Reloc::PIC_) 1489 OpFlags = ARMII::MO_PLT; 1490 Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags); 1491 } 1492 } 1493 1494 // FIXME: handle tail calls differently. 1495 unsigned CallOpc; 1496 if (Subtarget->isThumb()) { 1497 if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) 1498 CallOpc = ARMISD::CALL_NOLINK; 1499 else 1500 CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL; 1501 } else { 1502 CallOpc = (isDirect || Subtarget->hasV5TOps()) 1503 ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL) 1504 : ARMISD::CALL_NOLINK; 1505 } 1506 1507 std::vector<SDValue> Ops; 1508 Ops.push_back(Chain); 1509 Ops.push_back(Callee); 1510 1511 // Add argument registers to the end of the list so that they are known live 1512 // into the call. 1513 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 1514 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 1515 RegsToPass[i].second.getValueType())); 1516 1517 if (InFlag.getNode()) 1518 Ops.push_back(InFlag); 1519 1520 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 1521 if (isTailCall) 1522 return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size()); 1523 1524 // Returns a chain and a flag for retval copy to use. 
1525 Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); 1526 InFlag = Chain.getValue(1); 1527 1528 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), 1529 DAG.getIntPtrConstant(0, true), InFlag); 1530 if (!Ins.empty()) 1531 InFlag = Chain.getValue(1); 1532 1533 // Handle result values, copying them out of physregs into vregs that we 1534 // return. 1535 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, 1536 dl, DAG, InVals); 1537 } 1538 1539 /// HandleByVal - Every parameter *after* a byval parameter is passed 1540 /// on the stack. Remember the next parameter register to allocate, 1541 /// and then confiscate the rest of the parameter registers to insure 1542 /// this. 1543 void 1544 llvm::ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const { 1545 unsigned reg = State->AllocateReg(GPRArgRegs, 4); 1546 assert((State->getCallOrPrologue() == Prologue || 1547 State->getCallOrPrologue() == Call) && 1548 "unhandled ParmContext"); 1549 if ((!State->isFirstByValRegValid()) && 1550 (ARM::R0 <= reg) && (reg <= ARM::R3)) { 1551 State->setFirstByValReg(reg); 1552 // At a call site, a byval parameter that is split between 1553 // registers and memory needs its size truncated here. In a 1554 // function prologue, such byval parameters are reassembled in 1555 // memory, and are not truncated. 1556 if (State->getCallOrPrologue() == Call) { 1557 unsigned excess = 4 * (ARM::R4 - reg); 1558 assert(size >= excess && "expected larger existing stack allocation"); 1559 size -= excess; 1560 } 1561 } 1562 // Confiscate any remaining parameter registers to preclude their 1563 // assignment to subsequent parameters. 1564 while (State->AllocateReg(GPRArgRegs, 4)) 1565 ; 1566 } 1567 1568 /// MatchingStackOffset - Return true if the given stack call argument is 1569 /// already available in the same position (relatively) of the caller's 1570 /// incoming argument stack. 1571 static 1572 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, 1573 MachineFrameInfo *MFI, const MachineRegisterInfo *MRI, 1574 const ARMInstrInfo *TII) { 1575 unsigned Bytes = Arg.getValueType().getSizeInBits() / 8; 1576 int FI = INT_MAX; 1577 if (Arg.getOpcode() == ISD::CopyFromReg) { 1578 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); 1579 if (!TargetRegisterInfo::isVirtualRegister(VR)) 1580 return false; 1581 MachineInstr *Def = MRI->getVRegDef(VR); 1582 if (!Def) 1583 return false; 1584 if (!Flags.isByVal()) { 1585 if (!TII->isLoadFromStackSlot(Def, FI)) 1586 return false; 1587 } else { 1588 return false; 1589 } 1590 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) { 1591 if (Flags.isByVal()) 1592 // ByVal argument is passed in as a pointer but it's now being 1593 // dereferenced. e.g. 1594 // define @foo(%struct.X* %A) { 1595 // tail call @bar(%struct.X* byval %A) 1596 // } 1597 return false; 1598 SDValue Ptr = Ld->getBasePtr(); 1599 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr); 1600 if (!FINode) 1601 return false; 1602 FI = FINode->getIndex(); 1603 } else 1604 return false; 1605 1606 assert(FI != INT_MAX); 1607 if (!MFI->isFixedObjectIndex(FI)) 1608 return false; 1609 return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI); 1610 } 1611 1612 /// IsEligibleForTailCallOptimization - Check whether the call is eligible 1613 /// for tail call optimization. Targets which want to do tail call 1614 /// optimization should implement this function. 
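///
/// As a rough, illustrative sketch (not taken from actual test cases), call
/// sites like these are screened:
///   tail call void @callee()                     ; may be selected as a sibcall
///   tail call void (...)* @vf(i32 1)             ; rejected: vararg call with arguments
///   tail call void @sf(%struct.S* sret %out)     ; rejected: struct-return semantics
/// The final answer also depends on how the calling convention assigns each
/// argument location, which is re-checked below.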
1615 bool 1616 ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, 1617 CallingConv::ID CalleeCC, 1618 bool isVarArg, 1619 bool isCalleeStructRet, 1620 bool isCallerStructRet, 1621 const SmallVectorImpl<ISD::OutputArg> &Outs, 1622 const SmallVectorImpl<SDValue> &OutVals, 1623 const SmallVectorImpl<ISD::InputArg> &Ins, 1624 SelectionDAG& DAG) const { 1625 const Function *CallerF = DAG.getMachineFunction().getFunction(); 1626 CallingConv::ID CallerCC = CallerF->getCallingConv(); 1627 bool CCMatch = CallerCC == CalleeCC; 1628 1629 // Look for obvious safe cases to perform tail call optimization that do not 1630 // require ABI changes. This is what gcc calls sibcall. 1631 1632 // Do not sibcall optimize vararg calls unless the call site is not passing 1633 // any arguments. 1634 if (isVarArg && !Outs.empty()) 1635 return false; 1636 1637 // Also avoid sibcall optimization if either caller or callee uses struct 1638 // return semantics. 1639 if (isCalleeStructRet || isCallerStructRet) 1640 return false; 1641 1642 // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo:: 1643 // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as 1644 // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation 1645 // support in the assembler and linker to be used. This would need to be 1646 // fixed to fully support tail calls in Thumb1. 1647 // 1648 // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take 1649 // LR. This means if we need to reload LR, it takes an extra instructions, 1650 // which outweighs the value of the tail call; but here we don't know yet 1651 // whether LR is going to be used. Probably the right approach is to 1652 // generate the tail call here and turn it back into CALL/RET in 1653 // emitEpilogue if LR is used. 1654 1655 // Thumb1 PIC calls to external symbols use BX, so they can be tail calls, 1656 // but we need to make sure there are enough registers; the only valid 1657 // registers are the 4 used for parameters. We don't currently do this 1658 // case. 1659 if (Subtarget->isThumb1Only()) 1660 return false; 1661 1662 // If the calling conventions do not match, then we'd better make sure the 1663 // results are returned in the same way as what the caller expects. 1664 if (!CCMatch) { 1665 SmallVector<CCValAssign, 16> RVLocs1; 1666 ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), 1667 getTargetMachine(), RVLocs1, *DAG.getContext(), Call); 1668 CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg)); 1669 1670 SmallVector<CCValAssign, 16> RVLocs2; 1671 ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), 1672 getTargetMachine(), RVLocs2, *DAG.getContext(), Call); 1673 CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg)); 1674 1675 if (RVLocs1.size() != RVLocs2.size()) 1676 return false; 1677 for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) { 1678 if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc()) 1679 return false; 1680 if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo()) 1681 return false; 1682 if (RVLocs1[i].isRegLoc()) { 1683 if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg()) 1684 return false; 1685 } else { 1686 if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset()) 1687 return false; 1688 } 1689 } 1690 } 1691 1692 // If the callee takes no arguments then go on to check the results of the 1693 // call. 1694 if (!Outs.empty()) { 1695 // Check if stack adjustment is needed. 
For now, do not do this if any 1696 // argument is passed on the stack. 1697 SmallVector<CCValAssign, 16> ArgLocs; 1698 ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), 1699 getTargetMachine(), ArgLocs, *DAG.getContext(), Call); 1700 CCInfo.AnalyzeCallOperands(Outs, 1701 CCAssignFnForNode(CalleeCC, false, isVarArg)); 1702 if (CCInfo.getNextStackOffset()) { 1703 MachineFunction &MF = DAG.getMachineFunction(); 1704 1705 // Check if the arguments are already laid out in the right way as 1706 // the caller's fixed stack objects. 1707 MachineFrameInfo *MFI = MF.getFrameInfo(); 1708 const MachineRegisterInfo *MRI = &MF.getRegInfo(); 1709 const ARMInstrInfo *TII = 1710 ((ARMTargetMachine&)getTargetMachine()).getInstrInfo(); 1711 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); 1712 i != e; 1713 ++i, ++realArgIdx) { 1714 CCValAssign &VA = ArgLocs[i]; 1715 EVT RegVT = VA.getLocVT(); 1716 SDValue Arg = OutVals[realArgIdx]; 1717 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; 1718 if (VA.getLocInfo() == CCValAssign::Indirect) 1719 return false; 1720 if (VA.needsCustom()) { 1721 // f64 and vector types are split into multiple registers or 1722 // register/stack-slot combinations. The types will not match 1723 // the registers; give up on memory f64 refs until we figure 1724 // out what to do about this. 1725 if (!VA.isRegLoc()) 1726 return false; 1727 if (!ArgLocs[++i].isRegLoc()) 1728 return false; 1729 if (RegVT == MVT::v2f64) { 1730 if (!ArgLocs[++i].isRegLoc()) 1731 return false; 1732 if (!ArgLocs[++i].isRegLoc()) 1733 return false; 1734 } 1735 } else if (!VA.isRegLoc()) { 1736 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, 1737 MFI, MRI, TII)) 1738 return false; 1739 } 1740 } 1741 } 1742 } 1743 1744 return true; 1745 } 1746 1747 SDValue 1748 ARMTargetLowering::LowerReturn(SDValue Chain, 1749 CallingConv::ID CallConv, bool isVarArg, 1750 const SmallVectorImpl<ISD::OutputArg> &Outs, 1751 const SmallVectorImpl<SDValue> &OutVals, 1752 DebugLoc dl, SelectionDAG &DAG) const { 1753 1754 // CCValAssign - represent the assignment of the return value to a location. 1755 SmallVector<CCValAssign, 16> RVLocs; 1756 1757 // CCState - Info about the registers and stack slots. 1758 ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 1759 getTargetMachine(), RVLocs, *DAG.getContext(), Call); 1760 1761 // Analyze outgoing return values. 1762 CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true, 1763 isVarArg)); 1764 1765 // If this is the first return lowered for this function, add 1766 // the regs to the liveout set for the function. 1767 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { 1768 for (unsigned i = 0; i != RVLocs.size(); ++i) 1769 if (RVLocs[i].isRegLoc()) 1770 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); 1771 } 1772 1773 SDValue Flag; 1774 1775 // Copy the result values into the output registers. 
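  // For instance, under the soft-float AAPCS an f64 result is returned in a
  // GPR pair: the needsCustom() case below splits it with VMOVRRD and emits a
  // CopyToReg for each half (a v2f64 result goes through two such pairs).
  // This is only a summary of the custom handling that follows; ordinary
  // results take the single CopyToReg at the end of the loop.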
1776 for (unsigned i = 0, realRVLocIdx = 0; 1777 i != RVLocs.size(); 1778 ++i, ++realRVLocIdx) { 1779 CCValAssign &VA = RVLocs[i]; 1780 assert(VA.isRegLoc() && "Can only return in registers!"); 1781 1782 SDValue Arg = OutVals[realRVLocIdx]; 1783 1784 switch (VA.getLocInfo()) { 1785 default: llvm_unreachable("Unknown loc info!"); 1786 case CCValAssign::Full: break; 1787 case CCValAssign::BCvt: 1788 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); 1789 break; 1790 } 1791 1792 if (VA.needsCustom()) { 1793 if (VA.getLocVT() == MVT::v2f64) { 1794 // Extract the first half and return it in two registers. 1795 SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 1796 DAG.getConstant(0, MVT::i32)); 1797 SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl, 1798 DAG.getVTList(MVT::i32, MVT::i32), Half); 1799 1800 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag); 1801 Flag = Chain.getValue(1); 1802 VA = RVLocs[++i]; // skip ahead to next loc 1803 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), 1804 HalfGPRs.getValue(1), Flag); 1805 Flag = Chain.getValue(1); 1806 VA = RVLocs[++i]; // skip ahead to next loc 1807 1808 // Extract the 2nd half and fall through to handle it as an f64 value. 1809 Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 1810 DAG.getConstant(1, MVT::i32)); 1811 } 1812 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is 1813 // available. 1814 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, 1815 DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1); 1816 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag); 1817 Flag = Chain.getValue(1); 1818 VA = RVLocs[++i]; // skip ahead to next loc 1819 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1), 1820 Flag); 1821 } else 1822 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); 1823 1824 // Guarantee that all emitted copies are 1825 // stuck together, avoiding something bad. 1826 Flag = Chain.getValue(1); 1827 } 1828 1829 SDValue result; 1830 if (Flag.getNode()) 1831 result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag); 1832 else // Return Void 1833 result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain); 1834 1835 return result; 1836 } 1837 1838 bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const { 1839 if (N->getNumValues() != 1) 1840 return false; 1841 if (!N->hasNUsesOfValue(1, 0)) 1842 return false; 1843 1844 unsigned NumCopies = 0; 1845 SDNode* Copies[2]; 1846 SDNode *Use = *N->use_begin(); 1847 if (Use->getOpcode() == ISD::CopyToReg) { 1848 Copies[NumCopies++] = Use; 1849 } else if (Use->getOpcode() == ARMISD::VMOVRRD) { 1850 // f64 returned in a pair of GPRs. 1851 for (SDNode::use_iterator UI = Use->use_begin(), UE = Use->use_end(); 1852 UI != UE; ++UI) { 1853 if (UI->getOpcode() != ISD::CopyToReg) 1854 return false; 1855 Copies[UI.getUse().getResNo()] = *UI; 1856 ++NumCopies; 1857 } 1858 } else if (Use->getOpcode() == ISD::BITCAST) { 1859 // f32 returned in a single GPR. 
1860 if (!Use->hasNUsesOfValue(1, 0)) 1861 return false; 1862 Use = *Use->use_begin(); 1863 if (Use->getOpcode() != ISD::CopyToReg || !Use->hasNUsesOfValue(1, 0)) 1864 return false; 1865 Copies[NumCopies++] = Use; 1866 } else { 1867 return false; 1868 } 1869 1870 if (NumCopies != 1 && NumCopies != 2) 1871 return false; 1872 1873 bool HasRet = false; 1874 for (unsigned i = 0; i < NumCopies; ++i) { 1875 SDNode *Copy = Copies[i]; 1876 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end(); 1877 UI != UE; ++UI) { 1878 if (UI->getOpcode() == ISD::CopyToReg) { 1879 SDNode *Use = *UI; 1880 if (Use == Copies[0] || Use == Copies[1]) 1881 continue; 1882 return false; 1883 } 1884 if (UI->getOpcode() != ARMISD::RET_FLAG) 1885 return false; 1886 HasRet = true; 1887 } 1888 } 1889 1890 return HasRet; 1891 } 1892 1893 bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { 1894 if (!EnableARMTailCalls) 1895 return false; 1896 1897 if (!CI->isTailCall()) 1898 return false; 1899 1900 return !Subtarget->isThumb1Only(); 1901 } 1902 1903 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as 1904 // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is 1905 // one of the above mentioned nodes. It has to be wrapped because otherwise 1906 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only 1907 // be used to form addressing mode. These wrapped nodes will be selected 1908 // into MOVi. 1909 static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) { 1910 EVT PtrVT = Op.getValueType(); 1911 // FIXME there is no actual debug info here 1912 DebugLoc dl = Op.getDebugLoc(); 1913 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 1914 SDValue Res; 1915 if (CP->isMachineConstantPoolEntry()) 1916 Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, 1917 CP->getAlignment()); 1918 else 1919 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, 1920 CP->getAlignment()); 1921 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res); 1922 } 1923 1924 unsigned ARMTargetLowering::getJumpTableEncoding() const { 1925 return MachineJumpTableInfo::EK_Inline; 1926 } 1927 1928 SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, 1929 SelectionDAG &DAG) const { 1930 MachineFunction &MF = DAG.getMachineFunction(); 1931 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1932 unsigned ARMPCLabelIndex = 0; 1933 DebugLoc DL = Op.getDebugLoc(); 1934 EVT PtrVT = getPointerTy(); 1935 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); 1936 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 1937 SDValue CPAddr; 1938 if (RelocM == Reloc::Static) { 1939 CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4); 1940 } else { 1941 unsigned PCAdj = Subtarget->isThumb() ? 
4 : 8; 1942 ARMPCLabelIndex = AFI->createPICLabelUId(); 1943 ARMConstantPoolValue *CPV = new ARMConstantPoolValue(BA, ARMPCLabelIndex, 1944 ARMCP::CPBlockAddress, 1945 PCAdj); 1946 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1947 } 1948 CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr); 1949 SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr, 1950 MachinePointerInfo::getConstantPool(), 1951 false, false, 0); 1952 if (RelocM == Reloc::Static) 1953 return Result; 1954 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1955 return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel); 1956 } 1957 1958 // Lower ISD::GlobalTLSAddress using the "general dynamic" model 1959 SDValue 1960 ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, 1961 SelectionDAG &DAG) const { 1962 DebugLoc dl = GA->getDebugLoc(); 1963 EVT PtrVT = getPointerTy(); 1964 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; 1965 MachineFunction &MF = DAG.getMachineFunction(); 1966 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1967 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1968 ARMConstantPoolValue *CPV = 1969 new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, 1970 ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true); 1971 SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1972 Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); 1973 Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, 1974 MachinePointerInfo::getConstantPool(), 1975 false, false, 0); 1976 SDValue Chain = Argument.getValue(1); 1977 1978 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1979 Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel); 1980 1981 // call __tls_get_addr. 1982 ArgListTy Args; 1983 ArgListEntry Entry; 1984 Entry.Node = Argument; 1985 Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext()); 1986 Args.push_back(Entry); 1987 // FIXME: is there useful debug info available here? 1988 std::pair<SDValue, SDValue> CallResult = 1989 LowerCallTo(Chain, (Type *) Type::getInt32Ty(*DAG.getContext()), 1990 false, false, false, false, 1991 0, CallingConv::C, false, /*isReturnValueUsed=*/true, 1992 DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); 1993 return CallResult.first; 1994 } 1995 1996 // Lower ISD::GlobalTLSAddress using the "initial exec" or 1997 // "local exec" model. 1998 SDValue 1999 ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, 2000 SelectionDAG &DAG) const { 2001 const GlobalValue *GV = GA->getGlobal(); 2002 DebugLoc dl = GA->getDebugLoc(); 2003 SDValue Offset; 2004 SDValue Chain = DAG.getEntryNode(); 2005 EVT PtrVT = getPointerTy(); 2006 // Get the Thread Pointer 2007 SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); 2008 2009 if (GV->isDeclaration()) { 2010 MachineFunction &MF = DAG.getMachineFunction(); 2011 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2012 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 2013 // Initial exec model. 2014 unsigned char PCAdj = Subtarget->isThumb() ? 
4 : 8; 2015 ARMConstantPoolValue *CPV = 2016 new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, 2017 ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF, true); 2018 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2019 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); 2020 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 2021 MachinePointerInfo::getConstantPool(), 2022 false, false, 0); 2023 Chain = Offset.getValue(1); 2024 2025 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2026 Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); 2027 2028 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 2029 MachinePointerInfo::getConstantPool(), 2030 false, false, 0); 2031 } else { 2032 // local exec model 2033 ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMCP::TPOFF); 2034 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2035 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); 2036 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 2037 MachinePointerInfo::getConstantPool(), 2038 false, false, 0); 2039 } 2040 2041 // The address of the thread local variable is the add of the thread 2042 // pointer with the offset of the variable. 2043 return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset); 2044 } 2045 2046 SDValue 2047 ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { 2048 // TODO: implement the "local dynamic" model 2049 assert(Subtarget->isTargetELF() && 2050 "TLS not implemented for non-ELF targets"); 2051 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); 2052 // If the relocation model is PIC, use the "General Dynamic" TLS Model, 2053 // otherwise use the "Local Exec" TLS Model 2054 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) 2055 return LowerToTLSGeneralDynamicModel(GA, DAG); 2056 else 2057 return LowerToTLSExecModels(GA, DAG); 2058 } 2059 2060 SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, 2061 SelectionDAG &DAG) const { 2062 EVT PtrVT = getPointerTy(); 2063 DebugLoc dl = Op.getDebugLoc(); 2064 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 2065 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 2066 if (RelocM == Reloc::PIC_) { 2067 bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); 2068 ARMConstantPoolValue *CPV = 2069 new ARMConstantPoolValue(GV, UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT); 2070 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2071 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2072 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), 2073 CPAddr, 2074 MachinePointerInfo::getConstantPool(), 2075 false, false, 0); 2076 SDValue Chain = Result.getValue(1); 2077 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); 2078 Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT); 2079 if (!UseGOTOFF) 2080 Result = DAG.getLoad(PtrVT, dl, Chain, Result, 2081 MachinePointerInfo::getGOT(), false, false, 0); 2082 return Result; 2083 } 2084 2085 // If we have T2 ops, we can materialize the address directly via movt/movw 2086 // pair. This is always cheaper. 2087 if (Subtarget->useMovt()) { 2088 ++NumMovwMovt; 2089 // FIXME: Once remat is capable of dealing with instructions with register 2090 // operands, expand this into two nodes. 
2091 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, 2092 DAG.getTargetGlobalAddress(GV, dl, PtrVT)); 2093 } else { 2094 SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); 2095 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2096 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2097 MachinePointerInfo::getConstantPool(), 2098 false, false, 0); 2099 } 2100 } 2101 2102 SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, 2103 SelectionDAG &DAG) const { 2104 EVT PtrVT = getPointerTy(); 2105 DebugLoc dl = Op.getDebugLoc(); 2106 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 2107 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 2108 MachineFunction &MF = DAG.getMachineFunction(); 2109 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2110 2111 // FIXME: Enable this for static codegen when tool issues are fixed. 2112 if (Subtarget->useMovt() && RelocM != Reloc::Static) { 2113 ++NumMovwMovt; 2114 // FIXME: Once remat is capable of dealing with instructions with register 2115 // operands, expand this into two nodes. 2116 if (RelocM == Reloc::Static) 2117 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, 2118 DAG.getTargetGlobalAddress(GV, dl, PtrVT)); 2119 2120 unsigned Wrapper = (RelocM == Reloc::PIC_) 2121 ? ARMISD::WrapperPIC : ARMISD::WrapperDYN; 2122 SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, 2123 DAG.getTargetGlobalAddress(GV, dl, PtrVT)); 2124 if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) 2125 Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, 2126 MachinePointerInfo::getGOT(), false, false, 0); 2127 return Result; 2128 } 2129 2130 unsigned ARMPCLabelIndex = 0; 2131 SDValue CPAddr; 2132 if (RelocM == Reloc::Static) { 2133 CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); 2134 } else { 2135 ARMPCLabelIndex = AFI->createPICLabelUId(); 2136 unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8); 2137 ARMConstantPoolValue *CPV = 2138 new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj); 2139 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2140 } 2141 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2142 2143 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2144 MachinePointerInfo::getConstantPool(), 2145 false, false, 0); 2146 SDValue Chain = Result.getValue(1); 2147 2148 if (RelocM == Reloc::PIC_) { 2149 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2150 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 2151 } 2152 2153 if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) 2154 Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(), 2155 false, false, 0); 2156 2157 return Result; 2158 } 2159 2160 SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, 2161 SelectionDAG &DAG) const { 2162 assert(Subtarget->isTargetELF() && 2163 "GLOBAL OFFSET TABLE not implemented for non-ELF targets"); 2164 MachineFunction &MF = DAG.getMachineFunction(); 2165 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2166 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 2167 EVT PtrVT = getPointerTy(); 2168 DebugLoc dl = Op.getDebugLoc(); 2169 unsigned PCAdj = Subtarget->isThumb() ? 
4 : 8; 2170 ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(), 2171 "_GLOBAL_OFFSET_TABLE_", 2172 ARMPCLabelIndex, PCAdj); 2173 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2174 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2175 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2176 MachinePointerInfo::getConstantPool(), 2177 false, false, 0); 2178 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2179 return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 2180 } 2181 2182 SDValue 2183 ARMTargetLowering::LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG) 2184 const { 2185 DebugLoc dl = Op.getDebugLoc(); 2186 return DAG.getNode(ARMISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other, 2187 Op.getOperand(0), Op.getOperand(1)); 2188 } 2189 2190 SDValue 2191 ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { 2192 DebugLoc dl = Op.getDebugLoc(); 2193 SDValue Val = DAG.getConstant(0, MVT::i32); 2194 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(0), 2195 Op.getOperand(1), Val); 2196 } 2197 2198 SDValue 2199 ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const { 2200 DebugLoc dl = Op.getDebugLoc(); 2201 return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0), 2202 Op.getOperand(1), DAG.getConstant(0, MVT::i32)); 2203 } 2204 2205 SDValue 2206 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, 2207 const ARMSubtarget *Subtarget) const { 2208 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 2209 DebugLoc dl = Op.getDebugLoc(); 2210 switch (IntNo) { 2211 default: return SDValue(); // Don't custom lower most intrinsics. 2212 case Intrinsic::arm_thread_pointer: { 2213 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2214 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); 2215 } 2216 case Intrinsic::eh_sjlj_lsda: { 2217 MachineFunction &MF = DAG.getMachineFunction(); 2218 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2219 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 2220 EVT PtrVT = getPointerTy(); 2221 DebugLoc dl = Op.getDebugLoc(); 2222 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 2223 SDValue CPAddr; 2224 unsigned PCAdj = (RelocM != Reloc::PIC_) 2225 ? 0 : (Subtarget->isThumb() ? 4 : 8); 2226 ARMConstantPoolValue *CPV = 2227 new ARMConstantPoolValue(MF.getFunction(), ARMPCLabelIndex, 2228 ARMCP::CPLSDA, PCAdj); 2229 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2230 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2231 SDValue Result = 2232 DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2233 MachinePointerInfo::getConstantPool(), 2234 false, false, 0); 2235 2236 if (RelocM == Reloc::PIC_) { 2237 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2238 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 2239 } 2240 return Result; 2241 } 2242 case Intrinsic::arm_neon_vmulls: 2243 case Intrinsic::arm_neon_vmullu: { 2244 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls) 2245 ? 
             ARMISD::VMULLs : ARMISD::VMULLu;
2246     return DAG.getNode(NewOpc, Op.getDebugLoc(), Op.getValueType(),
2247                        Op.getOperand(1), Op.getOperand(2));
2248   }
2249   }
2250 }
2251
2252 static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
2253                                const ARMSubtarget *Subtarget) {
2254   DebugLoc dl = Op.getDebugLoc();
2255   if (!Subtarget->hasDataBarrier()) {
2256     // Some ARMv6 CPUs can support data barriers with an mcr instruction.
2257     // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
2258     // here.
2259     assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
2260            "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
2261     return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
2262                        DAG.getConstant(0, MVT::i32));
2263   }
2264
2265   SDValue Op5 = Op.getOperand(5);
2266   bool isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue() != 0;
2267   unsigned isLL = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
2268   unsigned isLS = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
2269   bool isOnlyStoreBarrier = (isLL == 0 && isLS == 0);
2270
2271   ARM_MB::MemBOpt DMBOpt;
2272   if (isDeviceBarrier)
2273     DMBOpt = isOnlyStoreBarrier ? ARM_MB::ST : ARM_MB::SY;
2274   else
2275     DMBOpt = isOnlyStoreBarrier ? ARM_MB::ISHST : ARM_MB::ISH;
2276   return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
2277                      DAG.getConstant(DMBOpt, MVT::i32));
2278 }
2279
2280 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
2281                              const ARMSubtarget *Subtarget) {
2282   // ARM pre-v5TE and Thumb1 do not have preload instructions.
2283   if (!(Subtarget->isThumb2() ||
2284         (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
2285     // Just preserve the chain.
2286     return Op.getOperand(0);
2287
2288   DebugLoc dl = Op.getDebugLoc();
2289   unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
2290   if (!isRead &&
2291       (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
2292     // Only ARMv7 with the MP extension has PLDW; drop write preloads otherwise.
2293     return Op.getOperand(0);
2294
2295   unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
2296   if (Subtarget->isThumb()) {
2297     // Invert the bits.
2298     isRead = ~isRead & 1;
2299     isData = ~isData & 1;
2300   }
2301
2302   return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
2303                      Op.getOperand(1), DAG.getConstant(isRead, MVT::i32),
2304                      DAG.getConstant(isData, MVT::i32));
2305 }
2306
2307 static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
2308   MachineFunction &MF = DAG.getMachineFunction();
2309   ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
2310
2311   // vastart just stores the address of the VarArgsFrameIndex slot into the
2312   // memory location argument.
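  // For example (illustrative IR only):
  //   %ap = alloca i8*
  //   %ap.cast = bitcast i8** %ap to i8*
  //   call void @llvm.va_start(i8* %ap.cast)
  // becomes a single store of the frame-index address into the va_list slot;
  // the subsequent va_arg traversal is handled generically from there.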
2313   DebugLoc dl = Op.getDebugLoc();
2314   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2315   SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
2316   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2317   return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
2318                       MachinePointerInfo(SV), false, false, 0);
2319 }
2320
2321 SDValue
2322 ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
2323                                         SDValue &Root, SelectionDAG &DAG,
2324                                         DebugLoc dl) const {
2325   MachineFunction &MF = DAG.getMachineFunction();
2326   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2327
2328   TargetRegisterClass *RC;
2329   if (AFI->isThumb1OnlyFunction())
2330     RC = ARM::tGPRRegisterClass;
2331   else
2332     RC = ARM::GPRRegisterClass;
2333
2334   // Transform the arguments stored in physical registers into virtual ones.
2335   unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2336   SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
2337
2338   SDValue ArgValue2;
2339   if (NextVA.isMemLoc()) {
2340     MachineFrameInfo *MFI = MF.getFrameInfo();
2341     int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true);
2342
2343     // Create a load node to retrieve the second half from the stack.
2344     SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
2345     ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
2346                             MachinePointerInfo::getFixedStack(FI),
2347                             false, false, 0);
2348   } else {
2349     Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
2350     ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
2351   }
2352
2353   return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
2354 }
2355
2356 void
2357 ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
2358                                   unsigned &VARegSize, unsigned &VARegSaveSize)
2359   const {
2360   unsigned NumGPRs;
2361   if (CCInfo.isFirstByValRegValid())
2362     NumGPRs = ARM::R4 - CCInfo.getFirstByValReg();
2363   else {
2364     unsigned int firstUnalloced;
2365     firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs,
2366                                                 sizeof(GPRArgRegs) /
2367                                                 sizeof(GPRArgRegs[0]));
2368     NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0;
2369   }
2370
2371   unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
2372   VARegSize = NumGPRs * 4;
2373   VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
2374 }
2375
2376 // The remaining GPRs hold either the beginning of variable-argument
2377 // data, or the beginning of an aggregate passed by value (usually
2378 // byval). Either way, we allocate stack slots adjacent to the data
2379 // provided by our caller, and store the unallocated registers there.
2380 // If this is a variadic function, the va_list pointer will begin with
2381 // these values; otherwise, this reassembles a (byval) structure that
2382 // was split between registers and memory.
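//
// A quick worked example, assuming the default 8-byte stack alignment: for
//   void f(int a, ...)
// r0 carries 'a' and r1-r3 are left for the variadic part, so computeRegArea
// above yields VARegSize = 3 * 4 = 12 and VARegSaveSize = (12 + 7) & ~7 = 16,
// and 16 bytes are reserved for the register save area created below.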
2383 void
2384 ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
2385                                         DebugLoc dl, SDValue &Chain,
2386                                         unsigned ArgOffset) const {
2387   MachineFunction &MF = DAG.getMachineFunction();
2388   MachineFrameInfo *MFI = MF.getFrameInfo();
2389   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2390   unsigned firstRegToSaveIndex;
2391   if (CCInfo.isFirstByValRegValid())
2392     firstRegToSaveIndex = CCInfo.getFirstByValReg() - ARM::R0;
2393   else {
2394     firstRegToSaveIndex = CCInfo.getFirstUnallocated
2395       (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
2396   }
2397
2398   unsigned VARegSize, VARegSaveSize;
2399   computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize);
2400   if (VARegSaveSize) {
2401     // If this function is vararg, store any remaining integer argument regs
2402     // to their spots on the stack so that they may be loaded by dereferencing
2403     // the result of va_arg.
2404     AFI->setVarArgsRegSaveSize(VARegSaveSize);
2405     AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(VARegSaveSize,
2406                                                      ArgOffset + VARegSaveSize
2407                                                      - VARegSize,
2408                                                      false));
2409     SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
2410                                     getPointerTy());
2411
2412     SmallVector<SDValue, 4> MemOps;
2413     for (; firstRegToSaveIndex < 4; ++firstRegToSaveIndex) {
2414       TargetRegisterClass *RC;
2415       if (AFI->isThumb1OnlyFunction())
2416         RC = ARM::tGPRRegisterClass;
2417       else
2418         RC = ARM::GPRRegisterClass;
2419
2420       unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC);
2421       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
2422       SDValue Store =
2423         DAG.getStore(Val.getValue(1), dl, Val, FIN,
2424                      MachinePointerInfo::getFixedStack(AFI->getVarArgsFrameIndex()),
2425                      false, false, 0);
2426       MemOps.push_back(Store);
2427       FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
2428                         DAG.getConstant(4, getPointerTy()));
2429     }
2430     if (!MemOps.empty())
2431       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
2432                           &MemOps[0], MemOps.size());
2433   } else
2434     // This will point to the next argument passed via stack.
2435     AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true));
2436 }
2437
2438 SDValue
2439 ARMTargetLowering::LowerFormalArguments(SDValue Chain,
2440                                         CallingConv::ID CallConv, bool isVarArg,
2441                                         const SmallVectorImpl<ISD::InputArg>
2442                                           &Ins,
2443                                         DebugLoc dl, SelectionDAG &DAG,
2444                                         SmallVectorImpl<SDValue> &InVals)
2445                                           const {
2446   MachineFunction &MF = DAG.getMachineFunction();
2447   MachineFrameInfo *MFI = MF.getFrameInfo();
2448
2449   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2450
2451   // Assign locations to all of the incoming arguments.
2452   SmallVector<CCValAssign, 16> ArgLocs;
2453   ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
2454                     getTargetMachine(), ArgLocs, *DAG.getContext(), Prologue);
2455   CCInfo.AnalyzeFormalArguments(Ins,
2456                                 CCAssignFnForNode(CallConv, /* Return*/ false,
2457                                                   isVarArg));
2458
2459   SmallVector<SDValue, 16> ArgValues;
2460   int lastInsIndex = -1;
2461
2462   SDValue ArgValue;
2463   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2464     CCValAssign &VA = ArgLocs[i];
2465
2466     // Arguments stored in registers.
2467     if (VA.isRegLoc()) {
2468       EVT RegVT = VA.getLocVT();
2469
2470       if (VA.needsCustom()) {
2471         // f64 and vector types are split up into multiple registers or
2472         // combinations of registers and stack slots.
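        // For instance, an AAPCS f64 argument may arrive whole in a GPR pair
        // (e.g. r2:r3) or split as r3 plus a 4-byte stack slot;
        // GetF64FormalArgument rebuilds it with VMOVDRR either way, and a
        // v2f64 argument is assembled from two such f64 halves just below.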
2473 if (VA.getLocVT() == MVT::v2f64) { 2474 SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i], 2475 Chain, DAG, dl); 2476 VA = ArgLocs[++i]; // skip ahead to next loc 2477 SDValue ArgValue2; 2478 if (VA.isMemLoc()) { 2479 int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true); 2480 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 2481 ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN, 2482 MachinePointerInfo::getFixedStack(FI), 2483 false, false, 0); 2484 } else { 2485 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], 2486 Chain, DAG, dl); 2487 } 2488 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); 2489 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, 2490 ArgValue, ArgValue1, DAG.getIntPtrConstant(0)); 2491 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, 2492 ArgValue, ArgValue2, DAG.getIntPtrConstant(1)); 2493 } else 2494 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); 2495 2496 } else { 2497 TargetRegisterClass *RC; 2498 2499 if (RegVT == MVT::f32) 2500 RC = ARM::SPRRegisterClass; 2501 else if (RegVT == MVT::f64) 2502 RC = ARM::DPRRegisterClass; 2503 else if (RegVT == MVT::v2f64) 2504 RC = ARM::QPRRegisterClass; 2505 else if (RegVT == MVT::i32) 2506 RC = (AFI->isThumb1OnlyFunction() ? 2507 ARM::tGPRRegisterClass : ARM::GPRRegisterClass); 2508 else 2509 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering"); 2510 2511 // Transform the arguments in physical registers into virtual ones. 2512 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 2513 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); 2514 } 2515 2516 // If this is an 8 or 16-bit value, it is really passed promoted 2517 // to 32 bits. Insert an assert[sz]ext to capture this, then 2518 // truncate to the right size. 2519 switch (VA.getLocInfo()) { 2520 default: llvm_unreachable("Unknown loc info!"); 2521 case CCValAssign::Full: break; 2522 case CCValAssign::BCvt: 2523 ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue); 2524 break; 2525 case CCValAssign::SExt: 2526 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, 2527 DAG.getValueType(VA.getValVT())); 2528 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 2529 break; 2530 case CCValAssign::ZExt: 2531 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, 2532 DAG.getValueType(VA.getValVT())); 2533 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 2534 break; 2535 } 2536 2537 InVals.push_back(ArgValue); 2538 2539 } else { // VA.isRegLoc() 2540 2541 // sanity check 2542 assert(VA.isMemLoc()); 2543 assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered"); 2544 2545 int index = ArgLocs[i].getValNo(); 2546 2547 // Some Ins[] entries become multiple ArgLoc[] entries. 2548 // Process them only once. 2549 if (index != lastInsIndex) 2550 { 2551 ISD::ArgFlagsTy Flags = Ins[index].Flags; 2552 // FIXME: For now, all byval parameter objects are marked mutable. 2553 // This can be changed with more analysis. 2554 // In case of tail call optimization mark all arguments mutable. 2555 // Since they could be overwritten by lowering of arguments in case of 2556 // a tail call. 2557 if (Flags.isByVal()) { 2558 unsigned VARegSize, VARegSaveSize; 2559 computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize); 2560 VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0); 2561 unsigned Bytes = Flags.getByValSize() - VARegSize; 2562 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects. 
2563 int FI = MFI->CreateFixedObject(Bytes, 2564 VA.getLocMemOffset(), false); 2565 InVals.push_back(DAG.getFrameIndex(FI, getPointerTy())); 2566 } else { 2567 int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8, 2568 VA.getLocMemOffset(), true); 2569 2570 // Create load nodes to retrieve arguments from the stack. 2571 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 2572 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, 2573 MachinePointerInfo::getFixedStack(FI), 2574 false, false, 0)); 2575 } 2576 lastInsIndex = index; 2577 } 2578 } 2579 } 2580 2581 // varargs 2582 if (isVarArg) 2583 VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset()); 2584 2585 return Chain; 2586 } 2587 2588 /// isFloatingPointZero - Return true if this is +0.0. 2589 static bool isFloatingPointZero(SDValue Op) { 2590 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) 2591 return CFP->getValueAPF().isPosZero(); 2592 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { 2593 // Maybe this has already been legalized into the constant pool? 2594 if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) { 2595 SDValue WrapperOp = Op.getOperand(1).getOperand(0); 2596 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp)) 2597 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) 2598 return CFP->getValueAPF().isPosZero(); 2599 } 2600 } 2601 return false; 2602 } 2603 2604 /// Returns appropriate ARM CMP (cmp) and corresponding condition code for 2605 /// the given operands. 2606 SDValue 2607 ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, 2608 SDValue &ARMcc, SelectionDAG &DAG, 2609 DebugLoc dl) const { 2610 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { 2611 unsigned C = RHSC->getZExtValue(); 2612 if (!isLegalICmpImmediate(C)) { 2613 // Constant does not fit, try adjusting it by one? 2614 switch (CC) { 2615 default: break; 2616 case ISD::SETLT: 2617 case ISD::SETGE: 2618 if (C != 0x80000000 && isLegalICmpImmediate(C-1)) { 2619 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT; 2620 RHS = DAG.getConstant(C-1, MVT::i32); 2621 } 2622 break; 2623 case ISD::SETULT: 2624 case ISD::SETUGE: 2625 if (C != 0 && isLegalICmpImmediate(C-1)) { 2626 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; 2627 RHS = DAG.getConstant(C-1, MVT::i32); 2628 } 2629 break; 2630 case ISD::SETLE: 2631 case ISD::SETGT: 2632 if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) { 2633 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; 2634 RHS = DAG.getConstant(C+1, MVT::i32); 2635 } 2636 break; 2637 case ISD::SETULE: 2638 case ISD::SETUGT: 2639 if (C != 0xffffffff && isLegalICmpImmediate(C+1)) { 2640 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE; 2641 RHS = DAG.getConstant(C+1, MVT::i32); 2642 } 2643 break; 2644 } 2645 } 2646 } 2647 2648 ARMCC::CondCodes CondCode = IntCCToARMCC(CC); 2649 ARMISD::NodeType CompareType; 2650 switch (CondCode) { 2651 default: 2652 CompareType = ARMISD::CMP; 2653 break; 2654 case ARMCC::EQ: 2655 case ARMCC::NE: 2656 // Uses only Z Flag 2657 CompareType = ARMISD::CMPZ; 2658 break; 2659 } 2660 ARMcc = DAG.getConstant(CondCode, MVT::i32); 2661 return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS); 2662 } 2663 2664 /// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. 
2665 SDValue 2666 ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, 2667 DebugLoc dl) const { 2668 SDValue Cmp; 2669 if (!isFloatingPointZero(RHS)) 2670 Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS); 2671 else 2672 Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS); 2673 return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp); 2674 } 2675 2676 /// duplicateCmp - Glue values can have only one use, so this function 2677 /// duplicates a comparison node. 2678 SDValue 2679 ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const { 2680 unsigned Opc = Cmp.getOpcode(); 2681 DebugLoc DL = Cmp.getDebugLoc(); 2682 if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ) 2683 return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); 2684 2685 assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation"); 2686 Cmp = Cmp.getOperand(0); 2687 Opc = Cmp.getOpcode(); 2688 if (Opc == ARMISD::CMPFP) 2689 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); 2690 else { 2691 assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT"); 2692 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0)); 2693 } 2694 return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp); 2695 } 2696 2697 SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { 2698 SDValue Cond = Op.getOperand(0); 2699 SDValue SelectTrue = Op.getOperand(1); 2700 SDValue SelectFalse = Op.getOperand(2); 2701 DebugLoc dl = Op.getDebugLoc(); 2702 2703 // Convert: 2704 // 2705 // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond) 2706 // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond) 2707 // 2708 if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) { 2709 const ConstantSDNode *CMOVTrue = 2710 dyn_cast<ConstantSDNode>(Cond.getOperand(0)); 2711 const ConstantSDNode *CMOVFalse = 2712 dyn_cast<ConstantSDNode>(Cond.getOperand(1)); 2713 2714 if (CMOVTrue && CMOVFalse) { 2715 unsigned CMOVTrueVal = CMOVTrue->getZExtValue(); 2716 unsigned CMOVFalseVal = CMOVFalse->getZExtValue(); 2717 2718 SDValue True; 2719 SDValue False; 2720 if (CMOVTrueVal == 1 && CMOVFalseVal == 0) { 2721 True = SelectTrue; 2722 False = SelectFalse; 2723 } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) { 2724 True = SelectFalse; 2725 False = SelectTrue; 2726 } 2727 2728 if (True.getNode() && False.getNode()) { 2729 EVT VT = Op.getValueType(); 2730 SDValue ARMcc = Cond.getOperand(2); 2731 SDValue CCR = Cond.getOperand(3); 2732 SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG); 2733 assert(True.getValueType() == VT); 2734 return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp); 2735 } 2736 } 2737 } 2738 2739 return DAG.getSelectCC(dl, Cond, 2740 DAG.getConstant(0, Cond.getValueType()), 2741 SelectTrue, SelectFalse, ISD::SETNE); 2742 } 2743 2744 SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { 2745 EVT VT = Op.getValueType(); 2746 SDValue LHS = Op.getOperand(0); 2747 SDValue RHS = Op.getOperand(1); 2748 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 2749 SDValue TrueVal = Op.getOperand(2); 2750 SDValue FalseVal = Op.getOperand(3); 2751 DebugLoc dl = Op.getDebugLoc(); 2752 2753 if (LHS.getValueType() == MVT::i32) { 2754 SDValue ARMcc; 2755 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2756 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 2757 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp); 2758 } 2759 2760 ARMCC::CondCodes CondCode, CondCode2; 2761 
  FPCCToARMCC(CC, CondCode, CondCode2);

  SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
  SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
                               ARMcc, CCR, Cmp);
  if (CondCode2 != ARMCC::AL) {
    SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32);
    // FIXME: Needs another CMP because flag can have but one use.
    SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
    Result = DAG.getNode(ARMISD::CMOV, dl, VT,
                         Result, TrueVal, ARMcc2, CCR, Cmp2);
  }
  return Result;
}

/// canChangeToInt - Given the fp compare operand, return true if it is suitable
/// to morph to an integer compare sequence.
static bool canChangeToInt(SDValue Op, bool &SeenZero,
                           const ARMSubtarget *Subtarget) {
  SDNode *N = Op.getNode();
  if (!N->hasOneUse())
    // Otherwise it requires moving the value from fp to integer registers.
    return false;
  if (!N->getNumValues())
    return false;
  EVT VT = Op.getValueType();
  if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
    // f32 case is generally profitable. f64 case only makes sense when vcmpe +
    // vmrs are very slow, e.g. cortex-a8.
    return false;

  if (isFloatingPointZero(Op)) {
    SeenZero = true;
    return true;
  }
  return ISD::isNormalLoad(N);
}

static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
  if (isFloatingPointZero(Op))
    return DAG.getConstant(0, MVT::i32);

  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
    return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
                       Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
                       Ld->isVolatile(), Ld->isNonTemporal(),
                       Ld->getAlignment());

  llvm_unreachable("Unknown VFP cmp argument!");
}

static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
                           SDValue &RetVal1, SDValue &RetVal2) {
  if (isFloatingPointZero(Op)) {
    RetVal1 = DAG.getConstant(0, MVT::i32);
    RetVal2 = DAG.getConstant(0, MVT::i32);
    return;
  }

  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
    SDValue Ptr = Ld->getBasePtr();
    RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
                          Ld->getChain(), Ptr,
                          Ld->getPointerInfo(),
                          Ld->isVolatile(), Ld->isNonTemporal(),
                          Ld->getAlignment());

    EVT PtrType = Ptr.getValueType();
    unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
    SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(),
                                 PtrType, Ptr, DAG.getConstant(4, PtrType));
    RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
                          Ld->getChain(), NewPtr,
                          Ld->getPointerInfo().getWithOffset(4),
                          Ld->isVolatile(), Ld->isNonTemporal(),
                          NewAlign);
    return;
  }

  llvm_unreachable("Unknown VFP cmp argument!");
}

/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
/// f32 and even f64 comparisons to integer ones.
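/// For example (illustrative only): a branch on (f32 a) == (f32 b), where both
/// values are loaded straight from memory and are known not to be NaN, can be
/// re-expressed as an integer compare of the raw 32-bit representations,
/// avoiding the VFP compare and the FPSCR-to-CPSR transfer.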
SDValue
ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue Dest = Op.getOperand(4);
  DebugLoc dl = Op.getDebugLoc();

  bool SeenZero = false;
  if (canChangeToInt(LHS, SeenZero, Subtarget) &&
      canChangeToInt(RHS, SeenZero, Subtarget) &&
      // If one of the operands is zero, it's safe to ignore the NaN case since
      // we only care about equality comparisons.
      (SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) {
    // If unsafe fp math optimization is enabled and there are no other uses of
    // the CMP operands, and the condition code is EQ or NE, we can optimize it
    // to an integer comparison.
    if (CC == ISD::SETOEQ)
      CC = ISD::SETEQ;
    else if (CC == ISD::SETUNE)
      CC = ISD::SETNE;

    SDValue ARMcc;
    if (LHS.getValueType() == MVT::f32) {
      LHS = bitcastf32Toi32(LHS, DAG);
      RHS = bitcastf32Toi32(RHS, DAG);
      SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
      SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
      return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
                         Chain, Dest, ARMcc, CCR, Cmp);
    }

    SDValue LHS1, LHS2;
    SDValue RHS1, RHS2;
    expandf64Toi32(LHS, DAG, LHS1, LHS2);
    expandf64Toi32(RHS, DAG, RHS1, RHS2);
    ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
    ARMcc = DAG.getConstant(CondCode, MVT::i32);
    SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
    return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7);
  }

  return SDValue();
}

SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue Dest = Op.getOperand(4);
  DebugLoc dl = Op.getDebugLoc();

  if (LHS.getValueType() == MVT::i32) {
    SDValue ARMcc;
    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
    return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
                       Chain, Dest, ARMcc, CCR, Cmp);
  }

  assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);

  if (UnsafeFPMath &&
      (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
       CC == ISD::SETNE || CC == ISD::SETUNE)) {
    SDValue Result = OptimizeVFPBrcond(Op, DAG);
    if (Result.getNode())
      return Result;
  }

  ARMCC::CondCodes CondCode, CondCode2;
  FPCCToARMCC(CC, CondCode, CondCode2);

  SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
  SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
  SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
  if (CondCode2 != ARMCC::AL) {
    ARMcc = DAG.getConstant(CondCode2, MVT::i32);
    SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
    Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
  }
  return Res;
}

SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
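  // Lower a jump-table branch: the entry address is computed as
  // table + index * 4; ARM mode loads the target (or, for PIC, an offset that
  // is added back to the table address) from the table, while Thumb2 uses the
  // two-level BR2_JT form that is later turned into TBB / TBH.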
  SDValue Chain = Op.getOperand(0);
  SDValue Table = Op.getOperand(1);
  SDValue Index = Op.getOperand(2);
  DebugLoc dl = Op.getDebugLoc();

  EVT PTy = getPointerTy();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
  ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
  SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
  Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId);
  Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy));
  SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
  if (Subtarget->isThumb2()) {
    // Thumb2 uses a two-level jump. That is, it jumps into the jump table
    // which does another jump to the destination. This also makes it easier
    // to translate it to TBB / TBH later.
    // FIXME: This might not work if the function is extremely large.
    return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
                       Addr, Op.getOperand(2), JTI, UId);
  }
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
    Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
                       MachinePointerInfo::getJumpTable(),
                       false, false, 0);
    Chain = Addr.getValue(1);
    Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
    return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
  } else {
    Addr = DAG.getLoad(PTy, dl, Chain, Addr,
                       MachinePointerInfo::getJumpTable(), false, false, 0);
    Chain = Addr.getValue(1);
    return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
  }
}

static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
  DebugLoc dl = Op.getDebugLoc();
  unsigned Opc;

  switch (Op.getOpcode()) {
  default:
    assert(0 && "Invalid opcode!");
  case ISD::FP_TO_SINT:
    Opc = ARMISD::FTOSI;
    break;
  case ISD::FP_TO_UINT:
    Opc = ARMISD::FTOUI;
    break;
  }
  Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0));
  return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
}

static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  DebugLoc dl = Op.getDebugLoc();

  EVT OperandVT = Op.getOperand(0).getValueType();
  assert(OperandVT == MVT::v4i16 && "Invalid type for custom lowering!");
  if (VT != MVT::v4f32)
    return DAG.UnrollVectorOp(Op.getNode());

  unsigned CastOpc;
  unsigned Opc;
  switch (Op.getOpcode()) {
  default:
    assert(0 && "Invalid opcode!");
  case ISD::SINT_TO_FP:
    CastOpc = ISD::SIGN_EXTEND;
    Opc = ISD::SINT_TO_FP;
    break;
  case ISD::UINT_TO_FP:
    CastOpc = ISD::ZERO_EXTEND;
    Opc = ISD::UINT_TO_FP;
    break;
  }

  Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0));
  return DAG.getNode(Opc, dl, VT, Op);
}

static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  if (VT.isVector())
    return LowerVectorINT_TO_FP(Op, DAG);

  DebugLoc dl = Op.getDebugLoc();
  unsigned Opc;

  switch (Op.getOpcode()) {
  default:
    assert(0 && "Invalid opcode!");
  case ISD::SINT_TO_FP:
    Opc = ARMISD::SITOF;
    break;
  case ISD::UINT_TO_FP:
    Opc = ARMISD::UITOF;
    break;
  }

  Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0));
  return DAG.getNode(Opc, dl, VT, Op);
}
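// Illustrative note (not part of the original source): the scalar int<->fp
// lowerings above funnel the integer value through an f32-typed BITCAST
// because the VFP VCVT instructions expect their integer operand in an
// S register; e.g. an i32 -> f64 conversion ends up as something like
//   vmov          s0, r0
//   vcvt.f64.s32  d0, s0
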
SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
  // Implement fcopysign with a fabs and a conditional fneg.
  SDValue Tmp0 = Op.getOperand(0);
  SDValue Tmp1 = Op.getOperand(1);
  DebugLoc dl = Op.getDebugLoc();
  EVT VT = Op.getValueType();
  EVT SrcVT = Tmp1.getValueType();
  bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
               Tmp0.getOpcode() == ARMISD::VMOVDRR;
  bool UseNEON = !InGPR && Subtarget->hasNEON();

  if (UseNEON) {
    // Use VBSL to copy the sign bit.
    unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
    SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
                               DAG.getTargetConstant(EncodedVal, MVT::i32));
    EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
    if (VT == MVT::f64)
      Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
                         DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
                         DAG.getConstant(32, MVT::i32));
    else /*if (VT == MVT::f32)*/
      Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
    if (SrcVT == MVT::f32) {
      Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
      if (VT == MVT::f64)
        Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
                           DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
                           DAG.getConstant(32, MVT::i32));
    } else if (VT == MVT::f32)
      Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64,
                         DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
                         DAG.getConstant(32, MVT::i32));
    Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
    Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);

    SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
                                            MVT::i32);
    AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
    SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
                                  DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));

    SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
                              DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
                              DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
    if (VT == MVT::f32) {
      Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
      Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
                        DAG.getConstant(0, MVT::i32));
    } else {
      Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
    }

    return Res;
  }

  // Bitcast operand 1 to i32.
  if (SrcVT == MVT::f64)
    Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
                       &Tmp1, 1).getValue(1);
  Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);

  // Or in the signbit with integer operations.
  SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
  SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
  Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
  if (VT == MVT::f32) {
    Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
                       DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
    return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
                       DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
  }

  // f64: Or the high part with signbit and then combine two parts.
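  // VMOVRRD splits the f64 into its two 32-bit halves in GPRs; only the high
  // half carries the sign bit, so clear it with Mask2 (0x7fffffff), OR in the
  // sign bit taken from Tmp1, and rebuild the f64 with VMOVDRR.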
  Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
                     &Tmp0, 1);
  SDValue Lo = Tmp0.getValue(0);
  SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
  Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
}

SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MFI->setReturnAddressIsTaken(true);

  EVT VT = Op.getValueType();
  DebugLoc dl = Op.getDebugLoc();
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  if (Depth) {
    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
    SDValue Offset = DAG.getConstant(4, MVT::i32);
    return DAG.getLoad(VT, dl, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
                       MachinePointerInfo(), false, false, 0);
  }

  // Return LR, which contains the return address. Mark it an implicit live-in.
  unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
  return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
}

SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
  MFI->setFrameAddressIsTaken(true);

  EVT VT = Op.getValueType();
  DebugLoc dl = Op.getDebugLoc();  // FIXME probably not meaningful
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin())
    ? ARM::R7 : ARM::R11;
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
  while (Depth--)
    FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
                            MachinePointerInfo(),
                            false, false, 0);
  return FrameAddr;
}

/// ExpandBITCAST - If the target supports VFP, this function is called to
/// expand a bit convert where either the source or destination type is i64 to
/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
/// operand type is illegal (e.g., v2f32 for a target that doesn't support
/// vectors), since the legalizer won't know what to do with that.
static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  DebugLoc dl = N->getDebugLoc();
  SDValue Op = N->getOperand(0);

  // This function is only supposed to be called for i64 types, either as the
  // source or destination of the bit convert.
  EVT SrcVT = Op.getValueType();
  EVT DstVT = N->getValueType(0);
  assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
         "ExpandBITCAST called for non-i64 type");

  // Turn i64->f64 into VMOVDRR.
  if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
    SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
                             DAG.getConstant(0, MVT::i32));
    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
                             DAG.getConstant(1, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, DstVT,
                       DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
  }

  // Turn f64->i64 into VMOVRRD.
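  // (VMOVRRD produces the two i32 halves of the double register; the
  // BUILD_PAIR below glues them back together into the i64 result.)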
  if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
    SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
                              DAG.getVTList(MVT::i32, MVT::i32), &Op, 1);
    // Merge the pieces into a single i64 value.
    return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
  }

  return SDValue();
}

/// getZeroVector - Returns a vector of specified type with all zero elements.
/// Zero vectors are used to represent vector negation and in those cases
/// will be implemented with the NEON VNEG instruction. However, VNEG does
/// not support i64 elements, so sometimes the zero vectors will need to be
/// explicitly constructed. Regardless, use a canonical VMOV to create the
/// zero vector.
static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
  assert(VT.isVector() && "Expected a vector type");
  // The canonical modified immediate encoding of a zero vector is....0!
  SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32);
  EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
  SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
  return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
}

/// LowerShiftRightParts - Lower SRA_PARTS and SRL_PARTS, which return two
/// i32 values and take a 2 x i32 value to shift plus a shift amount.
SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
                                                SelectionDAG &DAG) const {
  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
  EVT VT = Op.getValueType();
  unsigned VTBits = VT.getSizeInBits();
  DebugLoc dl = Op.getDebugLoc();
  SDValue ShOpLo = Op.getOperand(0);
  SDValue ShOpHi = Op.getOperand(1);
  SDValue ShAmt = Op.getOperand(2);
  SDValue ARMcc;
  unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;

  assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);

  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
                                 DAG.getConstant(VTBits, MVT::i32), ShAmt);
  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
                                   DAG.getConstant(VTBits, MVT::i32));
  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
  SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
  SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);

  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
  SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
                          ARMcc, DAG, dl);
  SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
  SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc,
                           CCR, Cmp);

  SDValue Ops[2] = { Lo, Hi };
  return DAG.getMergeValues(Ops, 2, dl);
}

/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
/// i32 values and takes a 2 x i32 value to shift plus a shift amount.
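/// A minimal sketch of the expansion (assuming CMOV selects its second value
/// when the condition holds): with 32-bit halves (Lo, Hi) and amount Amt,
///   LoRes = Lo << Amt
///   HiRes = Amt >= 32 ? Lo << (Amt - 32)
///                     : (Hi << Amt) | (Lo >> (32 - Amt))
/// where the Amt >= 32 case is picked by a CMOV on the sign of Amt - 32.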
SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
                                               SelectionDAG &DAG) const {
  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
  EVT VT = Op.getValueType();
  unsigned VTBits = VT.getSizeInBits();
  DebugLoc dl = Op.getDebugLoc();
  SDValue ShOpLo = Op.getOperand(0);
  SDValue ShOpHi = Op.getOperand(1);
  SDValue ShAmt = Op.getOperand(2);
  SDValue ARMcc;

  assert(Op.getOpcode() == ISD::SHL_PARTS);
  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
                                 DAG.getConstant(VTBits, MVT::i32), ShAmt);
  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
                                   DAG.getConstant(VTBits, MVT::i32));
  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
  SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);

  SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
  SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
                          ARMcc, DAG, dl);
  SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
  SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc,
                           CCR, Cmp);

  SDValue Ops[2] = { Lo, Hi };
  return DAG.getMergeValues(Ops, 2, dl);
}

SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
                                            SelectionDAG &DAG) const {
  // The rounding mode is in bits 23:22 of the FPSCR.
  // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
  // The formula we use to implement this is ((FPSCR + (1 << 22)) >> 22) & 3,
  // so that the shift + and get folded into a bitfield extract.
  DebugLoc dl = Op.getDebugLoc();
  SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
                              DAG.getConstant(Intrinsic::arm_get_fpscr,
                                              MVT::i32));
  SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
                                  DAG.getConstant(1U << 22, MVT::i32));
  SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
                              DAG.getConstant(22, MVT::i32));
  return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
                     DAG.getConstant(3, MVT::i32));
}

static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
                         const ARMSubtarget *ST) {
  EVT VT = N->getValueType(0);
  DebugLoc dl = N->getDebugLoc();

  if (!ST->hasV6T2Ops())
    return SDValue();

  SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0));
  return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
}

static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
                          const ARMSubtarget *ST) {
  EVT VT = N->getValueType(0);
  DebugLoc dl = N->getDebugLoc();

  if (!VT.isVector())
    return SDValue();

  // Lower vector shifts on NEON to use VSHL.
  assert(ST->hasNEON() && "unexpected vector shift");

  // Left shifts translate directly to the vshiftu intrinsic.
  if (N->getOpcode() == ISD::SHL)
    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
                       DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32),
                       N->getOperand(0), N->getOperand(1));

  assert((N->getOpcode() == ISD::SRA ||
          N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");

  // NEON uses the same intrinsics for both left and right shifts. For
  // right shifts, the shift amounts are negative, so negate the vector of
  // shift amounts.
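  // For example, an SRA of <4 x i32> by a splat of 2 becomes a call to the
  // llvm.arm.neon.vshifts intrinsic with a shift-amount vector of -2s.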
  EVT ShiftVT = N->getOperand(1).getValueType();
  SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
                                     getZeroVector(ShiftVT, DAG, dl),
                                     N->getOperand(1));
  Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
                             Intrinsic::arm_neon_vshifts :
                             Intrinsic::arm_neon_vshiftu);
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
                     DAG.getConstant(vshiftInt, MVT::i32),
                     N->getOperand(0), NegatedCount);
}

static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
                                const ARMSubtarget *ST) {
  EVT VT = N->getValueType(0);
  DebugLoc dl = N->getDebugLoc();

  // We can get here for a node like i32 = ISD::SHL i32, i64
  if (VT != MVT::i64)
    return SDValue();

  assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
         "Unknown shift to lower!");

  // We only lower SRA, SRL of 1 here, all others use generic lowering.
  if (!isa<ConstantSDNode>(N->getOperand(1)) ||
      cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1)
    return SDValue();

  // If we are in thumb mode, we don't have RRX.
  if (ST->isThumb1Only()) return SDValue();

  // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
                           DAG.getConstant(0, MVT::i32));
  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
                           DAG.getConstant(1, MVT::i32));

  // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
  // captures the result into a carry flag.
  unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
  Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), &Hi, 1);

  // The low part is an ARMISD::RRX operand, which shifts the carry in.
  Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));

  // Merge the pieces into a single i64 value.
  return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
}

static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
  SDValue TmpOp0, TmpOp1;
  bool Invert = false;
  bool Swap = false;
  unsigned Opc = 0;

  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  SDValue CC = Op.getOperand(2);
  EVT VT = Op.getValueType();
  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
  DebugLoc dl = Op.getDebugLoc();

  if (Op.getOperand(1).getValueType().isFloatingPoint()) {
    switch (SetCCOpcode) {
    default: llvm_unreachable("Illegal FP comparison"); break;
    case ISD::SETUNE:
    case ISD::SETNE:  Invert = true; // Fallthrough
    case ISD::SETOEQ:
    case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
    case ISD::SETOLT:
    case ISD::SETLT:  Swap = true; // Fallthrough
    case ISD::SETOGT:
    case ISD::SETGT:  Opc = ARMISD::VCGT; break;
    case ISD::SETOLE:
    case ISD::SETLE:  Swap = true; // Fallthrough
    case ISD::SETOGE:
    case ISD::SETGE:  Opc = ARMISD::VCGE; break;
    case ISD::SETUGE: Swap = true; // Fallthrough
    case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
    case ISD::SETUGT: Swap = true; // Fallthrough
    case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
    case ISD::SETUEQ: Invert = true; // Fallthrough
    case ISD::SETONE:
      // Expand this to (OLT | OGT).
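      // i.e. "ordered and not equal": true exactly when a < b or a > b; the
      // two VCGTs below compute those halves and Opc = OR combines them (for
      // the SETUEQ fall-through, the Invert flag marks the result to be
      // complemented afterwards).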
      TmpOp0 = Op0;
      TmpOp1 = Op1;
      Opc = ISD::OR;
      Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
      Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1);
      break;
    case ISD::SETUO: Invert = true; // Fallthrough
    case ISD::SETO:
      // Expand this to (OLT | OGE).
      TmpOp0 = Op0;
      TmpOp1 = Op1;
      Opc = ISD::OR;
      Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
      Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1);
      break;
    }
  } else {
    // Integer comparisons.
    switch (SetCCOpcode) {
    default: llvm_unreachable("Illegal integer comparison"); break;
    case ISD::SETNE:  Invert = true;
    case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
    case ISD::SETLT:  Swap = true;
    case ISD::SETGT:  Opc = ARMISD::VCGT; break;
    case ISD::SETLE:  Swap = true;
    case ISD::SETGE:  Opc = ARMISD::VCGE; break;
    case ISD::SETULT: Swap = true;
    case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
    case ISD::SETULE: Swap = true;
    case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
    }

    // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
    if (Opc == ARMISD::VCEQ) {

      SDValue AndOp;
      if (ISD::isBuildVectorAllZeros(Op1.getNode()))
        AndOp = Op0;
      else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
        AndOp = Op1;

      // Ignore bitconvert.
      if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
        AndOp = AndOp.getOperand(0);

      if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
        Opc = ARMISD::VTST;
        Op0 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(0));
        Op1 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(1));
        Invert = !Invert;
      }
    }
  }

  if (Swap)
    std::swap(Op0, Op1);

  // If one of the operands is a constant vector zero, attempt to fold the
  // comparison to a specialized compare-against-zero form.
  SDValue SingleOp;
  if (ISD::isBuildVectorAllZeros(Op1.getNode()))
    SingleOp = Op0;
  else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
    if (Opc == ARMISD::VCGE)
      Opc = ARMISD::VCLEZ;
    else if (Opc == ARMISD::VCGT)
      Opc = ARMISD::VCLTZ;
    SingleOp = Op1;
  }

  SDValue Result;
  if (SingleOp.getNode()) {
    switch (Opc) {
    case ARMISD::VCEQ:
      Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break;
    case ARMISD::VCGE:
      Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break;
    case ARMISD::VCLEZ:
      Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break;
    case ARMISD::VCGT:
      Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break;
    case ARMISD::VCLTZ:
      Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break;
    default:
      Result =