//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "X86ISelLowering.h"
#include "Utils/X86ShuffleDecode.h"
#include "X86CallingConv.h"
#include "X86FrameLowering.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86ShuffleDecodeConstantPool.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
#include "X86IntrinsicsInfo.h"
#include <bitset>
#include <numeric>
#include <cctype>
using namespace llvm;

#define DEBUG_TYPE "x86-isel"

STATISTIC(NumTailCalls, "Number of tail calls");

static cl::opt<bool> ExperimentalVectorWideningLegalization(
    "x86-experimental-vector-widening-legalization", cl::init(false),
    cl::desc("Enable an experimental vector type legalization through widening "
             "rather than promotion."),
    cl::Hidden);

X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
                                     const X86Subtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
  X86ScalarSSEf64 = Subtarget.hasSSE2();
  X86ScalarSSEf32 = Subtarget.hasSSE1();
  MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize());

  // Set up the TargetLowering object.

  // X86 is weird. It always uses i8 for shift amounts and setcc results.
  setBooleanContents(ZeroOrOneBooleanContent);
  // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  // For 64-bit, since we have so many registers, use the ILP scheduler.
  // For 32-bit, use the register pressure specific scheduling.
  // For Atom, always use ILP scheduling.
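  // (Atom is an in-order core, so exposing ILP tends to matter more there
  // than limiting register pressure.)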
  if (Subtarget.isAtom())
    setSchedulingPreference(Sched::ILP);
  else if (Subtarget.is64Bit())
    setSchedulingPreference(Sched::ILP);
  else
    setSchedulingPreference(Sched::RegPressure);
  const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());

  // Bypass expensive divides on Atom when compiling with O2.
  if (TM.getOptLevel() >= CodeGenOpt::Default) {
    if (Subtarget.hasSlowDivide32())
      addBypassSlowDiv(32, 8);
    if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
      addBypassSlowDiv(64, 16);
  }

  if (Subtarget.isTargetKnownWindowsMSVC()) {
    // Set up the Windows compiler runtime calls.
    setLibcallName(RTLIB::SDIV_I64, "_alldiv");
    setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
    setLibcallName(RTLIB::SREM_I64, "_allrem");
    setLibcallName(RTLIB::UREM_I64, "_aullrem");
    setLibcallName(RTLIB::MUL_I64, "_allmul");
    setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
    setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
    setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
    setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
    setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
  }

  if (Subtarget.isTargetDarwin()) {
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmp(false);
    setUseUnderscoreLongJmp(false);
  } else if (Subtarget.isTargetWindowsGNU()) {
    // MS runtime is weird: it exports _setjmp, but longjmp (no underscore)!
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(false);
  } else {
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(true);
  }

  // Set up the register classes.
  addRegisterClass(MVT::i8, &X86::GR8RegClass);
  addRegisterClass(MVT::i16, &X86::GR16RegClass);
  addRegisterClass(MVT::i32, &X86::GR32RegClass);
  if (Subtarget.is64Bit())
    addRegisterClass(MVT::i64, &X86::GR64RegClass);

  for (MVT VT : MVT::integer_valuetypes())
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);

  // We don't accept any truncstore of integer registers.
  setTruncStoreAction(MVT::i64, MVT::i32, Expand);
  setTruncStoreAction(MVT::i64, MVT::i16, Expand);
  setTruncStoreAction(MVT::i64, MVT::i8, Expand);
  setTruncStoreAction(MVT::i32, MVT::i16, Expand);
  setTruncStoreAction(MVT::i32, MVT::i8, Expand);
  setTruncStoreAction(MVT::i16, MVT::i8, Expand);

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // SETOEQ and SETUNE require checking two conditions.
  setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
  setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);

  if (Subtarget.is64Bit()) {
    if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512())
      // f32/f64 are legal, f80 is custom.
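      // (AVX-512 provides VCVTUSI2SS/VCVTUSI2SD for unsigned 32-bit integer
      // sources.)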
      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
    else
      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
  } else if (!Subtarget.useSoftFloat()) {
    // We have an algorithm for SSE2->double, and we turn this into a
    // 64-bit FILD followed by conditional FADD for other targets.
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
    // We have an algorithm for SSE2, and we turn this into a 64-bit
    // FILD or VCVTUSI2SS/SD for other targets.
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  }

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);

  if (!Subtarget.useSoftFloat()) {
    // SSE has no i16 to fp conversion, only i32.
    if (X86ScalarSSEf32) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
      // f32 and f64 cases are Legal, f80 case is not.
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    } else {
      setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    }
  } else {
    setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Promote);
  }

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTs, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);

  if (!Subtarget.useSoftFloat()) {
    // In 32-bit mode these are custom lowered. In 64-bit mode f32 and f64
    // are Legal, f80 is custom lowered.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

    if (X86ScalarSSEf32) {
      setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
      // f32 and f64 cases are Legal, f80 case is not.
      setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    } else {
      setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom);
      setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    }
  } else {
    setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Expand);
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);

  if (Subtarget.is64Bit()) {
    if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
      // FP_TO_UINT-i32/i64 is legal for f32/f64, but custom for f80.
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
    } else {
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
      setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    }
  } else if (!Subtarget.useSoftFloat()) {
    // Since AVX is a superset of SSE3, only check for SSE here.
    if (Subtarget.hasSSE1() && !Subtarget.hasSSE3())
      // Expand FP_TO_UINT into a select.
      // FIXME: We would like to use a Custom expander here eventually to do
      // the optimal thing for SSE vs. the default expansion in the legalizer.
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
    else
      // With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom.
      // With SSE3 we can use fisttpll to convert to a signed i64; without
      // SSE, we're stuck with a fistpll.
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
  }

  // TODO: when we have SSE, these could be more efficient, by using movd/movq.
  if (!X86ScalarSSEf64) {
    setOperationAction(ISD::BITCAST, MVT::f32, Expand);
    setOperationAction(ISD::BITCAST, MVT::i32, Expand);
    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::BITCAST, MVT::f64, Expand);
      // Without SSE, i64->f64 goes through memory.
      setOperationAction(ISD::BITCAST, MVT::i64, Expand);
    }
  } else if (!Subtarget.is64Bit())
    setOperationAction(ISD::BITCAST, MVT::i64, Custom);

  // Scalar integer divide and remainder are lowered to use operations that
  // produce two results, to match the available instructions. This exposes
  // the two-result form to trivial CSE, which is able to combine x/y and x%y
  // into a single instruction.
  //
  // Scalar integer multiply-high is also lowered to use two-result
  // operations, to match the available instructions. However, plain multiply
  // (low) operations are left as Legal, as there are single-result
  // instructions for this in x86. Using the two-result multiply instructions
  // when both high and low results are needed must be arranged by dagcombine.
  for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
    setOperationAction(ISD::MULHS, VT, Expand);
    setOperationAction(ISD::MULHU, VT, Expand);
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);

    // Add/Sub overflow ops with MVT::Glue are lowered to EFLAGS dependences.
    setOperationAction(ISD::ADDC, VT, Custom);
    setOperationAction(ISD::ADDE, VT, Custom);
    setOperationAction(ISD::SUBC, VT, Custom);
    setOperationAction(ISD::SUBE, VT, Custom);
  }

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
                   MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
    setOperationAction(ISD::BR_CC, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
  }
  if (Subtarget.is64Bit())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  setOperationAction(ISD::FP_ROUND_INREG, MVT::f32, Expand);

  setOperationAction(ISD::FREM, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f80, Expand);
  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);

  // Promote the i8 variants and force them on up to i32 which has a shorter
  // encoding.
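  // (BSF/TZCNT have no 8-bit form, and the 32-bit form avoids the 16-bit
  // operand-size prefix.)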
  setOperationPromotedToType(ISD::CTTZ, MVT::i8, MVT::i32);
  setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8, MVT::i32);
  if (!Subtarget.hasBMI()) {
    setOperationAction(ISD::CTTZ, MVT::i16, Custom);
    setOperationAction(ISD::CTTZ, MVT::i32, Custom);
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Legal);
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Legal);
    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::CTTZ, MVT::i64, Custom);
      setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
    }
  }

  if (Subtarget.hasLZCNT()) {
    // When promoting the i8 variants, force them to i32 for a shorter
    // encoding.
    setOperationPromotedToType(ISD::CTLZ, MVT::i8, MVT::i32);
    setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8, MVT::i32);
  } else {
    setOperationAction(ISD::CTLZ, MVT::i8, Custom);
    setOperationAction(ISD::CTLZ, MVT::i16, Custom);
    setOperationAction(ISD::CTLZ, MVT::i32, Custom);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8, Custom);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Custom);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::CTLZ, MVT::i64, Custom);
      setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
    }
  }

  // Special handling for half-precision floating point conversions.
  // If we don't have F16C support, then lower half float conversions
  // into library calls.
  if (Subtarget.useSoftFloat() || !Subtarget.hasF16C()) {
    setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
    setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
  }

  // There's never any support for operations beyond MVT::f32.
  setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
  setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
  setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
  setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);

  setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
  setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  setTruncStoreAction(MVT::f64, MVT::f16, Expand);
  setTruncStoreAction(MVT::f80, MVT::f16, Expand);

  if (Subtarget.hasPOPCNT()) {
    setOperationAction(ISD::CTPOP, MVT::i8, Promote);
  } else {
    setOperationAction(ISD::CTPOP, MVT::i8, Expand);
    setOperationAction(ISD::CTPOP, MVT::i16, Expand);
    setOperationAction(ISD::CTPOP, MVT::i32, Expand);
    if (Subtarget.is64Bit())
      setOperationAction(ISD::CTPOP, MVT::i64, Expand);
  }

  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);

  if (!Subtarget.hasMOVBE())
    setOperationAction(ISD::BSWAP, MVT::i16, Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT, MVT::i1, Promote);
  // X86 wants to expand cmov itself.
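  // Scalar SELECT/SETCC are custom-lowered below so they can be turned into
  // CMOV/SETcc-based sequences during lowering.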
  for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
    setOperationAction(ISD::SELECT, VT, Custom);
    setOperationAction(ISD::SETCC, VT, Custom);
  }
  for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
    if (VT == MVT::i64 && !Subtarget.is64Bit())
      continue;
    setOperationAction(ISD::SELECT, VT, Custom);
    setOperationAction(ISD::SETCC, VT, Custom);
    setOperationAction(ISD::SETCCE, VT, Custom);
  }
  setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
  // NOTE: EH_SJLJ_SETJMP/_LONGJMP are not intended to support SjLj exception
  // handling; they are a light-weight setjmp/longjmp replacement used for
  // continuations, user-level threading, and the like. As a result, no other
  // SjLj exception interfaces are implemented; please don't build your own
  // exception handling on top of them.
  // LLVM/Clang supports zero-cost DWARF exception handling.
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
  setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
  if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
    setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");

  // Darwin ABI issue.
  for (auto VT : { MVT::i32, MVT::i64 }) {
    if (VT == MVT::i64 && !Subtarget.is64Bit())
      continue;
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::JumpTable, VT, Custom);
    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::GlobalTLSAddress, VT, Custom);
    setOperationAction(ISD::ExternalSymbol, VT, Custom);
    setOperationAction(ISD::BlockAddress, VT, Custom);
  }
  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
  for (auto VT : { MVT::i32, MVT::i64 }) {
    if (VT == MVT::i64 && !Subtarget.is64Bit())
      continue;
    setOperationAction(ISD::SHL_PARTS, VT, Custom);
    setOperationAction(ISD::SRA_PARTS, VT, Custom);
    setOperationAction(ISD::SRL_PARTS, VT, Custom);
  }

  if (Subtarget.hasSSE1())
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);

  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

  // Expand certain atomics
  for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
    setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
    setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
  }

  if (Subtarget.hasCmpxchg16b()) {
    setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
  }

  // FIXME - use subtarget debug flags
  if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
      !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() &&
      TM.Options.ExceptionModel != ExceptionHandling::SjLj) {
    setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
  }

  setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
  setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);

  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);
  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  bool Is64Bit = Subtarget.is64Bit();
  setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);

  // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
  setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
  setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);

  if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) {
    // f32 and f64 use SSE.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, &X86::FR32RegClass);
    addRegisterClass(MVT::f64, &X86::FR64RegClass);

    for (auto VT : { MVT::f32, MVT::f64 }) {
      // Use ANDPD to simulate FABS.
      setOperationAction(ISD::FABS, VT, Custom);

      // Use XORP to simulate FNEG.
      setOperationAction(ISD::FNEG, VT, Custom);

      // Use ANDPD and ORPD to simulate FCOPYSIGN.
      setOperationAction(ISD::FCOPYSIGN, VT, Custom);

      // We don't support sin/cos/fmod
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FSINCOS, VT, Expand);
    }

    // Lower this to MOVMSK plus an AND.
    setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
    setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    addLegalFPImmediate(APFloat(+0.0));  // xorpd
    addLegalFPImmediate(APFloat(+0.0f)); // xorps
  } else if (UseX87 && X86ScalarSSEf32) {
    // Use SSE for f32, x87 for f64.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, &X86::FR32RegClass);
    addRegisterClass(MVT::f64, &X86::RFP64RegClass);

    // Use ANDPS to simulate FABS.
    setOperationAction(ISD::FABS, MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG, MVT::f32, Custom);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);

    // Use ANDPS and ORPS to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // We don't support sin/cos/fmod
    setOperationAction(ISD::FSIN, MVT::f32, Expand);
    setOperationAction(ISD::FCOS, MVT::f32, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f32, Expand);

    // Special cases we handle for FP constants.
    addLegalFPImmediate(APFloat(+0.0f)); // xorps
    addLegalFPImmediate(APFloat(+0.0));  // FLD0
    addLegalFPImmediate(APFloat(+1.0));  // FLD1
    addLegalFPImmediate(APFloat(-0.0));  // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0));  // FLD1/FCHS

    if (!TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::FSIN, MVT::f64, Expand);
      setOperationAction(ISD::FCOS, MVT::f64, Expand);
      setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
    }
  } else if (UseX87) {
    // f32 and f64 in x87.
    // Set up the FP register classes.
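    // With no SSE at all, both types live on the x87 floating-point stack.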
    addRegisterClass(MVT::f64, &X86::RFP64RegClass);
    addRegisterClass(MVT::f32, &X86::RFP32RegClass);

    for (auto VT : { MVT::f32, MVT::f64 }) {
      setOperationAction(ISD::UNDEF, VT, Expand);
      setOperationAction(ISD::FCOPYSIGN, VT, Expand);

      if (!TM.Options.UnsafeFPMath) {
        setOperationAction(ISD::FSIN, VT, Expand);
        setOperationAction(ISD::FCOS, VT, Expand);
        setOperationAction(ISD::FSINCOS, VT, Expand);
      }
    }
    addLegalFPImmediate(APFloat(+0.0));  // FLD0
    addLegalFPImmediate(APFloat(+1.0));  // FLD1
    addLegalFPImmediate(APFloat(-0.0));  // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0));  // FLD1/FCHS
    addLegalFPImmediate(APFloat(+0.0f)); // FLD0
    addLegalFPImmediate(APFloat(+1.0f)); // FLD1
    addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
  }

  // We don't support FMA.
  setOperationAction(ISD::FMA, MVT::f64, Expand);
  setOperationAction(ISD::FMA, MVT::f32, Expand);

  // Long double always uses X87, except f128 in MMX.
  if (UseX87) {
    if (Subtarget.is64Bit() && Subtarget.hasMMX()) {
      addRegisterClass(MVT::f128, &X86::FR128RegClass);
      ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
      setOperationAction(ISD::FABS, MVT::f128, Custom);
      setOperationAction(ISD::FNEG, MVT::f128, Custom);
      setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
    }

    addRegisterClass(MVT::f80, &X86::RFP80RegClass);
    setOperationAction(ISD::UNDEF, MVT::f80, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
    {
      APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended);
      addLegalFPImmediate(TmpFlt); // FLD0
      TmpFlt.changeSign();
      addLegalFPImmediate(TmpFlt); // FLD0/FCHS

      bool ignored;
      APFloat TmpFlt2(+1.0);
      TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
                      &ignored);
      addLegalFPImmediate(TmpFlt2); // FLD1
      TmpFlt2.changeSign();
      addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
    }

    if (!TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::FSIN, MVT::f80, Expand);
      setOperationAction(ISD::FCOS, MVT::f80, Expand);
      setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
    }

    setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
    setOperationAction(ISD::FCEIL, MVT::f80, Expand);
    setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
    setOperationAction(ISD::FRINT, MVT::f80, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
    setOperationAction(ISD::FMA, MVT::f80, Expand);
  }

  // Always use a library call for pow.
  setOperationAction(ISD::FPOW, MVT::f32, Expand);
  setOperationAction(ISD::FPOW, MVT::f64, Expand);
  setOperationAction(ISD::FPOW, MVT::f80, Expand);

  setOperationAction(ISD::FLOG, MVT::f80, Expand);
  setOperationAction(ISD::FLOG2, MVT::f80, Expand);
  setOperationAction(ISD::FLOG10, MVT::f80, Expand);
  setOperationAction(ISD::FEXP, MVT::f80, Expand);
  setOperationAction(ISD::FEXP2, MVT::f80, Expand);
  setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
  setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);

  // Some FP actions are always expanded for vector types.
  for (auto VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32,
                   MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
    setOperationAction(ISD::FSIN, VT, Expand);
    setOperationAction(ISD::FSINCOS, VT, Expand);
    setOperationAction(ISD::FCOS, VT, Expand);
    setOperationAction(ISD::FREM, VT, Expand);
    setOperationAction(ISD::FPOWI, VT, Expand);
    setOperationAction(ISD::FCOPYSIGN, VT, Expand);
    setOperationAction(ISD::FPOW, VT, Expand);
    setOperationAction(ISD::FLOG, VT, Expand);
    setOperationAction(ISD::FLOG2, VT, Expand);
    setOperationAction(ISD::FLOG10, VT, Expand);
    setOperationAction(ISD::FEXP, VT, Expand);
    setOperationAction(ISD::FEXP2, VT, Expand);
  }

  // First set operation action for all vector types to either promote
  // (for widening) or expand (for scalarization). Then we will selectively
  // turn on ones that can be effectively codegen'd.
  for (MVT VT : MVT::vector_valuetypes()) {
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Expand);
    setOperationAction(ISD::INSERT_SUBVECTOR, VT, Expand);
    setOperationAction(ISD::FMA, VT, Expand);
    setOperationAction(ISD::FFLOOR, VT, Expand);
    setOperationAction(ISD::FCEIL, VT, Expand);
    setOperationAction(ISD::FTRUNC, VT, Expand);
    setOperationAction(ISD::FRINT, VT, Expand);
    setOperationAction(ISD::FNEARBYINT, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::MULHS, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    setOperationAction(ISD::MULHU, VT, Expand);
    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::UDIVREM, VT, Expand);
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
    setOperationAction(ISD::ROTL, VT, Expand);
    setOperationAction(ISD::ROTR, VT, Expand);
    setOperationAction(ISD::BSWAP, VT, Expand);
    setOperationAction(ISD::SETCC, VT, Expand);
    setOperationAction(ISD::FP_TO_UINT, VT, Expand);
    setOperationAction(ISD::FP_TO_SINT, VT, Expand);
    setOperationAction(ISD::UINT_TO_FP, VT, Expand);
    setOperationAction(ISD::SINT_TO_FP, VT, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
    setOperationAction(ISD::TRUNCATE, VT, Expand);
    setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
    setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
    setOperationAction(ISD::ANY_EXTEND, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
    for (MVT InnerVT : MVT::vector_valuetypes()) {
      setTruncStoreAction(InnerVT, VT, Expand);

      setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
      setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);

      // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
      // types; we have to deal with them whether we ask for Expansion or not.
      // Setting Expand causes its own optimisation problems though, so leave
      // them legal.
      if (VT.getVectorElementType() == MVT::i1)
        setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);

      // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
      // split/scalarized right now.
      if (VT.getVectorElementType() == MVT::f16)
        setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
    }
  }

  // FIXME: In order to prevent SSE instructions being expanded to MMX ones
  // with -msoft-float, disable use of MMX as well.
  if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
    addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
    // No operations on x86mmx are supported; everything uses intrinsics.
  }

  if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
    addRegisterClass(MVT::v4f32, &X86::VR128RegClass);

    setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
    setOperationAction(ISD::FABS, MVT::v4f32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
    setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
  }

  if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
    addRegisterClass(MVT::v2f64, &X86::VR128RegClass);

    // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
    // registers cannot be used even for integer operations.
    addRegisterClass(MVT::v16i8, &X86::VR128RegClass);
    addRegisterClass(MVT::v8i16, &X86::VR128RegClass);
    addRegisterClass(MVT::v4i32, &X86::VR128RegClass);
    addRegisterClass(MVT::v2i64, &X86::VR128RegClass);

    setOperationAction(ISD::MUL, MVT::v16i8, Custom);
    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v2i64, Custom);
    setOperationAction(ISD::UMUL_LOHI, MVT::v4i32, Custom);
    setOperationAction(ISD::SMUL_LOHI, MVT::v4i32, Custom);
    setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
    setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
    setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
    setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
    setOperationAction(ISD::MUL, MVT::v8i16, Legal);
    setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
    setOperationAction(ISD::FABS, MVT::v2f64, Custom);

    setOperationAction(ISD::SMAX, MVT::v8i16, Legal);
    setOperationAction(ISD::UMAX, MVT::v16i8, Legal);
    setOperationAction(ISD::SMIN, MVT::v8i16, Legal);
    setOperationAction(ISD::UMIN, MVT::v16i8, Legal);

    setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
    setOperationAction(ISD::SETCC, MVT::v16i8, Custom);
    setOperationAction(ISD::SETCC, MVT::v8i16, Custom);
    setOperationAction(ISD::SETCC, MVT::v4i32, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

    setOperationAction(ISD::CTPOP, MVT::v16i8, Custom);
    setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);
    setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
    setOperationAction(ISD::CTPOP, MVT::v2i64, Custom);

    setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
    setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
    setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
    // ISD::CTTZ v2i64 - scalarization is faster.

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
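    // (The custom shuffle lowering picks among the SSE2 shuffle/unpack
    // instructions, and PSHUFB once SSSE3 is available.)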
    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
      setOperationAction(ISD::VSELECT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    }

    // We support custom legalizing of sext and anyext loads for specific
    // memory vector types which we can load as a scalar (or sequence of
    // scalars) and extend in-register to a legal 128-bit vector type. For sext
    // loads these must work with a single scalar load.
    for (MVT VT : MVT::integer_vector_valuetypes()) {
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Custom);
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Custom);
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i8, Custom);
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom);
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom);
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom);
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Custom);
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Custom);
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Custom);
    }

    for (auto VT : { MVT::v2f64, MVT::v2i64 }) {
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
      setOperationAction(ISD::VSELECT, VT, Custom);

      if (VT == MVT::v2i64 && !Subtarget.is64Bit())
        continue;

      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    }

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
      setOperationPromotedToType(ISD::AND, VT, MVT::v2i64);
      setOperationPromotedToType(ISD::OR, VT, MVT::v2i64);
      setOperationPromotedToType(ISD::XOR, VT, MVT::v2i64);
      setOperationPromotedToType(ISD::LOAD, VT, MVT::v2i64);
      setOperationPromotedToType(ISD::SELECT, VT, MVT::v2i64);
    }

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT, MVT::v2i64, Custom);

    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);

    setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);

    setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
    // As there is no 64-bit GPR available, we need to build a special custom
    // sequence to convert from v2i32 to v2f32.
    if (!Subtarget.is64Bit())
      setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);

    setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
    setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);

    for (MVT VT : MVT::fp_vector_valuetypes())
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2f32, Legal);

    setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
    setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
    setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);

    setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
    setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
    setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);

    for (auto VT : { MVT::v8i16, MVT::v16i8 }) {
      setOperationAction(ISD::SRL, VT, Custom);
      setOperationAction(ISD::SHL, VT, Custom);
      setOperationAction(ISD::SRA, VT, Custom);
    }

    // In the customized shift lowering, the legal cases in AVX2 will be
    // recognized.
    for (auto VT : { MVT::v4i32, MVT::v2i64 }) {
      setOperationAction(ISD::SRL, VT, Custom);
      setOperationAction(ISD::SHL, VT, Custom);
      setOperationAction(ISD::SRA, VT, Custom);
    }
  }

  if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
    setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom);
    setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
    setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
    // ISD::CTLZ v4i32 - scalarization is faster.
    // ISD::CTLZ v2i64 - scalarization is faster.
  }

  if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
    for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
      setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
      setOperationAction(ISD::FCEIL, RoundedTy, Legal);
      setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
      setOperationAction(ISD::FRINT, RoundedTy, Legal);
      setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
    }

    setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
    setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
    setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
    setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
    setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
    setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
    setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
    setOperationAction(ISD::UMIN, MVT::v4i32, Legal);

    // FIXME: Do we need to handle scalar-to-vector here?
    setOperationAction(ISD::MUL, MVT::v4i32, Legal);

    // We directly match byte blends in the backend as they match the VSELECT
    // condition form.
    setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);

    // SSE41 brings specific instructions for doing vector sign extend even in
    // cases where we don't have SRA.
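    // (PMOVSX/PMOVZX can also extend directly from a narrow memory operand.)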
    for (MVT VT : MVT::integer_vector_valuetypes()) {
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
    }

    // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
    setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, MVT::v8i8, Legal);
    setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Legal);
    setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i8, Legal);
    setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
    setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
    setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i32, Legal);

    setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i16, MVT::v8i8, Legal);
    setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8, Legal);
    setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i8, Legal);
    setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
    setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
    setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i32, Legal);

    // i8 vectors are custom because the source register and source
    // memory operand types are not the same width.
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
  }

  if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
                     MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
      setOperationAction(ISD::ROTL, VT, Custom);

    // XOP can efficiently perform BITREVERSE with VPPERM.
    for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
      setOperationAction(ISD::BITREVERSE, VT, Custom);

    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
                     MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
      setOperationAction(ISD::BITREVERSE, VT, Custom);
  }

  if (!Subtarget.useSoftFloat() && Subtarget.hasFp256()) {
    bool HasInt256 = Subtarget.hasInt256();

    addRegisterClass(MVT::v32i8, &X86::VR256RegClass);
    addRegisterClass(MVT::v16i16, &X86::VR256RegClass);
    addRegisterClass(MVT::v8i32, &X86::VR256RegClass);
    addRegisterClass(MVT::v8f32, &X86::VR256RegClass);
    addRegisterClass(MVT::v4i64, &X86::VR256RegClass);
    addRegisterClass(MVT::v4f64, &X86::VR256RegClass);

    for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
      setOperationAction(ISD::FFLOOR, VT, Legal);
      setOperationAction(ISD::FCEIL, VT, Legal);
      setOperationAction(ISD::FTRUNC, VT, Legal);
      setOperationAction(ISD::FRINT, VT, Legal);
      setOperationAction(ISD::FNEARBYINT, VT, Legal);
      setOperationAction(ISD::FNEG, VT, Custom);
      setOperationAction(ISD::FABS, VT, Custom);
    }

    // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
    // even though v8i16 is a legal type.
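    // Promotion widens the result to v8i32 and truncates it back to v8i16.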
    setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Promote);
    setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Promote);
    setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);

    setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Promote);
    setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
    setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);

    setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);

    for (MVT VT : MVT::fp_vector_valuetypes())
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4f32, Legal);

    for (auto VT : { MVT::v32i8, MVT::v16i16 }) {
      setOperationAction(ISD::SRL, VT, Custom);
      setOperationAction(ISD::SHL, VT, Custom);
      setOperationAction(ISD::SRA, VT, Custom);
    }

    setOperationAction(ISD::SETCC, MVT::v32i8, Custom);
    setOperationAction(ISD::SETCC, MVT::v16i16, Custom);
    setOperationAction(ISD::SETCC, MVT::v8i32, Custom);
    setOperationAction(ISD::SETCC, MVT::v4i64, Custom);

    setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
    setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
    setOperationAction(ISD::SELECT, MVT::v8f32, Custom);

    setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v16i16, Custom);
    setOperationAction(ISD::ANY_EXTEND, MVT::v4i64, Custom);
    setOperationAction(ISD::ANY_EXTEND, MVT::v8i32, Custom);
    setOperationAction(ISD::ANY_EXTEND, MVT::v16i16, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
    setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom);

    for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
      setOperationAction(ISD::CTPOP, VT, Custom);
      setOperationAction(ISD::CTTZ, VT, Custom);
    }

    // ISD::CTLZ v8i32/v4i64 - scalarization is faster without AVX2
    // as we end up splitting the 256-bit vectors.
    for (auto VT : { MVT::v32i8, MVT::v16i16 })
      setOperationAction(ISD::CTLZ, VT, Custom);

    if (HasInt256)
      for (auto VT : { MVT::v8i32, MVT::v4i64 })
        setOperationAction(ISD::CTLZ, VT, Custom);

    if (Subtarget.hasAnyFMA()) {
      for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
                       MVT::v2f64, MVT::v4f64 })
        setOperationAction(ISD::FMA, VT, Legal);
    }

    for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
      setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);
      setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);
    }

    setOperationAction(ISD::MUL, MVT::v4i64, Custom);
    setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom);
    setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
    setOperationAction(ISD::MUL, MVT::v32i8, Custom);

    setOperationAction(ISD::UMUL_LOHI, MVT::v8i32, Custom);
    setOperationAction(ISD::SMUL_LOHI, MVT::v8i32, Custom);

    setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
    setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
    setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
    setOperationAction(ISD::MULHS, MVT::v32i8, Custom);

    for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
      setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
      setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
      setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
      setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
    }

    if (HasInt256) {
      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i64, Custom);
      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i32, Custom);
      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v16i16, Custom);

      // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
      // when we have a 256-bit-wide blend with immediate.
      setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);

      // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
      setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, MVT::v16i8, Legal);
      setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32, MVT::v8i8, Legal);
      setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i8, Legal);
      setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32, MVT::v8i16, Legal);
      setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i16, Legal);
      setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i32, Legal);

      setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i16, MVT::v16i8, Legal);
      setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i32, MVT::v8i8, Legal);
      setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i8, Legal);
      setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i32, MVT::v8i16, Legal);
      setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i16, Legal);
      setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i32, Legal);
    }

    // In the customized shift lowering, the legal cases in AVX2 will be
    // recognized.
    for (auto VT : { MVT::v8i32, MVT::v4i64 }) {
      setOperationAction(ISD::SRL, VT, Custom);
      setOperationAction(ISD::SHL, VT, Custom);
      setOperationAction(ISD::SRA, VT, Custom);
    }

    for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
                     MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
      setOperationAction(ISD::MLOAD, VT, Legal);
      setOperationAction(ISD::MSTORE, VT, Legal);
    }

    // Extract subvector is special because the value type
    // (result) is 128-bit but the source is 256-bit wide.
    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
                     MVT::v4f32, MVT::v2f64 }) {
      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
    }

    // Custom lower several nodes for 256-bit types.
    for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
                    MVT::v8f32, MVT::v4f64 }) {
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
      setOperationAction(ISD::VSELECT, VT, Custom);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
      setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
    }

    if (HasInt256)
      setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);

    // Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
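    // (These operations are element-size agnostic, so one 64-bit-element
    // pattern covers all of them.)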
    for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
      setOperationPromotedToType(ISD::AND, VT, MVT::v4i64);
      setOperationPromotedToType(ISD::OR, VT, MVT::v4i64);
      setOperationPromotedToType(ISD::XOR, VT, MVT::v4i64);
      setOperationPromotedToType(ISD::LOAD, VT, MVT::v4i64);
      setOperationPromotedToType(ISD::SELECT, VT, MVT::v4i64);
    }
  }

  if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
    addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
    addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
    addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
    addRegisterClass(MVT::v8f64, &X86::VR512RegClass);

    addRegisterClass(MVT::i1, &X86::VK1RegClass);
    addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
    addRegisterClass(MVT::v16i1, &X86::VK16RegClass);

    for (MVT VT : MVT::fp_vector_valuetypes())
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);

    for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD, ISD::EXTLOAD}) {
      setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
      setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
      setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
      setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
      setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
      setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
    }
    setOperationAction(ISD::BR_CC, MVT::i1, Expand);
    setOperationAction(ISD::SETCC, MVT::i1, Custom);
    setOperationAction(ISD::SETCCE, MVT::i1, Custom);
    setOperationAction(ISD::SELECT_CC, MVT::i1, Expand);
    setOperationAction(ISD::XOR, MVT::i1, Legal);
    setOperationAction(ISD::OR, MVT::i1, Legal);
    setOperationAction(ISD::AND, MVT::i1, Legal);
    setOperationAction(ISD::SUB, MVT::i1, Custom);
    setOperationAction(ISD::ADD, MVT::i1, Custom);
    setOperationAction(ISD::MUL, MVT::i1, Custom);

    for (MVT VT : {MVT::v2i64, MVT::v4i32, MVT::v8i32, MVT::v4i64, MVT::v8i16,
                   MVT::v16i8, MVT::v16i16, MVT::v32i8, MVT::v16i32,
                   MVT::v8i64, MVT::v32i16, MVT::v64i8}) {
      MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
      setLoadExtAction(ISD::SEXTLOAD, VT, MaskVT, Custom);
      setLoadExtAction(ISD::ZEXTLOAD, VT, MaskVT, Custom);
      setLoadExtAction(ISD::EXTLOAD, VT, MaskVT, Custom);
      setTruncStoreAction(VT, MaskVT, Custom);
    }

    for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
      setOperationAction(ISD::FNEG, VT, Custom);
      setOperationAction(ISD::FABS, VT, Custom);
      setOperationAction(ISD::FMA, VT, Legal);
    }

    setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v8i1, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::v16i1, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::v16i8, Promote);
    setOperationAction(ISD::SINT_TO_FP, MVT::v16i16, Promote);
    setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v16i8, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v16i16, Custom);
    setOperationAction(ISD::FP_ROUND, MVT::v8f32, Legal);
    setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);

    setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
    setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
    setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
    setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
    setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
    if (Subtarget.hasVLX()) {
      setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
      setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
      setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
      setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
      setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);

      setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
      setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
      setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
      setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
      setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
    } else {
      setOperationAction(ISD::MLOAD, MVT::v8i32, Custom);
      setOperationAction(ISD::MLOAD, MVT::v8f32, Custom);
      setOperationAction(ISD::MSTORE, MVT::v8i32, Custom);
      setOperationAction(ISD::MSTORE, MVT::v8f32, Custom);
    }
    setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i1, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i1, Custom);
    setOperationAction(ISD::VSELECT, MVT::v8i1, Expand);
    setOperationAction(ISD::VSELECT, MVT::v16i1, Expand);
    if (Subtarget.hasDQI()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
      setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
      setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
      setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
      if (Subtarget.hasVLX()) {
        setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Legal);
        setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
        setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Legal);
        setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
        setOperationAction(ISD::FP_TO_SINT, MVT::v4i64, Legal);
        setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
        setOperationAction(ISD::FP_TO_UINT, MVT::v4i64, Legal);
        setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
      }
    }
    if (Subtarget.hasVLX()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
      setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
      setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
      setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
      setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
      setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
      setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
      setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
      setOperationAction(ISD::ZERO_EXTEND, MVT::v4i32, Custom);
      setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Custom);

      // FIXME. These commands are available on SSE/AVX2, add relevant patterns.
      setLoadExtAction(ISD::EXTLOAD, MVT::v8i32, MVT::v8i8, Legal);
      setLoadExtAction(ISD::EXTLOAD, MVT::v8i32, MVT::v8i16, Legal);
      setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8, Legal);
      setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
      setLoadExtAction(ISD::EXTLOAD, MVT::v4i64, MVT::v4i8, Legal);
      setLoadExtAction(ISD::EXTLOAD, MVT::v4i64, MVT::v4i16, Legal);
      setLoadExtAction(ISD::EXTLOAD, MVT::v4i64, MVT::v4i32, Legal);
      setLoadExtAction(ISD::EXTLOAD, MVT::v2i64, MVT::v2i8, Legal);
      setLoadExtAction(ISD::EXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
      setLoadExtAction(ISD::EXTLOAD, MVT::v2i64, MVT::v2i32, Legal);
    }

    setOperationAction(ISD::TRUNCATE, MVT::v8i1, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v16i1, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
    setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
    setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v16i8, Custom);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v8i16, Custom);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
    if (Subtarget.hasDQI()) {
      setOperationAction(ISD::SIGN_EXTEND, MVT::v4i32, Custom);
      setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Custom);
    }
    for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
      setOperationAction(ISD::FFLOOR, VT, Legal);
      setOperationAction(ISD::FCEIL, VT, Legal);
      setOperationAction(ISD::FTRUNC, VT, Legal);
      setOperationAction(ISD::FRINT, VT, Legal);
      setOperationAction(ISD::FNEARBYINT, VT, Legal);
    }

    setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom);
    setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom);
    setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom);
    setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom);
    setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom);

    setOperationAction(ISD::SETCC, MVT::v16i1, Custom);
    setOperationAction(ISD::SETCC, MVT::v8i1, Custom);

    setOperationAction(ISD::MUL, MVT::v8i64, Custom);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i1, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i1, Custom);
    setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i1, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i1, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i1, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i1, Custom);
    setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
    setOperationAction(ISD::SELECT, MVT::v8i64, Custom);
    setOperationAction(ISD::SELECT, MVT::v16f32, Custom);
    setOperationAction(ISD::SELECT, MVT::v16i1, Custom);
    setOperationAction(ISD::SELECT, MVT::v8i1, Custom);

    setOperationAction(ISD::SMAX, MVT::v16i32, Legal);
    setOperationAction(ISD::SMAX, MVT::v8i64, Legal);
    setOperationAction(ISD::UMAX, MVT::v16i32, Legal);
    setOperationAction(ISD::UMAX, MVT::v8i64, Legal);
    setOperationAction(ISD::SMIN, MVT::v16i32, Legal);
    setOperationAction(ISD::SMIN, MVT::v8i64, Legal);
    setOperationAction(ISD::UMIN, MVT::v16i32, Legal);
setOperationAction(ISD::UMIN, MVT::v16i32, Legal); 1319 setOperationAction(ISD::UMIN, MVT::v8i64, Legal); 1320 1321 setOperationAction(ISD::ADD, MVT::v8i1, Expand); 1322 setOperationAction(ISD::ADD, MVT::v16i1, Expand); 1323 setOperationAction(ISD::SUB, MVT::v8i1, Expand); 1324 setOperationAction(ISD::SUB, MVT::v16i1, Expand); 1325 setOperationAction(ISD::MUL, MVT::v8i1, Expand); 1326 setOperationAction(ISD::MUL, MVT::v16i1, Expand); 1327 1328 setOperationAction(ISD::MUL, MVT::v16i32, Legal); 1329 1330 for (auto VT : { MVT::v16i32, MVT::v8i64 }) { 1331 setOperationAction(ISD::SRL, VT, Custom); 1332 setOperationAction(ISD::SHL, VT, Custom); 1333 setOperationAction(ISD::SRA, VT, Custom); 1334 setOperationAction(ISD::AND, VT, Legal); 1335 setOperationAction(ISD::OR, VT, Legal); 1336 setOperationAction(ISD::XOR, VT, Legal); 1337 setOperationAction(ISD::CTPOP, VT, Custom); 1338 setOperationAction(ISD::CTTZ, VT, Custom); 1339 } 1340 1341 if (Subtarget.hasCDI()) { 1342 setOperationAction(ISD::CTLZ, MVT::v8i64, Legal); 1343 setOperationAction(ISD::CTLZ, MVT::v16i32, Legal); 1344 1345 setOperationAction(ISD::CTLZ, MVT::v8i16, Custom); 1346 setOperationAction(ISD::CTLZ, MVT::v16i8, Custom); 1347 setOperationAction(ISD::CTLZ, MVT::v16i16, Custom); 1348 setOperationAction(ISD::CTLZ, MVT::v32i8, Custom); 1349 1350 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i64, Custom); 1351 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i32, Custom); 1352 1353 if (Subtarget.hasVLX()) { 1354 setOperationAction(ISD::CTLZ, MVT::v4i64, Legal); 1355 setOperationAction(ISD::CTLZ, MVT::v8i32, Legal); 1356 setOperationAction(ISD::CTLZ, MVT::v2i64, Legal); 1357 setOperationAction(ISD::CTLZ, MVT::v4i32, Legal); 1358 } else { 1359 setOperationAction(ISD::CTLZ, MVT::v4i64, Custom); 1360 setOperationAction(ISD::CTLZ, MVT::v8i32, Custom); 1361 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom); 1362 setOperationAction(ISD::CTLZ, MVT::v4i32, Custom); 1363 } 1364 1365 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i64, Custom); 1366 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i32, Custom); 1367 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom); 1368 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom); 1369 } // Subtarget.hasCDI() 1370 1371 if (Subtarget.hasDQI()) { 1372 if (Subtarget.hasVLX()) { 1373 setOperationAction(ISD::MUL, MVT::v2i64, Legal); 1374 setOperationAction(ISD::MUL, MVT::v4i64, Legal); 1375 } 1376 setOperationAction(ISD::MUL, MVT::v8i64, Legal); 1377 } 1378 // Custom lower several nodes. 1379 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, 1380 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) { 1381 setOperationAction(ISD::MGATHER, VT, Custom); 1382 setOperationAction(ISD::MSCATTER, VT, Custom); 1383 } 1384 // Extract subvector is special because the value type 1385 // (result) is 256-bit but the source is 512-bit wide. 1386 // 128-bit was made Custom under AVX1. 
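      // For example, extracting the low v8f32 half of a v16f32 ZMM value is
      // handled by the Custom lowering selected below.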
1387 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, 1388 MVT::v8f32, MVT::v4f64 }) 1389 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); 1390 for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, 1391 MVT::v16i1, MVT::v32i1, MVT::v64i1 }) 1392 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); 1393 1394 for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) { 1395 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); 1396 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); 1397 setOperationAction(ISD::BUILD_VECTOR, VT, Custom); 1398 setOperationAction(ISD::VSELECT, VT, Legal); 1399 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); 1400 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); 1401 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); 1402 setOperationAction(ISD::MLOAD, VT, Legal); 1403 setOperationAction(ISD::MSTORE, VT, Legal); 1404 setOperationAction(ISD::MGATHER, VT, Legal); 1405 setOperationAction(ISD::MSCATTER, VT, Custom); 1406 } 1407 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32 }) { 1408 setOperationPromotedToType(ISD::SELECT, VT, MVT::v8i64); 1409 } 1410 }// has AVX-512 1411 1412 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) { 1413 addRegisterClass(MVT::v32i16, &X86::VR512RegClass); 1414 addRegisterClass(MVT::v64i8, &X86::VR512RegClass); 1415 1416 addRegisterClass(MVT::v32i1, &X86::VK32RegClass); 1417 addRegisterClass(MVT::v64i1, &X86::VK64RegClass); 1418 1419 setOperationAction(ISD::ADD, MVT::v32i1, Expand); 1420 setOperationAction(ISD::ADD, MVT::v64i1, Expand); 1421 setOperationAction(ISD::SUB, MVT::v32i1, Expand); 1422 setOperationAction(ISD::SUB, MVT::v64i1, Expand); 1423 setOperationAction(ISD::MUL, MVT::v32i1, Expand); 1424 setOperationAction(ISD::MUL, MVT::v64i1, Expand); 1425 1426 setOperationAction(ISD::SETCC, MVT::v32i1, Custom); 1427 setOperationAction(ISD::SETCC, MVT::v64i1, Custom); 1428 setOperationAction(ISD::MUL, MVT::v32i16, Legal); 1429 setOperationAction(ISD::MUL, MVT::v64i8, Custom); 1430 setOperationAction(ISD::MULHS, MVT::v32i16, Legal); 1431 setOperationAction(ISD::MULHU, MVT::v32i16, Legal); 1432 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom); 1433 setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom); 1434 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i16, Custom); 1435 setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i8, Custom); 1436 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom); 1437 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom); 1438 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Custom); 1439 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Custom); 1440 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom); 1441 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom); 1442 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32i16, Custom); 1443 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v64i8, Custom); 1444 setOperationAction(ISD::SELECT, MVT::v32i1, Custom); 1445 setOperationAction(ISD::SELECT, MVT::v64i1, Custom); 1446 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom); 1447 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom); 1448 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom); 1449 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom); 1450 setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom); 1451 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom); 1452 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom); 1453 
setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom); 1454 setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom); 1455 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i1, Custom); 1456 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i1, Custom); 1457 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i16, Custom); 1458 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i8, Custom); 1459 setOperationAction(ISD::VSELECT, MVT::v32i16, Legal); 1460 setOperationAction(ISD::VSELECT, MVT::v64i8, Legal); 1461 setOperationAction(ISD::TRUNCATE, MVT::v32i1, Custom); 1462 setOperationAction(ISD::TRUNCATE, MVT::v64i1, Custom); 1463 setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom); 1464 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i1, Custom); 1465 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i1, Custom); 1466 setOperationAction(ISD::BUILD_VECTOR, MVT::v32i1, Custom); 1467 setOperationAction(ISD::BUILD_VECTOR, MVT::v64i1, Custom); 1468 setOperationAction(ISD::VSELECT, MVT::v32i1, Expand); 1469 setOperationAction(ISD::VSELECT, MVT::v64i1, Expand); 1470 setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom); 1471 1472 setOperationAction(ISD::SMAX, MVT::v64i8, Legal); 1473 setOperationAction(ISD::SMAX, MVT::v32i16, Legal); 1474 setOperationAction(ISD::UMAX, MVT::v64i8, Legal); 1475 setOperationAction(ISD::UMAX, MVT::v32i16, Legal); 1476 setOperationAction(ISD::SMIN, MVT::v64i8, Legal); 1477 setOperationAction(ISD::SMIN, MVT::v32i16, Legal); 1478 setOperationAction(ISD::UMIN, MVT::v64i8, Legal); 1479 setOperationAction(ISD::UMIN, MVT::v32i16, Legal); 1480 1481 setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal); 1482 setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal); 1483 if (Subtarget.hasVLX()) 1484 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal); 1485 1486 LegalizeAction Action = Subtarget.hasVLX() ? Legal : Custom; 1487 for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) { 1488 setOperationAction(ISD::MLOAD, VT, Action); 1489 setOperationAction(ISD::MSTORE, VT, Action); 1490 } 1491 1492 if (Subtarget.hasCDI()) { 1493 setOperationAction(ISD::CTLZ, MVT::v32i16, Custom); 1494 setOperationAction(ISD::CTLZ, MVT::v64i8, Custom); 1495 } 1496 1497 for (auto VT : { MVT::v64i8, MVT::v32i16 }) { 1498 setOperationAction(ISD::BUILD_VECTOR, VT, Custom); 1499 setOperationAction(ISD::VSELECT, VT, Legal); 1500 setOperationAction(ISD::SRL, VT, Custom); 1501 setOperationAction(ISD::SHL, VT, Custom); 1502 setOperationAction(ISD::SRA, VT, Custom); 1503 setOperationAction(ISD::MLOAD, VT, Legal); 1504 setOperationAction(ISD::MSTORE, VT, Legal); 1505 setOperationAction(ISD::CTPOP, VT, Custom); 1506 setOperationAction(ISD::CTTZ, VT, Custom); 1507 1508 setOperationPromotedToType(ISD::AND, VT, MVT::v8i64); 1509 setOperationPromotedToType(ISD::OR, VT, MVT::v8i64); 1510 setOperationPromotedToType(ISD::XOR, VT, MVT::v8i64); 1511 } 1512 1513 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD, ISD::EXTLOAD}) { 1514 setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal); 1515 if (Subtarget.hasVLX()) { 1516 // FIXME. This commands are available on SSE/AVX2, add relevant patterns. 
1517 setLoadExtAction(ExtType, MVT::v16i16, MVT::v16i8, Legal); 1518 setLoadExtAction(ExtType, MVT::v8i16, MVT::v8i8, Legal); 1519 } 1520 } 1521 } 1522 1523 if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) { 1524 addRegisterClass(MVT::v4i1, &X86::VK4RegClass); 1525 addRegisterClass(MVT::v2i1, &X86::VK2RegClass); 1526 1527 setOperationAction(ISD::ADD, MVT::v2i1, Expand); 1528 setOperationAction(ISD::ADD, MVT::v4i1, Expand); 1529 setOperationAction(ISD::SUB, MVT::v2i1, Expand); 1530 setOperationAction(ISD::SUB, MVT::v4i1, Expand); 1531 setOperationAction(ISD::MUL, MVT::v2i1, Expand); 1532 setOperationAction(ISD::MUL, MVT::v4i1, Expand); 1533 1534 setOperationAction(ISD::TRUNCATE, MVT::v2i1, Custom); 1535 setOperationAction(ISD::TRUNCATE, MVT::v4i1, Custom); 1536 setOperationAction(ISD::SETCC, MVT::v4i1, Custom); 1537 setOperationAction(ISD::SETCC, MVT::v2i1, Custom); 1538 setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom); 1539 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom); 1540 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom); 1541 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom); 1542 setOperationAction(ISD::SELECT, MVT::v4i1, Custom); 1543 setOperationAction(ISD::SELECT, MVT::v2i1, Custom); 1544 setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom); 1545 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i1, Custom); 1546 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i1, Custom); 1547 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i1, Custom); 1548 setOperationAction(ISD::VSELECT, MVT::v2i1, Expand); 1549 setOperationAction(ISD::VSELECT, MVT::v4i1, Expand); 1550 1551 for (auto VT : { MVT::v4i32, MVT::v8i32 }) { 1552 setOperationAction(ISD::AND, VT, Legal); 1553 setOperationAction(ISD::OR, VT, Legal); 1554 setOperationAction(ISD::XOR, VT, Legal); 1555 } 1556 1557 for (auto VT : { MVT::v2i64, MVT::v4i64 }) { 1558 setOperationAction(ISD::SMAX, VT, Legal); 1559 setOperationAction(ISD::UMAX, VT, Legal); 1560 setOperationAction(ISD::SMIN, VT, Legal); 1561 setOperationAction(ISD::UMIN, VT, Legal); 1562 } 1563 } 1564 1565 // We want to custom lower some of our intrinsics. 1566 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 1567 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); 1568 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); 1569 if (!Subtarget.is64Bit()) { 1570 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); 1571 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); 1572 } 1573 1574 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't 1575 // handle type legalization for these operations here. 1576 // 1577 // FIXME: We really should do custom legalization for addition and 1578 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better 1579 // than generic legalization for 64-bit multiplication-with-overflow, though. 1580 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { 1581 if (VT == MVT::i64 && !Subtarget.is64Bit()) 1582 continue; 1583 // Add/Sub/Mul with overflow operations are custom lowered. 1584 setOperationAction(ISD::SADDO, VT, Custom); 1585 setOperationAction(ISD::UADDO, VT, Custom); 1586 setOperationAction(ISD::SSUBO, VT, Custom); 1587 setOperationAction(ISD::USUBO, VT, Custom); 1588 setOperationAction(ISD::SMULO, VT, Custom); 1589 setOperationAction(ISD::UMULO, VT, Custom); 1590 } 1591 1592 if (!Subtarget.is64Bit()) { 1593 // These libcalls are not available in 32-bit. 
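    // Clearing these libcall names forces the legalizer to expand 128-bit
    // shifts inline rather than emitting calls to nonexistent runtime
    // routines.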
1594 setLibcallName(RTLIB::SHL_I128, nullptr); 1595 setLibcallName(RTLIB::SRL_I128, nullptr); 1596 setLibcallName(RTLIB::SRA_I128, nullptr); 1597 } 1598 1599 // Combine sin / cos into one node or libcall if possible. 1600 if (Subtarget.hasSinCos()) { 1601 setLibcallName(RTLIB::SINCOS_F32, "sincosf"); 1602 setLibcallName(RTLIB::SINCOS_F64, "sincos"); 1603 if (Subtarget.isTargetDarwin()) { 1604 // For MacOSX, we don't want the normal expansion of a libcall to sincos. 1605 // We want to issue a libcall to __sincos_stret to avoid memory traffic. 1606 setOperationAction(ISD::FSINCOS, MVT::f64, Custom); 1607 setOperationAction(ISD::FSINCOS, MVT::f32, Custom); 1608 } 1609 } 1610 1611 if (Subtarget.isTargetWin64()) { 1612 setOperationAction(ISD::SDIV, MVT::i128, Custom); 1613 setOperationAction(ISD::UDIV, MVT::i128, Custom); 1614 setOperationAction(ISD::SREM, MVT::i128, Custom); 1615 setOperationAction(ISD::UREM, MVT::i128, Custom); 1616 setOperationAction(ISD::SDIVREM, MVT::i128, Custom); 1617 setOperationAction(ISD::UDIVREM, MVT::i128, Custom); 1618 } 1619 1620 // On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)` 1621 // is. We should promote the value to 64-bits to solve this. 1622 // This is what the CRT headers do - `fmodf` is an inline header 1623 // function casting to f64 and calling `fmod`. 1624 if (Subtarget.is32Bit() && Subtarget.isTargetKnownWindowsMSVC()) 1625 for (ISD::NodeType Op : 1626 {ISD::FCEIL, ISD::FCOS, ISD::FEXP, ISD::FFLOOR, ISD::FREM, ISD::FLOG, 1627 ISD::FLOG10, ISD::FPOW, ISD::FSIN}) 1628 if (isOperationExpand(Op, MVT::f32)) 1629 setOperationAction(Op, MVT::f32, Promote); 1630 1631 // We have target-specific dag combine patterns for the following nodes: 1632 setTargetDAGCombine(ISD::VECTOR_SHUFFLE); 1633 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); 1634 setTargetDAGCombine(ISD::BITCAST); 1635 setTargetDAGCombine(ISD::VSELECT); 1636 setTargetDAGCombine(ISD::SELECT); 1637 setTargetDAGCombine(ISD::SHL); 1638 setTargetDAGCombine(ISD::SRA); 1639 setTargetDAGCombine(ISD::SRL); 1640 setTargetDAGCombine(ISD::OR); 1641 setTargetDAGCombine(ISD::AND); 1642 setTargetDAGCombine(ISD::ADD); 1643 setTargetDAGCombine(ISD::FADD); 1644 setTargetDAGCombine(ISD::FSUB); 1645 setTargetDAGCombine(ISD::FNEG); 1646 setTargetDAGCombine(ISD::FMA); 1647 setTargetDAGCombine(ISD::FMINNUM); 1648 setTargetDAGCombine(ISD::FMAXNUM); 1649 setTargetDAGCombine(ISD::SUB); 1650 setTargetDAGCombine(ISD::LOAD); 1651 setTargetDAGCombine(ISD::MLOAD); 1652 setTargetDAGCombine(ISD::STORE); 1653 setTargetDAGCombine(ISD::MSTORE); 1654 setTargetDAGCombine(ISD::TRUNCATE); 1655 setTargetDAGCombine(ISD::ZERO_EXTEND); 1656 setTargetDAGCombine(ISD::ANY_EXTEND); 1657 setTargetDAGCombine(ISD::SIGN_EXTEND); 1658 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); 1659 setTargetDAGCombine(ISD::SINT_TO_FP); 1660 setTargetDAGCombine(ISD::UINT_TO_FP); 1661 setTargetDAGCombine(ISD::SETCC); 1662 setTargetDAGCombine(ISD::MUL); 1663 setTargetDAGCombine(ISD::XOR); 1664 setTargetDAGCombine(ISD::MSCATTER); 1665 setTargetDAGCombine(ISD::MGATHER); 1666 1667 computeRegisterProperties(Subtarget.getRegisterInfo()); 1668 1669 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores 1670 MaxStoresPerMemsetOptSize = 8; 1671 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores 1672 MaxStoresPerMemcpyOptSize = 4; 1673 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores 1674 MaxStoresPerMemmoveOptSize = 4; 1675 setPrefLoopAlignment(4); // 2^4 bytes. 
1676 1677 // An out-of-order CPU can speculatively execute past a predictable branch, 1678 // but a conditional move could be stalled by an expensive earlier operation. 1679 PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder(); 1680 EnableExtLdPromotion = true; 1681 setPrefFunctionAlignment(4); // 2^4 bytes. 1682 1683 verifyIntrinsicTables(); 1684 } 1685 1686 // This has so far only been implemented for 64-bit MachO. 1687 bool X86TargetLowering::useLoadStackGuardNode() const { 1688 return Subtarget.isTargetMachO() && Subtarget.is64Bit(); 1689 } 1690 1691 TargetLoweringBase::LegalizeTypeAction 1692 X86TargetLowering::getPreferredVectorAction(EVT VT) const { 1693 if (ExperimentalVectorWideningLegalization && 1694 VT.getVectorNumElements() != 1 && 1695 VT.getVectorElementType().getSimpleVT() != MVT::i1) 1696 return TypeWidenVector; 1697 1698 return TargetLoweringBase::getPreferredVectorAction(VT); 1699 } 1700 1701 EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL, 1702 LLVMContext& Context, 1703 EVT VT) const { 1704 if (!VT.isVector()) 1705 return Subtarget.hasAVX512() ? MVT::i1: MVT::i8; 1706 1707 if (VT.isSimple()) { 1708 MVT VVT = VT.getSimpleVT(); 1709 const unsigned NumElts = VVT.getVectorNumElements(); 1710 MVT EltVT = VVT.getVectorElementType(); 1711 if (VVT.is512BitVector()) { 1712 if (Subtarget.hasAVX512()) 1713 if (EltVT == MVT::i32 || EltVT == MVT::i64 || 1714 EltVT == MVT::f32 || EltVT == MVT::f64) 1715 switch(NumElts) { 1716 case 8: return MVT::v8i1; 1717 case 16: return MVT::v16i1; 1718 } 1719 if (Subtarget.hasBWI()) 1720 if (EltVT == MVT::i8 || EltVT == MVT::i16) 1721 switch(NumElts) { 1722 case 32: return MVT::v32i1; 1723 case 64: return MVT::v64i1; 1724 } 1725 } 1726 1727 if (Subtarget.hasBWI() && Subtarget.hasVLX()) 1728 return MVT::getVectorVT(MVT::i1, NumElts); 1729 1730 if (!isTypeLegal(VT) && getTypeAction(Context, VT) == TypePromoteInteger) { 1731 EVT LegalVT = getTypeToTransformTo(Context, VT); 1732 EltVT = LegalVT.getVectorElementType().getSimpleVT(); 1733 } 1734 1735 if (Subtarget.hasVLX() && EltVT.getSizeInBits() >= 32) 1736 switch(NumElts) { 1737 case 2: return MVT::v2i1; 1738 case 4: return MVT::v4i1; 1739 case 8: return MVT::v8i1; 1740 } 1741 } 1742 1743 return VT.changeVectorElementTypeToInteger(); 1744 } 1745 1746 /// Helper for getByValTypeAlignment to determine 1747 /// the desired ByVal argument alignment. 1748 static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) { 1749 if (MaxAlign == 16) 1750 return; 1751 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) { 1752 if (VTy->getBitWidth() == 128) 1753 MaxAlign = 16; 1754 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { 1755 unsigned EltAlign = 0; 1756 getMaxByValAlign(ATy->getElementType(), EltAlign); 1757 if (EltAlign > MaxAlign) 1758 MaxAlign = EltAlign; 1759 } else if (StructType *STy = dyn_cast<StructType>(Ty)) { 1760 for (auto *EltTy : STy->elements()) { 1761 unsigned EltAlign = 0; 1762 getMaxByValAlign(EltTy, EltAlign); 1763 if (EltAlign > MaxAlign) 1764 MaxAlign = EltAlign; 1765 if (MaxAlign == 16) 1766 break; 1767 } 1768 } 1769 } 1770 1771 /// Return the desired alignment for ByVal aggregate 1772 /// function arguments in the caller parameter area. For X86, aggregates 1773 /// that contain SSE vectors are placed at 16-byte boundaries while the rest 1774 /// are at 4-byte boundaries. 1775 unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty, 1776 const DataLayout &DL) const { 1777 if (Subtarget.is64Bit()) { 1778 // Max of 8 and alignment of type. 
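    // For example, an aggregate containing a __m128 member has 16-byte ABI
    // alignment and is therefore placed at a 16-byte boundary, while an
    // aggregate of plain ints gets the 8-byte minimum.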
    unsigned TyAlign = DL.getABITypeAlignment(Ty);
    if (TyAlign > 8)
      return TyAlign;
    return 8;
  }

  unsigned Align = 4;
  if (Subtarget.hasSSE1())
    getMaxByValAlign(Ty, Align);
  return Align;
}

/// Returns the target specific optimal type for load and store operations as
/// a result of memset, memcpy, and memmove lowering. If DstAlign is zero, it
/// is safe to assume that the destination alignment can satisfy any
/// constraint. Similarly, if SrcAlign is zero there is no need to check it
/// against an alignment requirement, probably because the source does not
/// need to be loaded. If 'IsMemset' is true, this is expanding a memset. If
/// 'ZeroMemset' is true, it is a memset of zero. 'MemcpyStrSrc' indicates
/// whether the memcpy source is constant so it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
EVT
X86TargetLowering::getOptimalMemOpType(uint64_t Size,
                                       unsigned DstAlign, unsigned SrcAlign,
                                       bool IsMemset, bool ZeroMemset,
                                       bool MemcpyStrSrc,
                                       MachineFunction &MF) const {
  const Function *F = MF.getFunction();
  if (!F->hasFnAttribute(Attribute::NoImplicitFloat)) {
    if (Size >= 16 &&
        (!Subtarget.isUnalignedMem16Slow() ||
         ((DstAlign == 0 || DstAlign >= 16) &&
          (SrcAlign == 0 || SrcAlign >= 16)))) {
      // FIXME: Check if unaligned 32-byte accesses are slow.
      if (Size >= 32 && Subtarget.hasAVX()) {
        // Although this isn't a well-supported type for AVX1, we'll let
        // legalization and shuffle lowering produce the optimal codegen. If we
        // choose an optimal type with a vector element larger than a byte,
        // getMemsetStores() may create an intermediate splat (using an integer
        // multiply) before we splat as a vector.
        return MVT::v32i8;
      }
      if (Subtarget.hasSSE2())
        return MVT::v16i8;
      // TODO: Can SSE1 handle a byte vector?
      if (Subtarget.hasSSE1())
        return MVT::v4f32;
    } else if ((!IsMemset || ZeroMemset) && !MemcpyStrSrc && Size >= 8 &&
               !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
      // Do not use f64 to lower memcpy if the source is a string constant.
      // It's better to use i32 to avoid the loads.
      // Also, do not use f64 to lower memset unless this is a memset of zeros.
      // The gymnastics of splatting a byte value into an XMM register and then
      // only using 8-byte stores (because this is a CPU with slow unaligned
      // 16-byte accesses) makes that a loser.
      return MVT::f64;
    }
  }
  // This is a compromise. If we reach here, unaligned accesses may be slow on
  // this target. However, creating smaller, aligned accesses could be even
  // slower and would certainly be a lot more code.
  if (Subtarget.is64Bit() && Size >= 8)
    return MVT::i64;
  return MVT::i32;
}

bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
  if (VT == MVT::f32)
    return X86ScalarSSEf32;
  else if (VT == MVT::f64)
    return X86ScalarSSEf64;
  return true;
}

bool
X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                  unsigned,
                                                  unsigned,
                                                  bool *Fast) const {
  if (Fast) {
    switch (VT.getSizeInBits()) {
    default:
      // 8-byte and under are always assumed to be fast.
1864 *Fast = true; 1865 break; 1866 case 128: 1867 *Fast = !Subtarget.isUnalignedMem16Slow(); 1868 break; 1869 case 256: 1870 *Fast = !Subtarget.isUnalignedMem32Slow(); 1871 break; 1872 // TODO: What about AVX-512 (512-bit) accesses? 1873 } 1874 } 1875 // Misaligned accesses of any size are always allowed. 1876 return true; 1877 } 1878 1879 /// Return the entry encoding for a jump table in the 1880 /// current function. The returned value is a member of the 1881 /// MachineJumpTableInfo::JTEntryKind enum. 1882 unsigned X86TargetLowering::getJumpTableEncoding() const { 1883 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF 1884 // symbol. 1885 if (isPositionIndependent() && Subtarget.isPICStyleGOT()) 1886 return MachineJumpTableInfo::EK_Custom32; 1887 1888 // Otherwise, use the normal jump table encoding heuristics. 1889 return TargetLowering::getJumpTableEncoding(); 1890 } 1891 1892 bool X86TargetLowering::useSoftFloat() const { 1893 return Subtarget.useSoftFloat(); 1894 } 1895 1896 const MCExpr * 1897 X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, 1898 const MachineBasicBlock *MBB, 1899 unsigned uid,MCContext &Ctx) const{ 1900 assert(isPositionIndependent() && Subtarget.isPICStyleGOT()); 1901 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF 1902 // entries. 1903 return MCSymbolRefExpr::create(MBB->getSymbol(), 1904 MCSymbolRefExpr::VK_GOTOFF, Ctx); 1905 } 1906 1907 /// Returns relocation base for the given PIC jumptable. 1908 SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table, 1909 SelectionDAG &DAG) const { 1910 if (!Subtarget.is64Bit()) 1911 // This doesn't have SDLoc associated with it, but is not really the 1912 // same as a Register. 1913 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), 1914 getPointerTy(DAG.getDataLayout())); 1915 return Table; 1916 } 1917 1918 /// This returns the relocation base for the given PIC jumptable, 1919 /// the same as getPICJumpTableRelocBase, but as an MCExpr. 1920 const MCExpr *X86TargetLowering:: 1921 getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, 1922 MCContext &Ctx) const { 1923 // X86-64 uses RIP relative addressing based on the jump table label. 1924 if (Subtarget.isPICStyleRIPRel()) 1925 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx); 1926 1927 // Otherwise, the reference is relative to the PIC base. 1928 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx); 1929 } 1930 1931 std::pair<const TargetRegisterClass *, uint8_t> 1932 X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI, 1933 MVT VT) const { 1934 const TargetRegisterClass *RRC = nullptr; 1935 uint8_t Cost = 1; 1936 switch (VT.SimpleTy) { 1937 default: 1938 return TargetLowering::findRepresentativeClass(TRI, VT); 1939 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64: 1940 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass; 1941 break; 1942 case MVT::x86mmx: 1943 RRC = &X86::VR64RegClass; 1944 break; 1945 case MVT::f32: case MVT::f64: 1946 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64: 1947 case MVT::v4f32: case MVT::v2f64: 1948 case MVT::v32i8: case MVT::v8i32: case MVT::v4i64: case MVT::v8f32: 1949 case MVT::v4f64: 1950 RRC = &X86::VR128RegClass; 1951 break; 1952 } 1953 return std::make_pair(RRC, Cost); 1954 } 1955 1956 unsigned X86TargetLowering::getAddressSpace() const { 1957 if (Subtarget.is64Bit()) 1958 return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 
256 : 257; 1959 return 256; 1960 } 1961 1962 Value *X86TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const { 1963 // glibc has a special slot for the stack guard in tcbhead_t, use it instead 1964 // of the usual global variable (see sysdeps/{i386,x86_64}/nptl/tls.h) 1965 if (!Subtarget.isTargetGlibc()) 1966 return TargetLowering::getIRStackGuard(IRB); 1967 1968 // %fs:0x28, unless we're using a Kernel code model, in which case it's %gs: 1969 // %gs:0x14 on i386 1970 unsigned Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14; 1971 unsigned AddressSpace = getAddressSpace(); 1972 return ConstantExpr::getIntToPtr( 1973 ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset), 1974 Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace)); 1975 } 1976 1977 void X86TargetLowering::insertSSPDeclarations(Module &M) const { 1978 // MSVC CRT provides functionalities for stack protection. 1979 if (Subtarget.getTargetTriple().isOSMSVCRT()) { 1980 // MSVC CRT has a global variable holding security cookie. 1981 M.getOrInsertGlobal("__security_cookie", 1982 Type::getInt8PtrTy(M.getContext())); 1983 1984 // MSVC CRT has a function to validate security cookie. 1985 auto *SecurityCheckCookie = cast<Function>( 1986 M.getOrInsertFunction("__security_check_cookie", 1987 Type::getVoidTy(M.getContext()), 1988 Type::getInt8PtrTy(M.getContext()), nullptr)); 1989 SecurityCheckCookie->setCallingConv(CallingConv::X86_FastCall); 1990 SecurityCheckCookie->addAttribute(1, Attribute::AttrKind::InReg); 1991 return; 1992 } 1993 // glibc has a special slot for the stack guard. 1994 if (Subtarget.isTargetGlibc()) 1995 return; 1996 TargetLowering::insertSSPDeclarations(M); 1997 } 1998 1999 Value *X86TargetLowering::getSDagStackGuard(const Module &M) const { 2000 // MSVC CRT has a global variable holding security cookie. 2001 if (Subtarget.getTargetTriple().isOSMSVCRT()) 2002 return M.getGlobalVariable("__security_cookie"); 2003 return TargetLowering::getSDagStackGuard(M); 2004 } 2005 2006 Value *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const { 2007 // MSVC CRT has a function to validate security cookie. 2008 if (Subtarget.getTargetTriple().isOSMSVCRT()) 2009 return M.getFunction("__security_check_cookie"); 2010 return TargetLowering::getSSPStackGuardCheck(M); 2011 } 2012 2013 Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const { 2014 if (!Subtarget.isTargetAndroid()) 2015 return TargetLowering::getSafeStackPointerLocation(IRB); 2016 2017 // Android provides a fixed TLS slot for the SafeStack pointer. See the 2018 // definition of TLS_SLOT_SAFESTACK in 2019 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h 2020 unsigned AddressSpace, Offset; 2021 2022 // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs: 2023 // %gs:0x24 on i386 2024 Offset = (Subtarget.is64Bit()) ? 
0x48 : 0x24; 2025 AddressSpace = getAddressSpace(); 2026 return ConstantExpr::getIntToPtr( 2027 ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset), 2028 Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace)); 2029 } 2030 2031 bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS, 2032 unsigned DestAS) const { 2033 assert(SrcAS != DestAS && "Expected different address spaces!"); 2034 2035 return SrcAS < 256 && DestAS < 256; 2036 } 2037 2038 //===----------------------------------------------------------------------===// 2039 // Return Value Calling Convention Implementation 2040 //===----------------------------------------------------------------------===// 2041 2042 #include "X86GenCallingConv.inc" 2043 2044 bool X86TargetLowering::CanLowerReturn( 2045 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, 2046 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { 2047 SmallVector<CCValAssign, 16> RVLocs; 2048 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); 2049 return CCInfo.CheckReturn(Outs, RetCC_X86); 2050 } 2051 2052 const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const { 2053 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 }; 2054 return ScratchRegs; 2055 } 2056 2057 SDValue 2058 X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, 2059 bool isVarArg, 2060 const SmallVectorImpl<ISD::OutputArg> &Outs, 2061 const SmallVectorImpl<SDValue> &OutVals, 2062 const SDLoc &dl, SelectionDAG &DAG) const { 2063 MachineFunction &MF = DAG.getMachineFunction(); 2064 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); 2065 2066 if (CallConv == CallingConv::X86_INTR && !Outs.empty()) 2067 report_fatal_error("X86 interrupts may not return any value"); 2068 2069 SmallVector<CCValAssign, 16> RVLocs; 2070 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext()); 2071 CCInfo.AnalyzeReturn(Outs, RetCC_X86); 2072 2073 SDValue Flag; 2074 SmallVector<SDValue, 6> RetOps; 2075 RetOps.push_back(Chain); // Operand #0 = Chain (updated below) 2076 // Operand #1 = Bytes To Pop 2077 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl, 2078 MVT::i32)); 2079 2080 // Copy the result values into the output registers. 2081 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { 2082 CCValAssign &VA = RVLocs[i]; 2083 assert(VA.isRegLoc() && "Can only return in registers!"); 2084 SDValue ValToCopy = OutVals[i]; 2085 EVT ValVT = ValToCopy.getValueType(); 2086 2087 // Promote values to the appropriate types. 2088 if (VA.getLocInfo() == CCValAssign::SExt) 2089 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy); 2090 else if (VA.getLocInfo() == CCValAssign::ZExt) 2091 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy); 2092 else if (VA.getLocInfo() == CCValAssign::AExt) { 2093 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1) 2094 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy); 2095 else 2096 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy); 2097 } 2098 else if (VA.getLocInfo() == CCValAssign::BCvt) 2099 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy); 2100 2101 assert(VA.getLocInfo() != CCValAssign::FPExt && 2102 "Unexpected FP-extend for return value."); 2103 2104 // If this is x86-64, and we disabled SSE, we can't return FP values, 2105 // or SSE or MMX vectors. 
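    // (On x86-64 the ABI returns these values in XMM0/XMM1, which do not
    // exist when SSE is disabled, hence the hard errors below.)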
2106 if ((ValVT == MVT::f32 || ValVT == MVT::f64 || 2107 VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) && 2108 (Subtarget.is64Bit() && !Subtarget.hasSSE1())) { 2109 report_fatal_error("SSE register return with SSE disabled"); 2110 } 2111 // Likewise we can't return F64 values with SSE1 only. gcc does so, but 2112 // llvm-gcc has never done it right and no one has noticed, so this 2113 // should be OK for now. 2114 if (ValVT == MVT::f64 && 2115 (Subtarget.is64Bit() && !Subtarget.hasSSE2())) 2116 report_fatal_error("SSE2 register return with SSE2 disabled"); 2117 2118 // Returns in ST0/ST1 are handled specially: these are pushed as operands to 2119 // the RET instruction and handled by the FP Stackifier. 2120 if (VA.getLocReg() == X86::FP0 || 2121 VA.getLocReg() == X86::FP1) { 2122 // If this is a copy from an xmm register to ST(0), use an FPExtend to 2123 // change the value to the FP stack register class. 2124 if (isScalarFPTypeInSSEReg(VA.getValVT())) 2125 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy); 2126 RetOps.push_back(ValToCopy); 2127 // Don't emit a copytoreg. 2128 continue; 2129 } 2130 2131 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64 2132 // which is returned in RAX / RDX. 2133 if (Subtarget.is64Bit()) { 2134 if (ValVT == MVT::x86mmx) { 2135 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) { 2136 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy); 2137 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, 2138 ValToCopy); 2139 // If we don't have SSE2 available, convert to v4f32 so the generated 2140 // register is legal. 2141 if (!Subtarget.hasSSE2()) 2142 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy); 2143 } 2144 } 2145 } 2146 2147 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag); 2148 Flag = Chain.getValue(1); 2149 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 2150 } 2151 2152 // Swift calling convention does not require we copy the sret argument 2153 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift. 2154 2155 // All x86 ABIs require that for returning structs by value we copy 2156 // the sret argument into %rax/%eax (depending on ABI) for the return. 2157 // We saved the argument into a virtual register in the entry block, 2158 // so now we copy the value out and into %rax/%eax. 2159 // 2160 // Checking Function.hasStructRetAttr() here is insufficient because the IR 2161 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is 2162 // false, then an sret argument may be implicitly inserted in the SelDAG. In 2163 // either case FuncInfo->setSRetReturnReg() will have been called. 2164 if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) { 2165 // When we have both sret and another return value, we should use the 2166 // original Chain stored in RetOps[0], instead of the current Chain updated 2167 // in the above loop. If we only have sret, RetOps[0] equals to Chain. 
2168 2169 // For the case of sret and another return value, we have 2170 // Chain_0 at the function entry 2171 // Chain_1 = getCopyToReg(Chain_0) in the above loop 2172 // If we use Chain_1 in getCopyFromReg, we will have 2173 // Val = getCopyFromReg(Chain_1) 2174 // Chain_2 = getCopyToReg(Chain_1, Val) from below 2175 2176 // getCopyToReg(Chain_0) will be glued together with 2177 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be 2178 // in Unit B, and we will have cyclic dependency between Unit A and Unit B: 2179 // Data dependency from Unit B to Unit A due to usage of Val in 2180 // getCopyToReg(Chain_1, Val) 2181 // Chain dependency from Unit A to Unit B 2182 2183 // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg. 2184 SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg, 2185 getPointerTy(MF.getDataLayout())); 2186 2187 unsigned RetValReg 2188 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ? 2189 X86::RAX : X86::EAX; 2190 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag); 2191 Flag = Chain.getValue(1); 2192 2193 // RAX/EAX now acts like a return value. 2194 RetOps.push_back( 2195 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout()))); 2196 } 2197 2198 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); 2199 const MCPhysReg *I = 2200 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction()); 2201 if (I) { 2202 for (; *I; ++I) { 2203 if (X86::GR64RegClass.contains(*I)) 2204 RetOps.push_back(DAG.getRegister(*I, MVT::i64)); 2205 else 2206 llvm_unreachable("Unexpected register class in CSRsViaCopy!"); 2207 } 2208 } 2209 2210 RetOps[0] = Chain; // Update chain. 2211 2212 // Add the flag if we have it. 2213 if (Flag.getNode()) 2214 RetOps.push_back(Flag); 2215 2216 X86ISD::NodeType opcode = X86ISD::RET_FLAG; 2217 if (CallConv == CallingConv::X86_INTR) 2218 opcode = X86ISD::IRET; 2219 return DAG.getNode(opcode, dl, MVT::Other, RetOps); 2220 } 2221 2222 bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { 2223 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0)) 2224 return false; 2225 2226 SDValue TCChain = Chain; 2227 SDNode *Copy = *N->use_begin(); 2228 if (Copy->getOpcode() == ISD::CopyToReg) { 2229 // If the copy has a glue operand, we conservatively assume it isn't safe to 2230 // perform a tail call. 2231 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue) 2232 return false; 2233 TCChain = Copy->getOperand(0); 2234 } else if (Copy->getOpcode() != ISD::FP_EXTEND) 2235 return false; 2236 2237 bool HasRet = false; 2238 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end(); 2239 UI != UE; ++UI) { 2240 if (UI->getOpcode() != X86ISD::RET_FLAG) 2241 return false; 2242 // If we are returning more than one value, we can definitely 2243 // not make a tail call see PR19530 2244 if (UI->getNumOperands() > 4) 2245 return false; 2246 if (UI->getNumOperands() == 4 && 2247 UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue) 2248 return false; 2249 HasRet = true; 2250 } 2251 2252 if (!HasRet) 2253 return false; 2254 2255 Chain = TCChain; 2256 return true; 2257 } 2258 2259 EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT, 2260 ISD::NodeType ExtendKind) const { 2261 MVT ReturnMVT = MVT::i32; 2262 2263 bool Darwin = Subtarget.getTargetTriple().isOSDarwin(); 2264 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) { 2265 // The ABI does not require i1, i8 or i16 to be extended. 
2266 // 2267 // On Darwin, there is code in the wild relying on Clang's old behaviour of 2268 // always extending i8/i16 return values, so keep doing that for now. 2269 // (PR26665). 2270 ReturnMVT = MVT::i8; 2271 } 2272 2273 EVT MinVT = getRegisterType(Context, ReturnMVT); 2274 return VT.bitsLT(MinVT) ? MinVT : VT; 2275 } 2276 2277 /// Lower the result values of a call into the 2278 /// appropriate copies out of appropriate physical registers. 2279 /// 2280 SDValue X86TargetLowering::LowerCallResult( 2281 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, 2282 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 2283 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 2284 2285 // Assign locations to each value returned by this call. 2286 SmallVector<CCValAssign, 16> RVLocs; 2287 bool Is64Bit = Subtarget.is64Bit(); 2288 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, 2289 *DAG.getContext()); 2290 CCInfo.AnalyzeCallResult(Ins, RetCC_X86); 2291 2292 // Copy all of the result registers out of their specified physreg. 2293 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { 2294 CCValAssign &VA = RVLocs[i]; 2295 EVT CopyVT = VA.getLocVT(); 2296 2297 // If this is x86-64, and we disabled SSE, we can't return FP values 2298 if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) && 2299 ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget.hasSSE1())) { 2300 report_fatal_error("SSE register return with SSE disabled"); 2301 } 2302 2303 // If we prefer to use the value in xmm registers, copy it out as f80 and 2304 // use a truncate to move it from fp stack reg to xmm reg. 2305 bool RoundAfterCopy = false; 2306 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) && 2307 isScalarFPTypeInSSEReg(VA.getValVT())) { 2308 if (!Subtarget.hasX87()) 2309 report_fatal_error("X87 register return with X87 disabled"); 2310 CopyVT = MVT::f80; 2311 RoundAfterCopy = (CopyVT != VA.getLocVT()); 2312 } 2313 2314 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), 2315 CopyVT, InFlag).getValue(1); 2316 SDValue Val = Chain.getValue(0); 2317 2318 if (RoundAfterCopy) 2319 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val, 2320 // This truncation won't change the value. 2321 DAG.getIntPtrConstant(1, dl)); 2322 2323 if (VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1) 2324 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 2325 2326 InFlag = Chain.getValue(2); 2327 InVals.push_back(Val); 2328 } 2329 2330 return Chain; 2331 } 2332 2333 //===----------------------------------------------------------------------===// 2334 // C & StdCall & Fast Calling Convention implementation 2335 //===----------------------------------------------------------------------===// 2336 // StdCall calling convention seems to be standard for many Windows' API 2337 // routines and around. It differs from C calling convention just a little: 2338 // callee should clean up the stack, not caller. Symbols should be also 2339 // decorated in some fancy way :) It doesn't support any vector arguments. 2340 // For info on fast calling convention see Fast Calling Convention (tail call) 2341 // implementation LowerX86_32FastCCCallTo. 2342 2343 /// CallIsStructReturn - Determines whether a call uses struct return 2344 /// semantics. 
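/// The enum below additionally records whether the sret pointer is passed in
/// a register or on the stack, which matters later for deciding whether the
/// callee must pop the hidden pointer on return.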
2345 enum StructReturnType { 2346 NotStructReturn, 2347 RegStructReturn, 2348 StackStructReturn 2349 }; 2350 static StructReturnType 2351 callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsMCU) { 2352 if (Outs.empty()) 2353 return NotStructReturn; 2354 2355 const ISD::ArgFlagsTy &Flags = Outs[0].Flags; 2356 if (!Flags.isSRet()) 2357 return NotStructReturn; 2358 if (Flags.isInReg() || IsMCU) 2359 return RegStructReturn; 2360 return StackStructReturn; 2361 } 2362 2363 /// Determines whether a function uses struct return semantics. 2364 static StructReturnType 2365 argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins, bool IsMCU) { 2366 if (Ins.empty()) 2367 return NotStructReturn; 2368 2369 const ISD::ArgFlagsTy &Flags = Ins[0].Flags; 2370 if (!Flags.isSRet()) 2371 return NotStructReturn; 2372 if (Flags.isInReg() || IsMCU) 2373 return RegStructReturn; 2374 return StackStructReturn; 2375 } 2376 2377 /// Make a copy of an aggregate at address specified by "Src" to address 2378 /// "Dst" with size and alignment information specified by the specific 2379 /// parameter attribute. The copy will be passed as a byval function parameter. 2380 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, 2381 SDValue Chain, ISD::ArgFlagsTy Flags, 2382 SelectionDAG &DAG, const SDLoc &dl) { 2383 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32); 2384 2385 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), 2386 /*isVolatile*/false, /*AlwaysInline=*/true, 2387 /*isTailCall*/false, 2388 MachinePointerInfo(), MachinePointerInfo()); 2389 } 2390 2391 /// Return true if the calling convention is one that we can guarantee TCO for. 2392 static bool canGuaranteeTCO(CallingConv::ID CC) { 2393 return (CC == CallingConv::Fast || CC == CallingConv::GHC || 2394 CC == CallingConv::HiPE || CC == CallingConv::HHVM); 2395 } 2396 2397 /// Return true if we might ever do TCO for calls with this calling convention. 2398 static bool mayTailCallThisCC(CallingConv::ID CC) { 2399 switch (CC) { 2400 // C calling conventions: 2401 case CallingConv::C: 2402 case CallingConv::X86_64_Win64: 2403 case CallingConv::X86_64_SysV: 2404 // Callee pop conventions: 2405 case CallingConv::X86_ThisCall: 2406 case CallingConv::X86_StdCall: 2407 case CallingConv::X86_VectorCall: 2408 case CallingConv::X86_FastCall: 2409 return true; 2410 default: 2411 return canGuaranteeTCO(CC); 2412 } 2413 } 2414 2415 /// Return true if the function is being made into a tailcall target by 2416 /// changing its ABI. 2417 static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) { 2418 return GuaranteedTailCallOpt && canGuaranteeTCO(CC); 2419 } 2420 2421 bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { 2422 auto Attr = 2423 CI->getParent()->getParent()->getFnAttribute("disable-tail-calls"); 2424 if (!CI->isTailCall() || Attr.getValueAsString() == "true") 2425 return false; 2426 2427 CallSite CS(CI); 2428 CallingConv::ID CalleeCC = CS.getCallingConv(); 2429 if (!mayTailCallThisCC(CalleeCC)) 2430 return false; 2431 2432 return true; 2433 } 2434 2435 SDValue 2436 X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv, 2437 const SmallVectorImpl<ISD::InputArg> &Ins, 2438 const SDLoc &dl, SelectionDAG &DAG, 2439 const CCValAssign &VA, 2440 MachineFrameInfo *MFI, unsigned i) const { 2441 // Create the nodes corresponding to a load from this parameter slot. 
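  // Byval arguments are handed back as a frame index referring to the
  // caller-created copy; all other memory arguments are loaded from their
  // fixed stack objects.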
  ISD::ArgFlagsTy Flags = Ins[i].Flags;
  bool AlwaysUseMutable = shouldGuaranteeTCO(
      CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
  bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
  EVT ValVT;

  // If the value is passed by pointer, the address is passed instead of the
  // value itself.
  bool ExtendedInMem = VA.isExtInLoc() &&
    VA.getValVT().getScalarType() == MVT::i1;

  if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
    ValVT = VA.getLocVT();
  else
    ValVT = VA.getValVT();

  // Calculate the SP offset of an interrupt parameter, re-arranging the slot
  // normally taken by a return address.
  int Offset = 0;
  if (CallConv == CallingConv::X86_INTR) {
    const X86Subtarget& Subtarget =
        static_cast<const X86Subtarget&>(DAG.getSubtarget());
    // X86 interrupts may take one or two arguments.
    // On the stack there is no return address as in a regular call.
    // The offset of the last argument needs to be set to -4/-8 bytes, and the
    // offset of the first argument (when there are two) to 0 bytes.
    Offset = (Subtarget.is64Bit() ? 8 : 4) * ((i + 1) % Ins.size() - 1);
  }

  // FIXME: For now, all byval parameter objects are marked mutable. This can
  // be changed with more analysis.
  // In the case of tail call optimization, mark all arguments mutable, since
  // they could be overwritten when arguments are lowered for a tail call.
  if (Flags.isByVal()) {
    unsigned Bytes = Flags.getByValSize();
    if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
    int FI = MFI->CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable);
    // Adjust the SP offset of an interrupt parameter.
    if (CallConv == CallingConv::X86_INTR) {
      MFI->setObjectOffset(FI, Offset);
    }
    return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
  } else {
    int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
                                    VA.getLocMemOffset(), isImmutable);

    // Set SExt or ZExt flag.
    if (VA.getLocInfo() == CCValAssign::ZExt) {
      MFI->setObjectZExt(FI, true);
    } else if (VA.getLocInfo() == CCValAssign::SExt) {
      MFI->setObjectSExt(FI, true);
    }

    // Adjust the SP offset of an interrupt parameter.
    if (CallConv == CallingConv::X86_INTR) {
      MFI->setObjectOffset(FI, Offset);
    }

    SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
    SDValue Val = DAG.getLoad(
        ValVT, dl, Chain, FIN,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), false,
        false, false, 0);
    return ExtendedInMem ?
      DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val) : Val;
  }
}

// FIXME: Get this from tablegen.
static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
                                                const X86Subtarget &Subtarget) {
  assert(Subtarget.is64Bit());

  if (Subtarget.isCallingConvWin64(CallConv)) {
    static const MCPhysReg GPR64ArgRegsWin64[] = {
      X86::RCX, X86::RDX, X86::R8,  X86::R9
    };
    return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
  }

  static const MCPhysReg GPR64ArgRegs64Bit[] = {
    X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
  };
  return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
}

// FIXME: Get this from tablegen.
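// Returns the XMM registers that may hold variadic FP/vector arguments for
// the given calling convention. This is empty on Win64 (where the XMM
// argument registers are shadowed by their paired GPR home slots) and when
// SSE or implicit FP use is disabled.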
2529 static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF, 2530 CallingConv::ID CallConv, 2531 const X86Subtarget &Subtarget) { 2532 assert(Subtarget.is64Bit()); 2533 if (Subtarget.isCallingConvWin64(CallConv)) { 2534 // The XMM registers which might contain var arg parameters are shadowed 2535 // in their paired GPR. So we only need to save the GPR to their home 2536 // slots. 2537 // TODO: __vectorcall will change this. 2538 return None; 2539 } 2540 2541 const Function *Fn = MF.getFunction(); 2542 bool NoImplicitFloatOps = Fn->hasFnAttribute(Attribute::NoImplicitFloat); 2543 bool isSoftFloat = Subtarget.useSoftFloat(); 2544 assert(!(isSoftFloat && NoImplicitFloatOps) && 2545 "SSE register cannot be used when SSE is disabled!"); 2546 if (isSoftFloat || NoImplicitFloatOps || !Subtarget.hasSSE1()) 2547 // Kernel mode asks for SSE to be disabled, so there are no XMM argument 2548 // registers. 2549 return None; 2550 2551 static const MCPhysReg XMMArgRegs64Bit[] = { 2552 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 2553 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 2554 }; 2555 return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit)); 2556 } 2557 2558 SDValue X86TargetLowering::LowerFormalArguments( 2559 SDValue Chain, CallingConv::ID CallConv, bool isVarArg, 2560 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 2561 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 2562 MachineFunction &MF = DAG.getMachineFunction(); 2563 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); 2564 const TargetFrameLowering &TFI = *Subtarget.getFrameLowering(); 2565 2566 const Function *Fn = MF.getFunction(); 2567 if (Fn->hasExternalLinkage() && 2568 Subtarget.isTargetCygMing() && 2569 Fn->getName() == "main") 2570 FuncInfo->setForceFramePointer(true); 2571 2572 MachineFrameInfo *MFI = MF.getFrameInfo(); 2573 bool Is64Bit = Subtarget.is64Bit(); 2574 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv); 2575 2576 assert(!(isVarArg && canGuaranteeTCO(CallConv)) && 2577 "Var args not supported with calling convention fastcc, ghc or hipe"); 2578 2579 if (CallConv == CallingConv::X86_INTR) { 2580 bool isLegal = Ins.size() == 1 || 2581 (Ins.size() == 2 && ((Is64Bit && Ins[1].VT == MVT::i64) || 2582 (!Is64Bit && Ins[1].VT == MVT::i32))); 2583 if (!isLegal) 2584 report_fatal_error("X86 interrupts may take one or two arguments"); 2585 } 2586 2587 // Assign locations to all of the incoming arguments. 2588 SmallVector<CCValAssign, 16> ArgLocs; 2589 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext()); 2590 2591 // Allocate shadow area for Win64 2592 if (IsWin64) 2593 CCInfo.AllocateStack(32, 8); 2594 2595 CCInfo.AnalyzeFormalArguments(Ins, CC_X86); 2596 2597 unsigned LastVal = ~0U; 2598 SDValue ArgValue; 2599 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 2600 CCValAssign &VA = ArgLocs[i]; 2601 // TODO: If an arg is passed in two places (e.g. reg and stack), skip later 2602 // places. 
2603 assert(VA.getValNo() != LastVal && 2604 "Don't support value assigned to multiple locs yet"); 2605 (void)LastVal; 2606 LastVal = VA.getValNo(); 2607 2608 if (VA.isRegLoc()) { 2609 EVT RegVT = VA.getLocVT(); 2610 const TargetRegisterClass *RC; 2611 if (RegVT == MVT::i32) 2612 RC = &X86::GR32RegClass; 2613 else if (Is64Bit && RegVT == MVT::i64) 2614 RC = &X86::GR64RegClass; 2615 else if (RegVT == MVT::f32) 2616 RC = &X86::FR32RegClass; 2617 else if (RegVT == MVT::f64) 2618 RC = &X86::FR64RegClass; 2619 else if (RegVT == MVT::f128) 2620 RC = &X86::FR128RegClass; 2621 else if (RegVT.is512BitVector()) 2622 RC = &X86::VR512RegClass; 2623 else if (RegVT.is256BitVector()) 2624 RC = &X86::VR256RegClass; 2625 else if (RegVT.is128BitVector()) 2626 RC = &X86::VR128RegClass; 2627 else if (RegVT == MVT::x86mmx) 2628 RC = &X86::VR64RegClass; 2629 else if (RegVT == MVT::i1) 2630 RC = &X86::VK1RegClass; 2631 else if (RegVT == MVT::v8i1) 2632 RC = &X86::VK8RegClass; 2633 else if (RegVT == MVT::v16i1) 2634 RC = &X86::VK16RegClass; 2635 else if (RegVT == MVT::v32i1) 2636 RC = &X86::VK32RegClass; 2637 else if (RegVT == MVT::v64i1) 2638 RC = &X86::VK64RegClass; 2639 else 2640 llvm_unreachable("Unknown argument type!"); 2641 2642 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 2643 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); 2644 2645 // If this is an 8 or 16-bit value, it is really passed promoted to 32 2646 // bits. Insert an assert[sz]ext to capture this, then truncate to the 2647 // right size. 2648 if (VA.getLocInfo() == CCValAssign::SExt) 2649 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, 2650 DAG.getValueType(VA.getValVT())); 2651 else if (VA.getLocInfo() == CCValAssign::ZExt) 2652 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, 2653 DAG.getValueType(VA.getValVT())); 2654 else if (VA.getLocInfo() == CCValAssign::BCvt) 2655 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue); 2656 2657 if (VA.isExtInLoc()) { 2658 // Handle MMX values passed in XMM regs. 2659 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1) 2660 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue); 2661 else 2662 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 2663 } 2664 } else { 2665 assert(VA.isMemLoc()); 2666 ArgValue = LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, i); 2667 } 2668 2669 // If value is passed via pointer - do a load. 2670 if (VA.getLocInfo() == CCValAssign::Indirect) 2671 ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, 2672 MachinePointerInfo(), false, false, false, 0); 2673 2674 InVals.push_back(ArgValue); 2675 } 2676 2677 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 2678 // Swift calling convention does not require we copy the sret argument 2679 // into %rax/%eax for the return. We don't set SRetReturnReg for Swift. 2680 if (CallConv == CallingConv::Swift) 2681 continue; 2682 2683 // All x86 ABIs require that for returning structs by value we copy the 2684 // sret argument into %rax/%eax (depending on ABI) for the return. Save 2685 // the argument into a virtual register so that we can access it from the 2686 // return points. 
2687 if (Ins[i].Flags.isSRet()) { 2688 unsigned Reg = FuncInfo->getSRetReturnReg(); 2689 if (!Reg) { 2690 MVT PtrTy = getPointerTy(DAG.getDataLayout()); 2691 Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy)); 2692 FuncInfo->setSRetReturnReg(Reg); 2693 } 2694 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[i]); 2695 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain); 2696 break; 2697 } 2698 } 2699 2700 unsigned StackSize = CCInfo.getNextStackOffset(); 2701 // Align stack specially for tail calls. 2702 if (shouldGuaranteeTCO(CallConv, 2703 MF.getTarget().Options.GuaranteedTailCallOpt)) 2704 StackSize = GetAlignedArgumentStackSize(StackSize, DAG); 2705 2706 // If the function takes variable number of arguments, make a frame index for 2707 // the start of the first vararg value... for expansion of llvm.va_start. We 2708 // can skip this if there are no va_start calls. 2709 if (MFI->hasVAStart() && 2710 (Is64Bit || (CallConv != CallingConv::X86_FastCall && 2711 CallConv != CallingConv::X86_ThisCall))) { 2712 FuncInfo->setVarArgsFrameIndex( 2713 MFI->CreateFixedObject(1, StackSize, true)); 2714 } 2715 2716 // Figure out if XMM registers are in use. 2717 assert(!(Subtarget.useSoftFloat() && 2718 Fn->hasFnAttribute(Attribute::NoImplicitFloat)) && 2719 "SSE register cannot be used when SSE is disabled!"); 2720 2721 // 64-bit calling conventions support varargs and register parameters, so we 2722 // have to do extra work to spill them in the prologue. 2723 if (Is64Bit && isVarArg && MFI->hasVAStart()) { 2724 // Find the first unallocated argument registers. 2725 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget); 2726 ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget); 2727 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs); 2728 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs); 2729 assert(!(NumXMMRegs && !Subtarget.hasSSE1()) && 2730 "SSE register cannot be used when SSE is disabled!"); 2731 2732 // Gather all the live in physical registers. 2733 SmallVector<SDValue, 6> LiveGPRs; 2734 SmallVector<SDValue, 8> LiveXMMRegs; 2735 SDValue ALVal; 2736 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) { 2737 unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass); 2738 LiveGPRs.push_back( 2739 DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64)); 2740 } 2741 if (!ArgXMMs.empty()) { 2742 unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass); 2743 ALVal = DAG.getCopyFromReg(Chain, dl, AL, MVT::i8); 2744 for (MCPhysReg Reg : ArgXMMs.slice(NumXMMRegs)) { 2745 unsigned XMMReg = MF.addLiveIn(Reg, &X86::VR128RegClass); 2746 LiveXMMRegs.push_back( 2747 DAG.getCopyFromReg(Chain, dl, XMMReg, MVT::v4f32)); 2748 } 2749 } 2750 2751 if (IsWin64) { 2752 // Get to the caller-allocated home save location. Add 8 to account 2753 // for the return address. 2754 int HomeOffset = TFI.getOffsetOfLocalArea() + 8; 2755 FuncInfo->setRegSaveFrameIndex( 2756 MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false)); 2757 // Fixup to set vararg frame on shadow area (4 x i64). 2758 if (NumIntRegs < 4) 2759 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex()); 2760 } else { 2761 // For X86-64, if there are vararg parameters that are passed via 2762 // registers, then we must store them to their spots on the stack so 2763 // they may be loaded by dereferencing the result of va_next. 
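      // With the full SysV register set (6 GPRs, 8 XMMs) the register save
      // area is 6*8 + 8*16 = 176 bytes; the gp_offset/fp_offset values below
      // record how much of it was already consumed by named arguments.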
2764 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8); 2765 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16); 2766 FuncInfo->setRegSaveFrameIndex(MFI->CreateStackObject( 2767 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false)); 2768 } 2769 2770 // Store the integer parameter registers. 2771 SmallVector<SDValue, 8> MemOps; 2772 SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), 2773 getPointerTy(DAG.getDataLayout())); 2774 unsigned Offset = FuncInfo->getVarArgsGPOffset(); 2775 for (SDValue Val : LiveGPRs) { 2776 SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), 2777 RSFIN, DAG.getIntPtrConstant(Offset, dl)); 2778 SDValue Store = 2779 DAG.getStore(Val.getValue(1), dl, Val, FIN, 2780 MachinePointerInfo::getFixedStack( 2781 DAG.getMachineFunction(), 2782 FuncInfo->getRegSaveFrameIndex(), Offset), 2783 false, false, 0); 2784 MemOps.push_back(Store); 2785 Offset += 8; 2786 } 2787 2788 if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) { 2789 // Now store the XMM (fp + vector) parameter registers. 2790 SmallVector<SDValue, 12> SaveXMMOps; 2791 SaveXMMOps.push_back(Chain); 2792 SaveXMMOps.push_back(ALVal); 2793 SaveXMMOps.push_back(DAG.getIntPtrConstant( 2794 FuncInfo->getRegSaveFrameIndex(), dl)); 2795 SaveXMMOps.push_back(DAG.getIntPtrConstant( 2796 FuncInfo->getVarArgsFPOffset(), dl)); 2797 SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(), 2798 LiveXMMRegs.end()); 2799 MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl, 2800 MVT::Other, SaveXMMOps)); 2801 } 2802 2803 if (!MemOps.empty()) 2804 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); 2805 } 2806 2807 if (isVarArg && MFI->hasMustTailInVarArgFunc()) { 2808 // Find the largest legal vector type. 2809 MVT VecVT = MVT::Other; 2810 // FIXME: Only some x86_32 calling conventions support AVX512. 2811 if (Subtarget.hasAVX512() && 2812 (Is64Bit || (CallConv == CallingConv::X86_VectorCall || 2813 CallConv == CallingConv::Intel_OCL_BI))) 2814 VecVT = MVT::v16f32; 2815 else if (Subtarget.hasAVX()) 2816 VecVT = MVT::v8f32; 2817 else if (Subtarget.hasSSE2()) 2818 VecVT = MVT::v4f32; 2819 2820 // We forward some GPRs and some vector types. 2821 SmallVector<MVT, 2> RegParmTypes; 2822 MVT IntVT = Is64Bit ? MVT::i64 : MVT::i32; 2823 RegParmTypes.push_back(IntVT); 2824 if (VecVT != MVT::Other) 2825 RegParmTypes.push_back(VecVT); 2826 2827 // Compute the set of forwarded registers. The rest are scratch. 2828 SmallVectorImpl<ForwardedRegister> &Forwards = 2829 FuncInfo->getForwardedMustTailRegParms(); 2830 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86); 2831 2832 // Conservatively forward AL on x86_64, since it might be used for varargs. 2833 if (Is64Bit && !CCInfo.isAllocated(X86::AL)) { 2834 unsigned ALVReg = MF.addLiveIn(X86::AL, &X86::GR8RegClass); 2835 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8)); 2836 } 2837 2838 // Copy all forwards from physical to virtual registers. 2839 for (ForwardedRegister &F : Forwards) { 2840 // FIXME: Can we use a less constrained schedule? 2841 SDValue RegVal = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT); 2842 F.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(F.VT)); 2843 Chain = DAG.getCopyToReg(Chain, dl, F.VReg, RegVal); 2844 } 2845 } 2846 2847 // Some CCs need callee pop. 2848 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, 2849 MF.getTarget().Options.GuaranteedTailCallOpt)) { 2850 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything. 
2851 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) { 2852 // X86 interrupts must pop the error code if present 2853 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 8 : 4); 2854 } else { 2855 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing. 2856 // If this is an sret function, the return should pop the hidden pointer. 2857 if (!Is64Bit && !canGuaranteeTCO(CallConv) && 2858 !Subtarget.getTargetTriple().isOSMSVCRT() && 2859 argsAreStructReturn(Ins, Subtarget.isTargetMCU()) == StackStructReturn) 2860 FuncInfo->setBytesToPopOnReturn(4); 2861 } 2862 2863 if (!Is64Bit) { 2864 // RegSaveFrameIndex is X86-64 only. 2865 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA); 2866 if (CallConv == CallingConv::X86_FastCall || 2867 CallConv == CallingConv::X86_ThisCall) 2868 // fastcc functions can't have varargs. 2869 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA); 2870 } 2871 2872 FuncInfo->setArgumentStackSize(StackSize); 2873 2874 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) { 2875 EHPersonality Personality = classifyEHPersonality(Fn->getPersonalityFn()); 2876 if (Personality == EHPersonality::CoreCLR) { 2877 assert(Is64Bit); 2878 // TODO: Add a mechanism to frame lowering that will allow us to indicate 2879 // that we'd prefer this slot be allocated towards the bottom of the frame 2880 // (i.e. near the stack pointer after allocating the frame). Every 2881 // funclet needs a copy of this slot in its (mostly empty) frame, and the 2882 // offset from the bottom of this and each funclet's frame must be the 2883 // same, so the size of funclets' (mostly empty) frames is dictated by 2884 // how far this slot is from the bottom (since they allocate just enough 2885 // space to accommodate holding this slot at the correct offset). 2886 int PSPSymFI = MFI->CreateStackObject(8, 8, /*isSS=*/false); 2887 EHInfo->PSPSymFrameIdx = PSPSymFI; 2888 } 2889 } 2890 2891 return Chain; 2892 } 2893 2894 SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, 2895 SDValue Arg, const SDLoc &dl, 2896 SelectionDAG &DAG, 2897 const CCValAssign &VA, 2898 ISD::ArgFlagsTy Flags) const { 2899 unsigned LocMemOffset = VA.getLocMemOffset(); 2900 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); 2901 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), 2902 StackPtr, PtrOff); 2903 if (Flags.isByVal()) 2904 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); 2905 2906 return DAG.getStore( 2907 Chain, dl, Arg, PtrOff, 2908 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset), 2909 false, false, 0); 2910 } 2911 2912 /// Emit a load of return address if tail call 2913 /// optimization is performed and it is required. 2914 SDValue X86TargetLowering::EmitTailCallLoadRetAddr( 2915 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall, 2916 bool Is64Bit, int FPDiff, const SDLoc &dl) const { 2917 // Adjust the Return address stack slot. 2918 EVT VT = getPointerTy(DAG.getDataLayout()); 2919 OutRetAddr = getReturnAddressFrameIndex(DAG); 2920 2921 // Load the "old" Return address. 2922 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo(), 2923 false, false, false, 0); 2924 return SDValue(OutRetAddr.getNode(), 1); 2925 } 2926 2927 /// Emit a store of the return address if tail call 2928 /// optimization is performed and it is required (FPDiff!=0). 
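/// The return address is re-stored into a fixed stack object of SlotSize bytes
/// created at offset FPDiff - SlotSize, i.e. the slot it must occupy once the
/// stack has been shifted by FPDiff bytes for the tail call.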
2929 static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF, 2930 SDValue Chain, SDValue RetAddrFrIdx, 2931 EVT PtrVT, unsigned SlotSize, 2932 int FPDiff, const SDLoc &dl) { 2933 // Store the return address to the appropriate stack slot. 2934 if (!FPDiff) return Chain; 2935 // Calculate the new stack slot for the return address. 2936 int NewReturnAddrFI = 2937 MF.getFrameInfo()->CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize, 2938 false); 2939 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT); 2940 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx, 2941 MachinePointerInfo::getFixedStack( 2942 DAG.getMachineFunction(), NewReturnAddrFI), 2943 false, false, 0); 2944 return Chain; 2945 } 2946 2947 /// Returns a vector_shuffle mask for an movs{s|d}, movd 2948 /// operation of specified width. 2949 static SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1, 2950 SDValue V2) { 2951 unsigned NumElems = VT.getVectorNumElements(); 2952 SmallVector<int, 8> Mask; 2953 Mask.push_back(NumElems); 2954 for (unsigned i = 1; i != NumElems; ++i) 2955 Mask.push_back(i); 2956 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask); 2957 } 2958 2959 SDValue 2960 X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, 2961 SmallVectorImpl<SDValue> &InVals) const { 2962 SelectionDAG &DAG = CLI.DAG; 2963 SDLoc &dl = CLI.DL; 2964 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 2965 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 2966 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 2967 SDValue Chain = CLI.Chain; 2968 SDValue Callee = CLI.Callee; 2969 CallingConv::ID CallConv = CLI.CallConv; 2970 bool &isTailCall = CLI.IsTailCall; 2971 bool isVarArg = CLI.IsVarArg; 2972 2973 MachineFunction &MF = DAG.getMachineFunction(); 2974 bool Is64Bit = Subtarget.is64Bit(); 2975 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv); 2976 StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU()); 2977 bool IsSibcall = false; 2978 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>(); 2979 auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls"); 2980 2981 if (CallConv == CallingConv::X86_INTR) 2982 report_fatal_error("X86 interrupts may not be called directly"); 2983 2984 if (Attr.getValueAsString() == "true") 2985 isTailCall = false; 2986 2987 if (Subtarget.isPICStyleGOT() && 2988 !MF.getTarget().Options.GuaranteedTailCallOpt) { 2989 // If we are using a GOT, disable tail calls to external symbols with 2990 // default visibility. Tail calling such a symbol requires using a GOT 2991 // relocation, which forces early binding of the symbol. This breaks code 2992 // that require lazy function symbol resolution. Using musttail or 2993 // GuaranteedTailCallOpt will override this. 2994 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee); 2995 if (!G || (!G->getGlobal()->hasLocalLinkage() && 2996 G->getGlobal()->hasDefaultVisibility())) 2997 isTailCall = false; 2998 } 2999 3000 bool IsMustTail = CLI.CS && CLI.CS->isMustTailCall(); 3001 if (IsMustTail) { 3002 // Force this to be a tail call. The verifier rules are enough to ensure 3003 // that we can lower this successfully without moving the return address 3004 // around. 3005 isTailCall = true; 3006 } else if (isTailCall) { 3007 // Check if it's really possible to do a tail call. 
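    // IsEligibleForTailCallOptimization checks, among other things, that the
    // calling conventions are compatible, that stack arguments already sit in
    // the caller's matching slots, and that the callee preserves every
    // register the caller needs preserved.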
3008 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
3009 isVarArg, SR != NotStructReturn,
3010 MF.getFunction()->hasStructRetAttr(), CLI.RetTy,
3011 Outs, OutVals, Ins, DAG);
3012
3013 // Sibcalls are automatically detected tailcalls which do not require
3014 // ABI changes.
3015 if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
3016 IsSibcall = true;
3017
3018 if (isTailCall)
3019 ++NumTailCalls;
3020 }
3021
3022 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
3023 "Var args not supported with calling convention fastcc, ghc or hipe");
3024
3025 // Analyze operands of the call, assigning locations to each operand.
3026 SmallVector<CCValAssign, 16> ArgLocs;
3027 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
3028
3029 // Allocate shadow area for Win64
3030 if (IsWin64)
3031 CCInfo.AllocateStack(32, 8);
3032
3033 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
3034
3035 // Get a count of how many bytes are to be pushed on the stack.
3036 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3037 if (IsSibcall)
3038 // This is a sibcall. The memory operands are already available in the
3039 // caller's own stack frame.
3040 NumBytes = 0;
3041 else if (MF.getTarget().Options.GuaranteedTailCallOpt &&
3042 canGuaranteeTCO(CallConv))
3043 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
3044
3045 int FPDiff = 0;
3046 if (isTailCall && !IsSibcall && !IsMustTail) {
3047 // Lower arguments at fp - stackoffset + fpdiff.
3048 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
3049
3050 FPDiff = NumBytesCallerPushed - NumBytes;
3051
3052 // Set the delta of movement of the returnaddr stackslot, but only if the
3053 // new delta is less than the previously recorded delta.
3054 if (FPDiff < X86Info->getTCReturnAddrDelta())
3055 X86Info->setTCReturnAddrDelta(FPDiff);
3056 }
3057
3058 unsigned NumBytesToPush = NumBytes;
3059 unsigned NumBytesToPop = NumBytes;
3060
3061 // If we have an inalloca argument, all stack space has already been allocated
3062 // for us and is right at the top of the stack. We don't support multiple
3063 // arguments passed in memory when using inalloca.
3064 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
3065 NumBytesToPush = 0;
3066 if (!ArgLocs.back().isMemLoc())
3067 report_fatal_error("cannot use inalloca attribute on a register "
3068 "parameter");
3069 if (ArgLocs.back().getLocMemOffset() != 0)
3070 report_fatal_error("any parameter with the inalloca attribute must be "
3071 "the only memory argument");
3072 }
3073
3074 if (!IsSibcall)
3075 Chain = DAG.getCALLSEQ_START(
3076 Chain, DAG.getIntPtrConstant(NumBytesToPush, dl, true), dl);
3077
3078 SDValue RetAddrFrIdx;
3079 // Load return address for tail calls.
3080 if (isTailCall && FPDiff)
3081 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
3082 Is64Bit, FPDiff, dl);
3083
3084 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
3085 SmallVector<SDValue, 8> MemOpChains;
3086 SDValue StackPtr;
3087
3088 // Walk the register/memloc assignments, inserting copies/loads. In the case
3089 // of tail call optimization, arguments are handled later.
3090 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
3091 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3092 // Skip inalloca arguments, they have already been written.
3093 ISD::ArgFlagsTy Flags = Outs[i].Flags; 3094 if (Flags.isInAlloca()) 3095 continue; 3096 3097 CCValAssign &VA = ArgLocs[i]; 3098 EVT RegVT = VA.getLocVT(); 3099 SDValue Arg = OutVals[i]; 3100 bool isByVal = Flags.isByVal(); 3101 3102 // Promote the value if needed. 3103 switch (VA.getLocInfo()) { 3104 default: llvm_unreachable("Unknown loc info!"); 3105 case CCValAssign::Full: break; 3106 case CCValAssign::SExt: 3107 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg); 3108 break; 3109 case CCValAssign::ZExt: 3110 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg); 3111 break; 3112 case CCValAssign::AExt: 3113 if (Arg.getValueType().isVector() && 3114 Arg.getValueType().getVectorElementType() == MVT::i1) 3115 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg); 3116 else if (RegVT.is128BitVector()) { 3117 // Special case: passing MMX values in XMM registers. 3118 Arg = DAG.getBitcast(MVT::i64, Arg); 3119 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg); 3120 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg); 3121 } else 3122 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg); 3123 break; 3124 case CCValAssign::BCvt: 3125 Arg = DAG.getBitcast(RegVT, Arg); 3126 break; 3127 case CCValAssign::Indirect: { 3128 // Store the argument. 3129 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT()); 3130 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); 3131 Chain = DAG.getStore( 3132 Chain, dl, Arg, SpillSlot, 3133 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), 3134 false, false, 0); 3135 Arg = SpillSlot; 3136 break; 3137 } 3138 } 3139 3140 if (VA.isRegLoc()) { 3141 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); 3142 if (isVarArg && IsWin64) { 3143 // Win64 ABI requires argument XMM reg to be copied to the corresponding 3144 // shadow reg if callee is a varargs function. 3145 unsigned ShadowReg = 0; 3146 switch (VA.getLocReg()) { 3147 case X86::XMM0: ShadowReg = X86::RCX; break; 3148 case X86::XMM1: ShadowReg = X86::RDX; break; 3149 case X86::XMM2: ShadowReg = X86::R8; break; 3150 case X86::XMM3: ShadowReg = X86::R9; break; 3151 } 3152 if (ShadowReg) 3153 RegsToPass.push_back(std::make_pair(ShadowReg, Arg)); 3154 } 3155 } else if (!IsSibcall && (!isTailCall || isByVal)) { 3156 assert(VA.isMemLoc()); 3157 if (!StackPtr.getNode()) 3158 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(), 3159 getPointerTy(DAG.getDataLayout())); 3160 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, 3161 dl, DAG, VA, Flags)); 3162 } 3163 } 3164 3165 if (!MemOpChains.empty()) 3166 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); 3167 3168 if (Subtarget.isPICStyleGOT()) { 3169 // ELF / PIC requires GOT in the EBX register before function calls via PLT 3170 // GOT pointer. 3171 if (!isTailCall) { 3172 RegsToPass.push_back(std::make_pair( 3173 unsigned(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), 3174 getPointerTy(DAG.getDataLayout())))); 3175 } else { 3176 // If we are tail calling and generating PIC/GOT style code load the 3177 // address of the callee into ECX. The value in ecx is used as target of 3178 // the tail jump. This is done to circumvent the ebx/callee-saved problem 3179 // for tail calls on PIC/GOT architectures. Normally we would just put the 3180 // address of GOT into ebx and then call target@PLT. But for tail calls 3181 // ebx would be restored (since ebx is callee saved) before jumping to the 3182 // target@PLT. 
3183
3184 // Note: The actual moving to ECX is done further down.
3185 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3186 if (G && !G->getGlobal()->hasLocalLinkage() &&
3187 G->getGlobal()->hasDefaultVisibility())
3188 Callee = LowerGlobalAddress(Callee, DAG);
3189 else if (isa<ExternalSymbolSDNode>(Callee))
3190 Callee = LowerExternalSymbol(Callee, DAG);
3191 }
3192 }
3193
3194 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
3195 // From AMD64 ABI document:
3196 // For calls that may call functions that use varargs or stdargs
3197 // (prototype-less calls or calls to functions containing ellipsis (...) in
3198 // the declaration) %al is used as a hidden argument to specify the number
3199 // of SSE registers used. The contents of %al do not need to match exactly
3200 // the number of registers, but must be an upper bound on the number of SSE
3201 // registers used and must be in the range 0 - 8 inclusive.
3202
3203 // Count the number of XMM registers allocated.
3204 static const MCPhysReg XMMArgRegs[] = {
3205 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3206 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3207 };
3208 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
3209 assert((Subtarget.hasSSE1() || !NumXMMRegs)
3210 && "SSE registers cannot be used when SSE is disabled");
3211
3212 RegsToPass.push_back(std::make_pair(unsigned(X86::AL),
3213 DAG.getConstant(NumXMMRegs, dl,
3214 MVT::i8)));
3215 }
3216
3217 if (isVarArg && IsMustTail) {
3218 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
3219 for (const auto &F : Forwards) {
3220 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
3221 RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
3222 }
3223 }
3224
3225 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
3226 // don't need this because the eligibility check rejects calls that require
3227 // shuffling arguments passed in memory.
3228 if (!IsSibcall && isTailCall) {
3229 // Force all the incoming stack arguments to be loaded from the stack
3230 // before any new outgoing arguments are stored to the stack, because the
3231 // outgoing stack slots may alias the incoming argument stack slots, and
3232 // the alias isn't otherwise explicit. This is slightly more conservative
3233 // than necessary, because it means that each store effectively depends
3234 // on every argument instead of just those arguments it would clobber.
3235 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
3236
3237 SmallVector<SDValue, 8> MemOpChains2;
3238 SDValue FIN;
3239 int FI = 0;
3240 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3241 CCValAssign &VA = ArgLocs[i];
3242 if (VA.isRegLoc())
3243 continue;
3244 assert(VA.isMemLoc());
3245 SDValue Arg = OutVals[i];
3246 ISD::ArgFlagsTy Flags = Outs[i].Flags;
3247 // Skip inalloca arguments. They don't require any work.
3248 if (Flags.isInAlloca())
3249 continue;
3250 // Create frame index.
3251 int32_t Offset = VA.getLocMemOffset()+FPDiff;
3252 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
3253 FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
3254 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3255
3256 if (Flags.isByVal()) {
3257 // Copy relative to framepointer.
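          // Source addresses the byval data at StackPtr + LocMemOffset; FIN is
          // the fixed stack object (at LocMemOffset + FPDiff) where it must
          // end up once the frame has been shifted for the tail call.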
3258 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl); 3259 if (!StackPtr.getNode()) 3260 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(), 3261 getPointerTy(DAG.getDataLayout())); 3262 Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), 3263 StackPtr, Source); 3264 3265 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN, 3266 ArgChain, 3267 Flags, DAG, dl)); 3268 } else { 3269 // Store relative to framepointer. 3270 MemOpChains2.push_back(DAG.getStore( 3271 ArgChain, dl, Arg, FIN, 3272 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), 3273 false, false, 0)); 3274 } 3275 } 3276 3277 if (!MemOpChains2.empty()) 3278 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2); 3279 3280 // Store the return address to the appropriate stack slot. 3281 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, 3282 getPointerTy(DAG.getDataLayout()), 3283 RegInfo->getSlotSize(), FPDiff, dl); 3284 } 3285 3286 // Build a sequence of copy-to-reg nodes chained together with token chain 3287 // and flag operands which copy the outgoing args into registers. 3288 SDValue InFlag; 3289 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 3290 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 3291 RegsToPass[i].second, InFlag); 3292 InFlag = Chain.getValue(1); 3293 } 3294 3295 if (DAG.getTarget().getCodeModel() == CodeModel::Large) { 3296 assert(Is64Bit && "Large code model is only legal in 64-bit mode."); 3297 // In the 64-bit large code model, we have to make all calls 3298 // through a register, since the call instruction's 32-bit 3299 // pc-relative offset may not be large enough to hold the whole 3300 // address. 3301 } else if (Callee->getOpcode() == ISD::GlobalAddress) { 3302 // If the callee is a GlobalAddress node (quite common, every direct call 3303 // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack 3304 // it. 3305 GlobalAddressSDNode* G = cast<GlobalAddressSDNode>(Callee); 3306 3307 // We should use extra load for direct calls to dllimported functions in 3308 // non-JIT mode. 3309 const GlobalValue *GV = G->getGlobal(); 3310 if (!GV->hasDLLImportStorageClass()) { 3311 unsigned char OpFlags = Subtarget.classifyGlobalFunctionReference(GV); 3312 3313 Callee = DAG.getTargetGlobalAddress( 3314 GV, dl, getPointerTy(DAG.getDataLayout()), G->getOffset(), OpFlags); 3315 3316 if (OpFlags == X86II::MO_GOTPCREL) { 3317 // Add a wrapper. 3318 Callee = DAG.getNode(X86ISD::WrapperRIP, dl, 3319 getPointerTy(DAG.getDataLayout()), Callee); 3320 // Add extra indirection 3321 Callee = DAG.getLoad( 3322 getPointerTy(DAG.getDataLayout()), dl, DAG.getEntryNode(), Callee, 3323 MachinePointerInfo::getGOT(DAG.getMachineFunction()), false, false, 3324 false, 0); 3325 } 3326 } 3327 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 3328 const Module *Mod = DAG.getMachineFunction().getFunction()->getParent(); 3329 unsigned char OpFlags = 3330 Subtarget.classifyGlobalFunctionReference(nullptr, *Mod); 3331 3332 Callee = DAG.getTargetExternalSymbol( 3333 S->getSymbol(), getPointerTy(DAG.getDataLayout()), OpFlags); 3334 } else if (Subtarget.isTarget64BitILP32() && 3335 Callee->getValueType(0) == MVT::i32) { 3336 // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI 3337 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee); 3338 } 3339 3340 // Returns a chain & a flag for retval copy to use. 
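  // The call node produces a token chain and a glue value; the glue ties the
  // physreg-to-vreg copies of the return value directly to the call node.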
3341 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 3342 SmallVector<SDValue, 8> Ops; 3343 3344 if (!IsSibcall && isTailCall) { 3345 Chain = DAG.getCALLSEQ_END(Chain, 3346 DAG.getIntPtrConstant(NumBytesToPop, dl, true), 3347 DAG.getIntPtrConstant(0, dl, true), InFlag, dl); 3348 InFlag = Chain.getValue(1); 3349 } 3350 3351 Ops.push_back(Chain); 3352 Ops.push_back(Callee); 3353 3354 if (isTailCall) 3355 Ops.push_back(DAG.getConstant(FPDiff, dl, MVT::i32)); 3356 3357 // Add argument registers to the end of the list so that they are known live 3358 // into the call. 3359 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 3360 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 3361 RegsToPass[i].second.getValueType())); 3362 3363 // Add a register mask operand representing the call-preserved registers. 3364 const uint32_t *Mask = RegInfo->getCallPreservedMask(MF, CallConv); 3365 assert(Mask && "Missing call preserved mask for calling convention"); 3366 3367 // If this is an invoke in a 32-bit function using a funclet-based 3368 // personality, assume the function clobbers all registers. If an exception 3369 // is thrown, the runtime will not restore CSRs. 3370 // FIXME: Model this more precisely so that we can register allocate across 3371 // the normal edge and spill and fill across the exceptional edge. 3372 if (!Is64Bit && CLI.CS && CLI.CS->isInvoke()) { 3373 const Function *CallerFn = MF.getFunction(); 3374 EHPersonality Pers = 3375 CallerFn->hasPersonalityFn() 3376 ? classifyEHPersonality(CallerFn->getPersonalityFn()) 3377 : EHPersonality::Unknown; 3378 if (isFuncletEHPersonality(Pers)) 3379 Mask = RegInfo->getNoPreservedMask(); 3380 } 3381 3382 Ops.push_back(DAG.getRegisterMask(Mask)); 3383 3384 if (InFlag.getNode()) 3385 Ops.push_back(InFlag); 3386 3387 if (isTailCall) { 3388 // We used to do: 3389 //// If this is the first return lowered for this function, add the regs 3390 //// to the liveout set for the function. 3391 // This isn't right, although it's probably harmless on x86; liveouts 3392 // should be computed from returns not tail calls. Consider a void 3393 // function making a tail call to a function returning int. 3394 MF.getFrameInfo()->setHasTailCall(); 3395 return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops); 3396 } 3397 3398 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops); 3399 InFlag = Chain.getValue(1); 3400 3401 // Create the CALLSEQ_END node. 3402 unsigned NumBytesForCalleeToPop; 3403 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, 3404 DAG.getTarget().Options.GuaranteedTailCallOpt)) 3405 NumBytesForCalleeToPop = NumBytes; // Callee pops everything 3406 else if (!Is64Bit && !canGuaranteeTCO(CallConv) && 3407 !Subtarget.getTargetTriple().isOSMSVCRT() && 3408 SR == StackStructReturn) 3409 // If this is a call to a struct-return function, the callee 3410 // pops the hidden struct pointer, so we have to push it back. 3411 // This is common for Darwin/X86, Linux & Mingw32 targets. 3412 // For MSVC Win32 targets, the caller pops the hidden struct pointer. 3413 NumBytesForCalleeToPop = 4; 3414 else 3415 NumBytesForCalleeToPop = 0; // Callee pops nothing. 3416 3417 if (CLI.DoesNotReturn && !getTargetMachine().Options.TrapUnreachable) { 3418 // No need to reset the stack after the call if the call doesn't return. To 3419 // make the MI verify, we'll pretend the callee does it for us. 3420 NumBytesForCalleeToPop = NumBytes; 3421 } 3422 3423 // Returns a flag for retval copy to use. 
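  // Sibcalls did not emit a CALLSEQ_START above, so there is no matching
  // CALLSEQ_END to emit here.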
3424 if (!IsSibcall) {
3425 Chain = DAG.getCALLSEQ_END(Chain,
3426 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
3427 DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl,
3428 true),
3429 InFlag, dl);
3430 InFlag = Chain.getValue(1);
3431 }
3432
3433 // Handle result values, copying them out of physregs into vregs that we
3434 // return.
3435 return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
3436 Ins, dl, DAG, InVals);
3437 }
3438
3439 //===----------------------------------------------------------------------===//
3440 // Fast Calling Convention (tail call) implementation
3441 //===----------------------------------------------------------------------===//
3442
3443 // Like the stdcall convention, the callee cleans up the arguments, except
3444 // that ECX is reserved for storing the tail called function address. Only 2
3445 // registers are free for argument passing (inreg). Tail call optimization is
3446 // performed provided:
3447 // * tailcallopt is enabled
3448 // * caller/callee are fastcc
3449 // On X86_64, with GOT-style position independent code, only local
3450 // (within module) calls are supported at the moment.
3451 // To keep the stack aligned according to the platform ABI, the function
3452 // GetAlignedArgumentStackSize ensures that the argument delta is always a
3453 // multiple of the stack alignment. (Dynamic linkers, e.g. darwin's dyld, need this.)
3454 // If a tail-called callee has more arguments than the caller, the caller
3455 // needs to make sure that there is room to move the RETADDR to. This is
3456 // achieved by reserving an area the size of the argument delta right after the
3457 // original RETADDR, but before the saved frame pointer or the spilled registers,
3458 // e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4)
3459 // stack layout:
3460 // arg1
3461 // arg2
3462 // RETADDR
3463 // [ new RETADDR
3464 // move area ]
3465 // (possible EBP)
3466 // ESI
3467 // EDI
3468 // local1 ..
3469
3470 /// Round up the argument stack size so that, together with the return address
3471 /// slot, it stays aligned; e.g. to 16n + 12 for a 16-byte stack alignment.
3472 unsigned
3473 X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
3474 SelectionDAG& DAG) const {
3475 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
3476 const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
3477 unsigned StackAlignment = TFI.getStackAlignment();
3478 uint64_t AlignMask = StackAlignment - 1;
3479 int64_t Offset = StackSize;
3480 unsigned SlotSize = RegInfo->getSlotSize();
3481 if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
3482 // The residue is at most StackAlignment - SlotSize, so just add the difference.
3483 Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
3484 } else {
3485 // Mask out the lower bits, then add StackAlignment plus StackAlignment - SlotSize.
3486 Offset = ((~AlignMask) & Offset) + StackAlignment +
3487 (StackAlignment-SlotSize);
3488 }
3489 return Offset;
3490 }
3491
3492 /// Return true if the given stack call argument is already available in the
3493 /// same relative position in the caller's incoming argument stack.
3494 static
3495 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
3496 MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
3497 const X86InstrInfo *TII, const CCValAssign &VA) {
3498 unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
3499
3500 for (;;) {
3501 // Look through nodes that don't alter the bits of the incoming value.
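    // (Zero/any extends and bitcasts, plus truncates that merely undo an
    // AssertZext of the same narrow type.)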
3502 unsigned Op = Arg.getOpcode(); 3503 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) { 3504 Arg = Arg.getOperand(0); 3505 continue; 3506 } 3507 if (Op == ISD::TRUNCATE) { 3508 const SDValue &TruncInput = Arg.getOperand(0); 3509 if (TruncInput.getOpcode() == ISD::AssertZext && 3510 cast<VTSDNode>(TruncInput.getOperand(1))->getVT() == 3511 Arg.getValueType()) { 3512 Arg = TruncInput.getOperand(0); 3513 continue; 3514 } 3515 } 3516 break; 3517 } 3518 3519 int FI = INT_MAX; 3520 if (Arg.getOpcode() == ISD::CopyFromReg) { 3521 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); 3522 if (!TargetRegisterInfo::isVirtualRegister(VR)) 3523 return false; 3524 MachineInstr *Def = MRI->getVRegDef(VR); 3525 if (!Def) 3526 return false; 3527 if (!Flags.isByVal()) { 3528 if (!TII->isLoadFromStackSlot(*Def, FI)) 3529 return false; 3530 } else { 3531 unsigned Opcode = Def->getOpcode(); 3532 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r || 3533 Opcode == X86::LEA64_32r) && 3534 Def->getOperand(1).isFI()) { 3535 FI = Def->getOperand(1).getIndex(); 3536 Bytes = Flags.getByValSize(); 3537 } else 3538 return false; 3539 } 3540 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) { 3541 if (Flags.isByVal()) 3542 // ByVal argument is passed in as a pointer but it's now being 3543 // dereferenced. e.g. 3544 // define @foo(%struct.X* %A) { 3545 // tail call @bar(%struct.X* byval %A) 3546 // } 3547 return false; 3548 SDValue Ptr = Ld->getBasePtr(); 3549 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr); 3550 if (!FINode) 3551 return false; 3552 FI = FINode->getIndex(); 3553 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) { 3554 FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg); 3555 FI = FINode->getIndex(); 3556 Bytes = Flags.getByValSize(); 3557 } else 3558 return false; 3559 3560 assert(FI != INT_MAX); 3561 if (!MFI->isFixedObjectIndex(FI)) 3562 return false; 3563 3564 if (Offset != MFI->getObjectOffset(FI)) 3565 return false; 3566 3567 if (VA.getLocVT().getSizeInBits() > Arg.getValueType().getSizeInBits()) { 3568 // If the argument location is wider than the argument type, check that any 3569 // extension flags match. 3570 if (Flags.isZExt() != MFI->isObjectZExt(FI) || 3571 Flags.isSExt() != MFI->isObjectSExt(FI)) { 3572 return false; 3573 } 3574 } 3575 3576 return Bytes == MFI->getObjectSize(FI); 3577 } 3578 3579 /// Check whether the call is eligible for tail call optimization. Targets 3580 /// that want to do tail call optimization should implement this function. 3581 bool X86TargetLowering::IsEligibleForTailCallOptimization( 3582 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, 3583 bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy, 3584 const SmallVectorImpl<ISD::OutputArg> &Outs, 3585 const SmallVectorImpl<SDValue> &OutVals, 3586 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const { 3587 if (!mayTailCallThisCC(CalleeCC)) 3588 return false; 3589 3590 // If -tailcallopt is specified, make fastcc functions tail-callable. 3591 MachineFunction &MF = DAG.getMachineFunction(); 3592 const Function *CallerF = MF.getFunction(); 3593 3594 // If the function return type is x86_fp80 and the callee return type is not, 3595 // then the FP_EXTEND of the call result is not a nop. It's not safe to 3596 // perform a tailcall optimization here. 
3597 if (CallerF->getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty()) 3598 return false; 3599 3600 CallingConv::ID CallerCC = CallerF->getCallingConv(); 3601 bool CCMatch = CallerCC == CalleeCC; 3602 bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC); 3603 bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC); 3604 3605 // Win64 functions have extra shadow space for argument homing. Don't do the 3606 // sibcall if the caller and callee have mismatched expectations for this 3607 // space. 3608 if (IsCalleeWin64 != IsCallerWin64) 3609 return false; 3610 3611 if (DAG.getTarget().Options.GuaranteedTailCallOpt) { 3612 if (canGuaranteeTCO(CalleeCC) && CCMatch) 3613 return true; 3614 return false; 3615 } 3616 3617 // Look for obvious safe cases to perform tail call optimization that do not 3618 // require ABI changes. This is what gcc calls sibcall. 3619 3620 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to 3621 // emit a special epilogue. 3622 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 3623 if (RegInfo->needsStackRealignment(MF)) 3624 return false; 3625 3626 // Also avoid sibcall optimization if either caller or callee uses struct 3627 // return semantics. 3628 if (isCalleeStructRet || isCallerStructRet) 3629 return false; 3630 3631 // Do not sibcall optimize vararg calls unless all arguments are passed via 3632 // registers. 3633 LLVMContext &C = *DAG.getContext(); 3634 if (isVarArg && !Outs.empty()) { 3635 // Optimizing for varargs on Win64 is unlikely to be safe without 3636 // additional testing. 3637 if (IsCalleeWin64 || IsCallerWin64) 3638 return false; 3639 3640 SmallVector<CCValAssign, 16> ArgLocs; 3641 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C); 3642 3643 CCInfo.AnalyzeCallOperands(Outs, CC_X86); 3644 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) 3645 if (!ArgLocs[i].isRegLoc()) 3646 return false; 3647 } 3648 3649 // If the call result is in ST0 / ST1, it needs to be popped off the x87 3650 // stack. Therefore, if it's not used by the call it is not safe to optimize 3651 // this into a sibcall. 3652 bool Unused = false; 3653 for (unsigned i = 0, e = Ins.size(); i != e; ++i) { 3654 if (!Ins[i].Used) { 3655 Unused = true; 3656 break; 3657 } 3658 } 3659 if (Unused) { 3660 SmallVector<CCValAssign, 16> RVLocs; 3661 CCState CCInfo(CalleeCC, false, MF, RVLocs, C); 3662 CCInfo.AnalyzeCallResult(Ins, RetCC_X86); 3663 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { 3664 CCValAssign &VA = RVLocs[i]; 3665 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) 3666 return false; 3667 } 3668 } 3669 3670 // Check that the call results are passed in the same way. 3671 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins, 3672 RetCC_X86, RetCC_X86)) 3673 return false; 3674 // The callee has to preserve all registers the caller needs to preserve. 3675 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); 3676 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); 3677 if (!CCMatch) { 3678 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); 3679 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) 3680 return false; 3681 } 3682 3683 unsigned StackArgsSize = 0; 3684 3685 // If the callee takes no arguments then go on to check the results of the 3686 // call. 3687 if (!Outs.empty()) { 3688 // Check if stack adjustment is needed. For now, do not do this if any 3689 // argument is passed on the stack. 
3690 SmallVector<CCValAssign, 16> ArgLocs; 3691 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C); 3692 3693 // Allocate shadow area for Win64 3694 if (IsCalleeWin64) 3695 CCInfo.AllocateStack(32, 8); 3696 3697 CCInfo.AnalyzeCallOperands(Outs, CC_X86); 3698 StackArgsSize = CCInfo.getNextStackOffset(); 3699 3700 if (CCInfo.getNextStackOffset()) { 3701 // Check if the arguments are already laid out in the right way as 3702 // the caller's fixed stack objects. 3703 MachineFrameInfo *MFI = MF.getFrameInfo(); 3704 const MachineRegisterInfo *MRI = &MF.getRegInfo(); 3705 const X86InstrInfo *TII = Subtarget.getInstrInfo(); 3706 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 3707 CCValAssign &VA = ArgLocs[i]; 3708 SDValue Arg = OutVals[i]; 3709 ISD::ArgFlagsTy Flags = Outs[i].Flags; 3710 if (VA.getLocInfo() == CCValAssign::Indirect) 3711 return false; 3712 if (!VA.isRegLoc()) { 3713 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, 3714 MFI, MRI, TII, VA)) 3715 return false; 3716 } 3717 } 3718 } 3719 3720 bool PositionIndependent = isPositionIndependent(); 3721 // If the tailcall address may be in a register, then make sure it's 3722 // possible to register allocate for it. In 32-bit, the call address can 3723 // only target EAX, EDX, or ECX since the tail call must be scheduled after 3724 // callee-saved registers are restored. These happen to be the same 3725 // registers used to pass 'inreg' arguments so watch out for those. 3726 if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) && 3727 !isa<ExternalSymbolSDNode>(Callee)) || 3728 PositionIndependent)) { 3729 unsigned NumInRegs = 0; 3730 // In PIC we need an extra register to formulate the address computation 3731 // for the callee. 3732 unsigned MaxInRegs = PositionIndependent ? 2 : 3; 3733 3734 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 3735 CCValAssign &VA = ArgLocs[i]; 3736 if (!VA.isRegLoc()) 3737 continue; 3738 unsigned Reg = VA.getLocReg(); 3739 switch (Reg) { 3740 default: break; 3741 case X86::EAX: case X86::EDX: case X86::ECX: 3742 if (++NumInRegs == MaxInRegs) 3743 return false; 3744 break; 3745 } 3746 } 3747 } 3748 3749 const MachineRegisterInfo &MRI = MF.getRegInfo(); 3750 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals)) 3751 return false; 3752 } 3753 3754 bool CalleeWillPop = 3755 X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg, 3756 MF.getTarget().Options.GuaranteedTailCallOpt); 3757 3758 if (unsigned BytesToPop = 3759 MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) { 3760 // If we have bytes to pop, the callee must pop them. 3761 bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize; 3762 if (!CalleePopMatches) 3763 return false; 3764 } else if (CalleeWillPop && StackArgsSize > 0) { 3765 // If we don't have bytes to pop, make sure the callee doesn't pop any. 
3766 return false; 3767 } 3768 3769 return true; 3770 } 3771 3772 FastISel * 3773 X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, 3774 const TargetLibraryInfo *libInfo) const { 3775 return X86::createFastISel(funcInfo, libInfo); 3776 } 3777 3778 //===----------------------------------------------------------------------===// 3779 // Other Lowering Hooks 3780 //===----------------------------------------------------------------------===// 3781 3782 static bool MayFoldLoad(SDValue Op) { 3783 return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode()); 3784 } 3785 3786 static bool MayFoldIntoStore(SDValue Op) { 3787 return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin()); 3788 } 3789 3790 static bool isTargetShuffle(unsigned Opcode) { 3791 switch(Opcode) { 3792 default: return false; 3793 case X86ISD::BLENDI: 3794 case X86ISD::PSHUFB: 3795 case X86ISD::PSHUFD: 3796 case X86ISD::PSHUFHW: 3797 case X86ISD::PSHUFLW: 3798 case X86ISD::SHUFP: 3799 case X86ISD::INSERTPS: 3800 case X86ISD::PALIGNR: 3801 case X86ISD::VSHLDQ: 3802 case X86ISD::VSRLDQ: 3803 case X86ISD::MOVLHPS: 3804 case X86ISD::MOVLHPD: 3805 case X86ISD::MOVHLPS: 3806 case X86ISD::MOVLPS: 3807 case X86ISD::MOVLPD: 3808 case X86ISD::MOVSHDUP: 3809 case X86ISD::MOVSLDUP: 3810 case X86ISD::MOVDDUP: 3811 case X86ISD::MOVSS: 3812 case X86ISD::MOVSD: 3813 case X86ISD::UNPCKL: 3814 case X86ISD::UNPCKH: 3815 case X86ISD::VPERMILPI: 3816 case X86ISD::VPERMILPV: 3817 case X86ISD::VPERM2X128: 3818 case X86ISD::VPERMIL2: 3819 case X86ISD::VPERMI: 3820 case X86ISD::VPPERM: 3821 case X86ISD::VPERMV: 3822 case X86ISD::VPERMV3: 3823 case X86ISD::VZEXT_MOVL: 3824 return true; 3825 } 3826 } 3827 3828 static bool isTargetShuffleVariableMask(unsigned Opcode) { 3829 switch (Opcode) { 3830 default: return false; 3831 case X86ISD::PSHUFB: 3832 case X86ISD::VPERMILPV: 3833 return true; 3834 } 3835 } 3836 3837 static SDValue getTargetShuffleNode(unsigned Opc, const SDLoc &dl, MVT VT, 3838 SDValue V1, unsigned TargetMask, 3839 SelectionDAG &DAG) { 3840 switch(Opc) { 3841 default: llvm_unreachable("Unknown x86 shuffle node"); 3842 case X86ISD::PSHUFD: 3843 case X86ISD::PSHUFHW: 3844 case X86ISD::PSHUFLW: 3845 case X86ISD::VPERMILPI: 3846 case X86ISD::VPERMI: 3847 return DAG.getNode(Opc, dl, VT, V1, 3848 DAG.getConstant(TargetMask, dl, MVT::i8)); 3849 } 3850 } 3851 3852 static SDValue getTargetShuffleNode(unsigned Opc, const SDLoc &dl, MVT VT, 3853 SDValue V1, SDValue V2, SelectionDAG &DAG) { 3854 switch(Opc) { 3855 default: llvm_unreachable("Unknown x86 shuffle node"); 3856 case X86ISD::MOVLHPS: 3857 case X86ISD::MOVLHPD: 3858 case X86ISD::MOVHLPS: 3859 case X86ISD::MOVLPS: 3860 case X86ISD::MOVLPD: 3861 case X86ISD::MOVSS: 3862 case X86ISD::MOVSD: 3863 case X86ISD::UNPCKL: 3864 case X86ISD::UNPCKH: 3865 return DAG.getNode(Opc, dl, VT, V1, V2); 3866 } 3867 } 3868 3869 SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { 3870 MachineFunction &MF = DAG.getMachineFunction(); 3871 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 3872 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); 3873 int ReturnAddrIndex = FuncInfo->getRAIndex(); 3874 3875 if (ReturnAddrIndex == 0) { 3876 // Set up a frame object for the return address. 
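    // The caller pushed the return address, so it lives immediately below the
    // incoming stack pointer; model it as a fixed object at offset -SlotSize.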
3877 unsigned SlotSize = RegInfo->getSlotSize();
3878 ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize,
3879 -(int64_t)SlotSize,
3880 false);
3881 FuncInfo->setRAIndex(ReturnAddrIndex);
3882 }
3883
3884 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
3885 }
3886
3887 bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
3888 bool hasSymbolicDisplacement) {
3889 // The offset should fit into a 32-bit immediate field.
3890 if (!isInt<32>(Offset))
3891 return false;
3892
3893 // If we don't have a symbolic displacement, we don't have any extra
3894 // restrictions.
3895 if (!hasSymbolicDisplacement)
3896 return true;
3897
3898 // FIXME: Some tweaks might be needed for medium code model.
3899 if (M != CodeModel::Small && M != CodeModel::Kernel)
3900 return false;
3901
3902 // For the small code model, we assume that the last object ends at least
3903 // 16MB below the 31-bit address boundary. We may also accept pretty large
3904 // negative constants, knowing that all objects are in the positive half of the address space.
3905 if (M == CodeModel::Small && Offset < 16*1024*1024)
3906 return true;
3907
3908 // For the kernel code model, we know that all objects reside in the negative
3909 // half of the 32-bit address space. We may not accept negative offsets, since
3910 // they may point just outside an object, but we may accept pretty large positive ones.
3911 if (M == CodeModel::Kernel && Offset >= 0)
3912 return true;
3913
3914 return false;
3915 }
3916
3917 /// Determines whether the callee is required to pop its own arguments.
3918 /// Callee pop is necessary to support tail calls.
3919 bool X86::isCalleePop(CallingConv::ID CallingConv,
3920 bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
3921 // If GuaranteeTCO is true, we force some calls to be callee pop so that we
3922 // can guarantee TCO.
3923 if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
3924 return true;
3925
3926 switch (CallingConv) {
3927 default:
3928 return false;
3929 case CallingConv::X86_StdCall:
3930 case CallingConv::X86_FastCall:
3931 case CallingConv::X86_ThisCall:
3932 case CallingConv::X86_VectorCall:
3933 return !is64Bit;
3934 }
3935 }
3936
3937 /// \brief Return true if the condition is an unsigned comparison operation.
3938 static bool isX86CCUnsigned(unsigned X86CC) {
3939 switch (X86CC) {
3940 default:
3941 llvm_unreachable("Invalid integer condition!");
3942 case X86::COND_E:
3943 case X86::COND_NE:
3944 case X86::COND_B:
3945 case X86::COND_A:
3946 case X86::COND_BE:
3947 case X86::COND_AE:
3948 return true;
3949 case X86::COND_G:
3950 case X86::COND_GE:
3951 case X86::COND_L:
3952 case X86::COND_LE:
3953 return false;
3954 }
3955 }
3956
3957 static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
3958 switch (SetCCOpcode) {
3959 default: llvm_unreachable("Invalid integer condition!");
3960 case ISD::SETEQ: return X86::COND_E;
3961 case ISD::SETGT: return X86::COND_G;
3962 case ISD::SETGE: return X86::COND_GE;
3963 case ISD::SETLT: return X86::COND_L;
3964 case ISD::SETLE: return X86::COND_LE;
3965 case ISD::SETNE: return X86::COND_NE;
3966 case ISD::SETULT: return X86::COND_B;
3967 case ISD::SETUGT: return X86::COND_A;
3968 case ISD::SETULE: return X86::COND_BE;
3969 case ISD::SETUGE: return X86::COND_AE;
3970 }
3971 }
3972
3973 /// Do a one-to-one translation of an ISD::CondCode to the X86-specific
3974 /// condition code, returning the condition code and the LHS/RHS of the
3975 /// comparison to make.
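/// Returns X86::COND_INVALID for conditions (SETOEQ and SETUNE) that cannot be
/// expressed with a single x86 condition code.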
3976 static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL, 3977 bool isFP, SDValue &LHS, SDValue &RHS, 3978 SelectionDAG &DAG) { 3979 if (!isFP) { 3980 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) { 3981 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) { 3982 // X > -1 -> X == 0, jump !sign. 3983 RHS = DAG.getConstant(0, DL, RHS.getValueType()); 3984 return X86::COND_NS; 3985 } 3986 if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) { 3987 // X < 0 -> X == 0, jump on sign. 3988 return X86::COND_S; 3989 } 3990 if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) { 3991 // X < 1 -> X <= 0 3992 RHS = DAG.getConstant(0, DL, RHS.getValueType()); 3993 return X86::COND_LE; 3994 } 3995 } 3996 3997 return TranslateIntegerX86CC(SetCCOpcode); 3998 } 3999 4000 // First determine if it is required or is profitable to flip the operands. 4001 4002 // If LHS is a foldable load, but RHS is not, flip the condition. 4003 if (ISD::isNON_EXTLoad(LHS.getNode()) && 4004 !ISD::isNON_EXTLoad(RHS.getNode())) { 4005 SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode); 4006 std::swap(LHS, RHS); 4007 } 4008 4009 switch (SetCCOpcode) { 4010 default: break; 4011 case ISD::SETOLT: 4012 case ISD::SETOLE: 4013 case ISD::SETUGT: 4014 case ISD::SETUGE: 4015 std::swap(LHS, RHS); 4016 break; 4017 } 4018 4019 // On a floating point condition, the flags are set as follows: 4020 // ZF PF CF op 4021 // 0 | 0 | 0 | X > Y 4022 // 0 | 0 | 1 | X < Y 4023 // 1 | 0 | 0 | X == Y 4024 // 1 | 1 | 1 | unordered 4025 switch (SetCCOpcode) { 4026 default: llvm_unreachable("Condcode should be pre-legalized away"); 4027 case ISD::SETUEQ: 4028 case ISD::SETEQ: return X86::COND_E; 4029 case ISD::SETOLT: // flipped 4030 case ISD::SETOGT: 4031 case ISD::SETGT: return X86::COND_A; 4032 case ISD::SETOLE: // flipped 4033 case ISD::SETOGE: 4034 case ISD::SETGE: return X86::COND_AE; 4035 case ISD::SETUGT: // flipped 4036 case ISD::SETULT: 4037 case ISD::SETLT: return X86::COND_B; 4038 case ISD::SETUGE: // flipped 4039 case ISD::SETULE: 4040 case ISD::SETLE: return X86::COND_BE; 4041 case ISD::SETONE: 4042 case ISD::SETNE: return X86::COND_NE; 4043 case ISD::SETUO: return X86::COND_P; 4044 case ISD::SETO: return X86::COND_NP; 4045 case ISD::SETOEQ: 4046 case ISD::SETUNE: return X86::COND_INVALID; 4047 } 4048 } 4049 4050 /// Is there a floating point cmov for the specific X86 condition code? 4051 /// Current x86 isa includes the following FP cmov instructions: 4052 /// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu. 4053 static bool hasFPCMov(unsigned X86CC) { 4054 switch (X86CC) { 4055 default: 4056 return false; 4057 case X86::COND_B: 4058 case X86::COND_BE: 4059 case X86::COND_E: 4060 case X86::COND_P: 4061 case X86::COND_A: 4062 case X86::COND_AE: 4063 case X86::COND_NE: 4064 case X86::COND_NP: 4065 return true; 4066 } 4067 } 4068 4069 4070 bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, 4071 const