Home | History | Annotate | Download | only in X86
      1 //===-- X86InstrFragmentsSIMD.td - x86 SIMD ISA ------------*- tablegen -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file provides pattern fragments useful for SIMD instructions.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 //===----------------------------------------------------------------------===//
     15 // MMX specific DAG Nodes.
     16 //===----------------------------------------------------------------------===//
     17 
     18 // Low word of MMX to GPR: one x86mmx input, one i32 result.
     19 def MMX_X86movd2w : SDNode<"X86ISD::MMX_MOVD2W", SDTypeProfile<1, 1,
     20                             [SDTCisVT<0, i32>, SDTCisVT<1, x86mmx>]>>;
     21 // GPR to low word of MMX: one i32 input, one x86mmx result.
     22 def MMX_X86movw2d : SDNode<"X86ISD::MMX_MOVW2D", SDTypeProfile<1, 1,
     23                             [SDTCisVT<0, x86mmx>, SDTCisVT<1, i32>]>>;
     24 
     25 //===----------------------------------------------------------------------===//
     26 // MMX Pattern Fragments
     27 //===----------------------------------------------------------------------===//
     28 
     29 def load_mmx : PatFrag<(ops node:$ptr), (x86mmx (load node:$ptr))>;  // Load from $ptr typed as x86mmx.
     30 def load_mvmmx : PatFrag<(ops node:$ptr),  // Load from $ptr passed through MMX_X86movw2d, typed as x86mmx.
     31                          (x86mmx (MMX_X86movw2d (load node:$ptr)))>;
     32 def bc_mmx  : PatFrag<(ops node:$in), (x86mmx  (bitconvert node:$in))>;  // Bitconvert of $in to x86mmx.
     33 
     34 //===----------------------------------------------------------------------===//
     35 // SSE specific DAG Nodes.
     36 //===----------------------------------------------------------------------===//
     37 
     38 def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisSameAs<1, 2>,  // 1 FP result, 3 inputs; inputs 1 and 2 share a type.
     39                                        SDTCisFP<1>, SDTCisVT<3, i8>,  // Input 1 is FP; input 3 is an i8.
     40                                        SDTCisVec<1>]>;  // Input 1 must be a vector.
     41 def SDTX86CmpTestSae : SDTypeProfile<1, 3, [SDTCisVT<0, i32>,   // i32 result, 3 inputs.
     42                                      SDTCisSameAs<1, 2>, SDTCisInt<3>]>;  // Inputs 1 and 2 share a type; input 3 is an integer.
     43 
     44 def X86fmin    : SDNode<"X86ISD::FMIN",      SDTFPBinOp>;  // FMIN with the SDTFPBinOp profile; no node properties set.
     45 def X86fmax    : SDNode<"X86ISD::FMAX",      SDTFPBinOp>;  // FMAX with the SDTFPBinOp profile; no node properties set.
     46 
     47 // Commutative and associative FMIN and FMAX (separate opcodes from FMIN/FMAX).
     48 def X86fminc    : SDNode<"X86ISD::FMINC", SDTFPBinOp,
     49     [SDNPCommutative, SDNPAssociative]>;  // Flags allow operand swapping and regrouping.
     50 def X86fmaxc    : SDNode<"X86ISD::FMAXC", SDTFPBinOp,
     51     [SDNPCommutative, SDNPAssociative]>;  // Same flags as X86fminc.
     52 
     53 def X86fand    : SDNode<"X86ISD::FAND",      SDTFPBinOp,  // FAND node using the SDTFPBinOp profile.
     54                         [SDNPCommutative, SDNPAssociative]>;  // Operands may be swapped and regrouped.
     55 def X86for     : SDNode<"X86ISD::FOR",       SDTFPBinOp,  // FOR node using the SDTFPBinOp profile.
     56                         [SDNPCommutative, SDNPAssociative]>;
     57 def X86fxor    : SDNode<"X86ISD::FXOR",      SDTFPBinOp,  // FXOR node using the SDTFPBinOp profile.
     58                         [SDNPCommutative, SDNPAssociative]>;
     59 // FANDN is and-not (~lhs & rhs): operand order matters, so it must not be
     60 def X86fandn   : SDNode<"X86ISD::FANDN",     SDTFPBinOp>;  // flagged SDNPCommutative or SDNPAssociative.
     61 def X86frsqrt  : SDNode<"X86ISD::FRSQRT",    SDTFPUnaryOp>;  // One-input FP node.
     62 def X86frcp    : SDNode<"X86ISD::FRCP",      SDTFPUnaryOp>;  // One-input FP node.
     63 def X86frsqrt14s: SDNode<"X86ISD::FRSQRTS",  SDTFPBinOp>;  // Two-input FP profile (note the opcode is FRSQRTS).
     64 def X86frcp14s : SDNode<"X86ISD::FRCPS",    SDTFPBinOp>;  // Two-input FP profile (note the opcode is FRCPS).
     65 def X86fhadd   : SDNode<"X86ISD::FHADD",     SDTFPBinOp>;
     66 def X86fhsub   : SDNode<"X86ISD::FHSUB",     SDTFPBinOp>;
     67 def X86hadd    : SDNode<"X86ISD::HADD",      SDTIntBinOp>;  // Integer counterparts of FHADD/FHSUB use SDTIntBinOp.
     68 def X86hsub    : SDNode<"X86ISD::HSUB",      SDTIntBinOp>;
     69 def X86comi    : SDNode<"X86ISD::COMI",      SDTX86CmpTest>;
     70 def X86comiSae : SDNode<"X86ISD::COMI",      SDTX86CmpTestSae>;  // Same COMI opcode, but with the 3-input Sae profile.
     71 def X86ucomi   : SDNode<"X86ISD::UCOMI",     SDTX86CmpTest>;
     72 def X86ucomiSae: SDNode<"X86ISD::UCOMI",     SDTX86CmpTestSae>;  // Same UCOMI opcode, but with the 3-input Sae profile.
     73 def X86cmps    : SDNode<"X86ISD::FSETCC",     SDTX86Cmps>;  // SDTX86Cmps is defined outside this file section.
     74 def X86cvtdq2pd: SDNode<"X86ISD::CVTDQ2PD",  // One v4i32 input, v2f64 result.
     75                  SDTypeProfile<1, 1, [SDTCisVT<0, v2f64>,
     76                                       SDTCisVT<1, v4i32>]>>;
     77 def X86cvtudq2pd: SDNode<"X86ISD::CVTUDQ2PD",  // Same v4i32 -> v2f64 profile as X86cvtdq2pd.
     78                  SDTypeProfile<1, 1, [SDTCisVT<0, v2f64>,
     79                                       SDTCisVT<1, v4i32>]>>;
     80 def X86pshufb  : SDNode<"X86ISD::PSHUFB",  // Result is an i8-element vector; both inputs have the result type.
     81                  SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i8>, SDTCisSameAs<0,1>,
     82                                       SDTCisSameAs<0,2>]>>;
     83 def X86psadbw  : SDNode<"X86ISD::PSADBW",  // i64-element result from two identical i8-element inputs.
     84                  SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i64>,
     85                                       SDTCVecEltisVT<1, i8>,
     86                                       SDTCisSameSizeAs<0,1>,  // Result and input occupy the same total size.
     87                                       SDTCisSameAs<1,2>]>>;
     88 def X86dbpsadbw : SDNode<"X86ISD::DBPSADBW",  // i16-element result, two identical i8-element inputs, plus an integer input.
     89                   SDTypeProfile<1, 3, [SDTCVecEltisVT<0, i16>,
     90                                        SDTCVecEltisVT<1, i8>,
     91                                        SDTCisSameSizeAs<0,1>,
     92                                        SDTCisSameAs<1,2>, SDTCisInt<3>]>>;
     93 def X86andnp   : SDNode<"X86ISD::ANDNP",  // Vector result; both inputs have the result type. No commutative flag.
     94                  SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
     95                                       SDTCisSameAs<0,2>]>>;
     96 def X86multishift   : SDNode<"X86ISD::MULTISHIFT",  // Vector result; the two vector inputs share a type (not tied to the result here).
     97                  SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
     98                                       SDTCisSameAs<1,2>]>>;
     99 def X86pextrb  : SDNode<"X86ISD::PEXTRB",  // i32 result from a v16i8 input and a pointer-typed second input.
    100                  SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, v16i8>,
    101                                       SDTCisPtrTy<2>]>>;
    102 def X86pextrw  : SDNode<"X86ISD::PEXTRW",  // i32 result from a v8i16 input and a pointer-typed second input.
    103                  SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, v8i16>,
    104                                       SDTCisPtrTy<2>]>>;
    105 def X86pinsrb  : SDNode<"X86ISD::PINSRB",  // v16i8 result; inputs: v16i8 vector, i32 value, pointer-typed operand.
    106                  SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
    107                                       SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
    108 def X86pinsrw  : SDNode<"X86ISD::PINSRW",  // v8i16 result; inputs: v8i16 vector, i32 value, pointer-typed operand.
    109                  SDTypeProfile<1, 3, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>,
    110                                       SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
    111 def X86insertps : SDNode<"X86ISD::INSERTPS",  // v4f32 result; two v4f32 inputs plus an i8 input.
    112                  SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>,
    113                                       SDTCisVT<2, v4f32>, SDTCisVT<3, i8>]>>;
    114 def X86vzmovl  : SDNode<"X86ISD::VZEXT_MOVL",  // One input whose type equals the result type.
    115                  SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
    116 
    117 def X86vzload  : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,  // Uses the generic SDTLoad profile.
    118                         [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;  // Chained, may read memory, carries a memory operand.
    119 
    120 def X86vzext   : SDNode<"X86ISD::VZEXT",  // Integer vector in, integer vector out.
    121                          SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
    122                                               SDTCisInt<0>, SDTCisInt<1>,
    123                                               SDTCisOpSmallerThanOp<1, 0>]>>;  // The input type is smaller than the result type.
    124 
    125 def X86vsext   : SDNode<"X86ISD::VSEXT",  // Same profile shape as X86ISD::VZEXT: integer vector in, integer vector out.
    126                          SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
    127                                               SDTCisInt<0>, SDTCisInt<1>,
    128                                               SDTCisOpSmallerThanOp<1, 0>]>>;  // The input type is smaller than the result type.
    129 
    130 def SDTVtrunc    : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,  // Shared truncation profile: integer vector in and out.
    131                                        SDTCisInt<0>, SDTCisInt<1>,
    132                                        SDTCisOpSmallerThanOp<0, 1>]>;  // The result type is smaller than the input type.
    133 
    134 def X86vtrunc    : SDNode<"X86ISD::VTRUNC",   SDTVtrunc>;  // All three nodes share the SDTVtrunc profile
    135 def X86vtruncs   : SDNode<"X86ISD::VTRUNCS",  SDTVtrunc>;  // and differ only in the X86ISD opcode they name.
    136 def X86vtruncus  : SDNode<"X86ISD::VTRUNCUS", SDTVtrunc>;
    137 
    138 def X86vfpext  : SDNode<"X86ISD::VFPEXT",  // f32-element vector in, f64-element vector out.
    139                         SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f64>,
    140                                              SDTCVecEltisVT<1, f32>,
    141                                              SDTCisSameSizeAs<0, 1>]>>;  // Input and result have the same total size.
    142 def X86vfpround: SDNode<"X86ISD::VFPROUND",  // f64-element vector in, f32-element vector out.
    143                         SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
    144                                              SDTCVecEltisVT<1, f64>,
    145                                              SDTCisSameSizeAs<0, 1>]>>;  // Input and result have the same total size.
    146 
    147 def X86fround: SDNode<"X86ISD::VFPROUND",  // Two-input view of the VFPROUND opcode.
    148                         SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f32>,
    149                                              SDTCisSameAs<0, 1>,  // First input has the (f32-element) result type.
    150                                              SDTCVecEltisVT<2, f64>,  // Second input is an f64-element vector
    151                                              SDTCisSameSizeAs<0, 2>]>>;  // of the same total size as the result.
    152 def X86froundRnd: SDNode<"X86ISD::VFPROUND",  // Three-input view of the VFPROUND opcode.
    153                         SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f32>,
    154                                              SDTCisSameAs<0, 1>,
    155                                              SDTCVecEltisVT<2, f64>,
    156                                              SDTCisSameSizeAs<0, 2>,
    157                                              SDTCisVT<3, i32>]>>;  // Extra trailing i32 input.
    158 
    159 def X86fpext  : SDNode<"X86ISD::VFPEXT",  // Two-input view of the VFPEXT opcode.
    160                         SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f64>,
    161                                              SDTCisSameAs<0, 1>,  // First input has the (f64-element) result type.
    162                                              SDTCVecEltisVT<2, f32>,  // Second input is an f32-element vector
    163                                              SDTCisSameSizeAs<0, 2>]>>;  // of the same total size as the result.
    164 
    165 def X86fpextRnd  : SDNode<"X86ISD::VFPEXT",  // Three-input view of the VFPEXT opcode.
    166                         SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f64>,
    167                                              SDTCisSameAs<0, 1>,  // First input has the (f64-element) result type.
    168                                              SDTCVecEltisVT<2, f32>,
    169                                              SDTCisSameSizeAs<0, 2>,
    170                                              SDTCisVT<3, i32>]>>;  // Extra trailing i32 input.
    171 
    172 def X86vshldq  : SDNode<"X86ISD::VSHLDQ",    SDTIntShiftOp>;
    173 def X86vshrdq  : SDNode<"X86ISD::VSRLDQ",    SDTIntShiftOp>;  // Note: def name says "shr", opcode is VSRLDQ.
    174 def X86cmpp    : SDNode<"X86ISD::CMPP",      SDTX86VFCMP>;  // Uses the SDTX86VFCMP profile defined in this file.
    175 def X86pcmpeq  : SDNode<"X86ISD::PCMPEQ", SDTIntBinOp, [SDNPCommutative]>;  // Flagged commutative.
    176 def X86pcmpgt  : SDNode<"X86ISD::PCMPGT", SDTIntBinOp>;  // Not flagged commutative.
    177 
    178 def X86IntCmpMask : SDTypeProfile<1, 2,  // Vector result; two integer inputs of the same type.
    179     [SDTCisVec<0>, SDTCisSameAs<1, 2>, SDTCisInt<1>]>;
    180 def X86pcmpeqm  : SDNode<"X86ISD::PCMPEQM", X86IntCmpMask, [SDNPCommutative]>;  // Flagged commutative.
    181 def X86pcmpgtm  : SDNode<"X86ISD::PCMPGTM", X86IntCmpMask>;  // Not flagged commutative.
    182 
    183 def X86CmpMaskCC :  // Vector compare producing an i1-element mask; third input is an i8.
    184       SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>,
    185                        SDTCisVec<1>, SDTCisSameAs<2, 1>,  // Both compared inputs are vectors of one type.
    186                        SDTCisSameNumEltsAs<0, 1>, SDTCisVT<3, i8>]>;  // Mask has one element per input element.
    187 def X86CmpMaskCCRound :  // Same as X86CmpMaskCC plus a fourth, integer input.
    188       SDTypeProfile<1, 4, [SDTCisVec<0>,SDTCVecEltisVT<0, i1>,
    189                        SDTCisVec<1>, SDTCisSameAs<2, 1>,
    190                        SDTCisSameNumEltsAs<0, 1>, SDTCisVT<3, i8>,
    191                        SDTCisInt<4>]>;
    192 def X86CmpMaskCCScalar :  // Integer result; two same-typed inputs plus an i8 input.
    193       SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
    194 
    195 def X86CmpMaskCCScalarRound :  // Integer result; two same-typed inputs, an i8 input, and a trailing integer input.
    196       SDTypeProfile<1, 4, [SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>,
    197                            SDTCisInt<4>]>;
    198 
    199 def X86cmpm     : SDNode<"X86ISD::CMPM",     X86CmpMaskCC>;
    200 def X86cmpmRnd  : SDNode<"X86ISD::CMPM_RND", X86CmpMaskCCRound>;  // Separate _RND opcode with the 4-input profile.
    201 def X86cmpmu    : SDNode<"X86ISD::CMPMU",    X86CmpMaskCC>;
    202 def X86cmpms    : SDNode<"X86ISD::FSETCC",   X86CmpMaskCCScalar>;  // Both scalar forms map to the FSETCC opcode
    203 def X86cmpmsRnd : SDNode<"X86ISD::FSETCC",   X86CmpMaskCCScalarRound>;  // and differ only in operand count.
    204 
    205 def X86vshl    : SDNode<"X86ISD::VSHL",  // Vector result; first input has the result type, second input is some vector.
    206                         SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
    207                                       SDTCisVec<2>]>>;
    208 def X86vsrl    : SDNode<"X86ISD::VSRL",  // Same profile as X86vshl.
    209                         SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
    210                                       SDTCisVec<2>]>>;
    211 def X86vsra    : SDNode<"X86ISD::VSRA",  // Same profile as X86vshl.
    212                         SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
    213                                       SDTCisVec<2>]>>;
    214 
    215 def X86vsrav   : SDNode<"X86ISD::VSRAV" , SDTIntShiftOp>;  // Uses the generic SDTIntShiftOp profile.
    216 
    217 def X86vshli   : SDNode<"X86ISD::VSHLI", SDTIntShiftOp>;  // The three "I"-suffixed shift opcodes all use
    218 def X86vsrli   : SDNode<"X86ISD::VSRLI", SDTIntShiftOp>;  // the generic SDTIntShiftOp profile.
    219 def X86vsrai   : SDNode<"X86ISD::VSRAI", SDTIntShiftOp>;
    220 
    221 def X86vrotli  : SDNode<"X86ISD::VROTLI", SDTIntShiftOp>;  // Rotate opcodes reuse the SDTIntShiftOp profile.
    222 def X86vrotri  : SDNode<"X86ISD::VROTRI", SDTIntShiftOp>;
    223 
    224 def X86vprot   : SDNode<"X86ISD::VPROT",  // Vector result; both inputs have the result type.
    225                         SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
    226                                              SDTCisSameAs<0,2>]>>;
    227 def X86vproti  : SDNode<"X86ISD::VPROTI",  // Vector result; first input has the result type, second is an i8.
    228                         SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
    229                                              SDTCisVT<2, i8>]>>;
    230 
    231 def X86vpshl   : SDNode<"X86ISD::VPSHL",  // Vector result; both inputs have the result type.
    232                         SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
    233                                              SDTCisSameAs<0,2>]>>;
    234 def X86vpsha   : SDNode<"X86ISD::VPSHA",  // Same profile as X86vpshl.
    235                         SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
    236                                              SDTCisSameAs<0,2>]>>;
    237 
    238 def X86vpcom   : SDNode<"X86ISD::VPCOM",  // Vector result; two inputs of the result type plus an i8 input.
    239                         SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
    240                                              SDTCisSameAs<0,2>,
    241                                              SDTCisVT<3, i8>]>>;
    242 def X86vpcomu  : SDNode<"X86ISD::VPCOMU",  // Same profile as X86vpcom.
    243                         SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
    244                                              SDTCisSameAs<0,2>,
    245                                              SDTCisVT<3, i8>]>>;
    246 def X86vpermil2 : SDNode<"X86ISD::VPERMIL2",  // Vector result with four inputs.
    247                         SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisSameAs<0,1>,
    248                                              SDTCisSameAs<0,2>,  // Inputs 1 and 2 have the result type.
    249                                              SDTCisSameSizeAs<0,3>,  // Input 3 matches the result in total size
    250                                              SDTCisSameNumEltsAs<0, 3>,  // and element count (type may differ).
    251                                              SDTCisVT<4, i8>]>>;  // Input 4 is an i8.
    252 def X86vpperm : SDNode<"X86ISD::VPPERM",  // v16i8 result; all three inputs (two sources and the selector) are v16i8.
    253                         SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
    254                                              SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>;  // Constrain the third input too; it was previously left untyped.
    255 
    256 def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,  // i32 result.
    257                                           SDTCisVec<1>,  // First input is a vector;
    258                                           SDTCisSameAs<2, 1>]>;  // second input has the same type.
    259 
    260 def SDTX86Testm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,  // Vector result from two vector inputs.
    261                                        SDTCisSameAs<2, 1>, SDTCVecEltisVT<0, i1>,  // Inputs share a type; result elements are i1.
    262                                        SDTCisSameNumEltsAs<0, 1>]>;  // One result element per input element.
    263 
    264 def X86addus   : SDNode<"X86ISD::ADDUS", SDTIntBinOp>;  // None of the ADDUS/SUBUS/ADDS/SUBS/MULHRS/AVG
    265 def X86subus   : SDNode<"X86ISD::SUBUS", SDTIntBinOp>;  // nodes carries node properties here.
    266 def X86adds    : SDNode<"X86ISD::ADDS", SDTIntBinOp>;
    267 def X86subs    : SDNode<"X86ISD::SUBS", SDTIntBinOp>;
    268 def X86mulhrs  : SDNode<"X86ISD::MULHRS" , SDTIntBinOp>;
    269 def X86avg     : SDNode<"X86ISD::AVG" , SDTIntBinOp>;
    270 def X86ptest   : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;  // The four *TEST nodes share the i32-result
    271 def X86testp   : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>;  // SDTX86CmpPTest profile.
    272 def X86kortest : SDNode<"X86ISD::KORTEST", SDTX86CmpPTest>;
    273 def X86ktest   : SDNode<"X86ISD::KTEST", SDTX86CmpPTest>;
    274 def X86testm   : SDNode<"X86ISD::TESTM", SDTX86Testm, [SDNPCommutative]>;  // Mask-producing tests, flagged commutative.
    275 def X86testnm  : SDNode<"X86ISD::TESTNM", SDTX86Testm, [SDNPCommutative]>;
    276 
    277 def X86movmsk : SDNode<"X86ISD::MOVMSK",  // One vector input, i32 result.
    278                         SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVec<1>]>>;
    279 
    280 def X86select  : SDNode<"X86ISD::SELECT",  // Vector form: first input is an i1-element vector.
    281                         SDTypeProfile<1, 3, [SDTCVecEltisVT<1, i1>,
    282                                              SDTCisSameAs<0, 2>,  // Inputs 2 and 3 both have the result type.
    283                                              SDTCisSameAs<2, 3>,
    284                                              SDTCisSameNumEltsAs<0, 1>]>>;  // One i1 element per result element.
    285 
    286 def X86selects : SDNode<"X86ISD::SELECT",  // Scalar-condition form of the same SELECT opcode: first input is a plain i1.
    287                         SDTypeProfile<1, 3, [SDTCisVT<1, i1>,
    288                                              SDTCisSameAs<0, 2>,  // Inputs 2 and 3 both have the result type.
    289                                              SDTCisSameAs<2, 3>]>>;
    290 
    291 def X86pmuludq : SDNode<"X86ISD::PMULUDQ",  // i64-element result from two identical i32-element inputs.
    292                         SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i64>,
    293                                              SDTCVecEltisVT<1, i32>,
    294                                              SDTCisSameSizeAs<0,1>,  // Result and inputs have the same total size.
    295                                              SDTCisSameAs<1,2>]>>;
    296 def X86pmuldq  : SDNode<"X86ISD::PMULDQ",  // Same profile as X86pmuludq.
    297                         SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i64>,
    298                                              SDTCVecEltisVT<1, i32>,
    299                                              SDTCisSameSizeAs<0,1>,
    300                                              SDTCisSameAs<1,2>]>>;
    301 
    302 def X86extrqi : SDNode<"X86ISD::EXTRQI",  // v2i64 result; one v2i64 input and two i8 inputs.
    303                   SDTypeProfile<1, 3, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>,
    304                                        SDTCisVT<2, i8>, SDTCisVT<3, i8>]>>;
    305 def X86insertqi : SDNode<"X86ISD::INSERTQI",  // v2i64 result; two v2i64 inputs and two i8 inputs.
    306                     SDTypeProfile<1, 4, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>,
    307                                          SDTCisSameAs<1,2>, SDTCisVT<3, i8>,
    308                                          SDTCisVT<4, i8>]>>;
    309 
    310 // Specific shuffle nodes - At some point ISD::VECTOR_SHUFFLE will always get
    311 // translated into one of the target nodes below during lowering.
    312 // Note: this is a work in progress...
    313 def SDTShuff1Op : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;  // One-input shuffle: input has the vector result type.
    314 def SDTShuff2Op : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,  // Two-input shuffle: both inputs have the result type.
    315                                 SDTCisSameAs<0,2>]>;
    316 
    317 def SDTShuff2OpM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,  // First input has the result type;
    318                                         SDTCisSameSizeAs<0,2>,  // second input matches it in total size
    319                                         SDTCisSameNumEltsAs<0,2>]>;  // and element count.
    320 def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>,  // One vector input of the result type plus an i8 input.
    321                                  SDTCisSameAs<0,1>, SDTCisVT<2, i8>]>;
    322 def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,  // Two vector inputs of the result type plus an i8 input.
    323                                  SDTCisSameAs<0,2>, SDTCisVT<3, i8>]>;
    324 def SDTFPBinOpImmRound: SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisSameAs<0,1>,  // Two inputs of the result type plus two i32 inputs.
    325                              SDTCisSameAs<0,2>, SDTCisVT<3, i32>, SDTCisVT<4, i32>]>;
    326 def SDTFPTernaryOpImmRound: SDTypeProfile<1, 5, [SDTCisFP<0>, SDTCisSameAs<0,1>,  // FP result; inputs 1 and 2 have the result type.
    327                                                  SDTCisSameAs<0,2>,
    328                                                  SDTCisInt<3>,  // Input 3 is an integer type matching the result
    329                                                  SDTCisSameSizeAs<0, 3>,  // in total size
    330                                                  SDTCisSameNumEltsAs<0, 3>,  // and element count.
    331                                                  SDTCisVT<4, i32>,  // Inputs 4 and 5 are i32.
    332                                                  SDTCisVT<5, i32>]>;
    333 def SDTFPUnaryOpImmRound: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,  // One input of the result type plus two i32 inputs.
    334                               SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
    335 
    336 def SDTVBroadcast  : SDTypeProfile<1, 1, [SDTCisVec<0>]>;  // Vector result; the single input is unconstrained.
    337 def SDTVBroadcastm : SDTypeProfile<1, 1, [SDTCisVec<0>,  // Integer vector result from an integer input.
    338                                           SDTCisInt<0>, SDTCisInt<1>]>;
    339 
    340 def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,  // Two inputs of the vector result type
    341                              SDTCisSameAs<1,2>, SDTCisVT<3, i8>]>;  // plus an i8 input.
    342 
    343 def SDTTernlog  : SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisSameAs<0,1>,  // Three inputs of the vector result type
    344                                 SDTCisSameAs<0,2>, SDTCisSameAs<0,3>,
    345                                 SDTCisVT<4, i8>]>;  // plus an i8 input.
    346 
    347 def SDTFPBinOpRound : SDTypeProfile<1, 3, [      // fadd_round, fmul_round, etc.
    348   SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0>, SDTCisVT<3, i32>]>;  // Two FP inputs of the result type plus an i32 input.
    349 
    350 def SDTFPUnaryOpRound : SDTypeProfile<1, 2, [      // fsqrt_round, fgetexp_round, etc.
    351   SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisVT<2, i32>]>;  // One FP input of the result type plus an i32 input.
    352 
    353 def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,  // Three inputs, all of the result type (no FP/int restriction here).
    354                            SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
    355 def SDTFmaRound : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>,  // Same as SDTFma plus a trailing i32 input.
    356                            SDTCisSameAs<1,2>, SDTCisSameAs<1,3>,
    357                            SDTCisVT<4, i32>]>;
    358 
    359 def X86PAlignr : SDNode<"X86ISD::PALIGNR", SDTShuff3OpI>;  // Two vectors plus an i8 input.
    360 def X86VAlign  : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>;  // Two vectors plus an i8 input.
    361 
    362 def X86Abs      : SDNode<"X86ISD::ABS", SDTIntUnaryOp>;  // One-input integer nodes using the generic SDTIntUnaryOp profile.
    363 def X86Conflict : SDNode<"X86ISD::CONFLICT", SDTIntUnaryOp>;
    364 
    365 def X86PShufd  : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>;  // Each takes one vector plus an i8 input (SDTShuff2OpI).
    366 def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>;
    367 def X86PShuflw : SDNode<"X86ISD::PSHUFLW", SDTShuff2OpI>;
    368 
    369 def X86Shufp   : SDNode<"X86ISD::SHUFP", SDTShuff3OpI>;  // Two vectors plus an i8 input.
    370 def X86Shuf128 : SDNode<"X86ISD::SHUF128", SDTShuff3OpI>;  // Two vectors plus an i8 input.
    371 
    372 def X86Movddup  : SDNode<"X86ISD::MOVDDUP", SDTShuff1Op>;  // Single-input shuffles: input and result share one vector type.
    373 def X86Movshdup : SDNode<"X86ISD::MOVSHDUP", SDTShuff1Op>;
    374 def X86Movsldup : SDNode<"X86ISD::MOVSLDUP", SDTShuff1Op>;
    375 
    376 def X86Movsd : SDNode<"X86ISD::MOVSD", SDTShuff2Op>;  // Two-input shuffles: both inputs have the result type.
    377 def X86Movss : SDNode<"X86ISD::MOVSS", SDTShuff2Op>;
    378 
    379 def X86Movlhps : SDNode<"X86ISD::MOVLHPS", SDTShuff2Op>;  // Two-input shuffles: both inputs have the result type.
    380 def X86Movlhpd : SDNode<"X86ISD::MOVLHPD", SDTShuff2Op>;
    381 def X86Movhlps : SDNode<"X86ISD::MOVHLPS", SDTShuff2Op>;
    382 
    383 def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>;  // Two-input shuffles: both inputs have the result type.
    384 def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>;
    385 
    386 def SDTPack : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,  // Vector result from two vector inputs.
    387                                    SDTCisSameSizeAs<0,1>,  // Result and inputs have the same total size (element types may differ).
    388                                    SDTCisSameAs<1,2>]>;  // Both inputs share one type.
    389 def X86Packss : SDNode<"X86ISD::PACKSS", SDTPack>;
    390 def X86Packus : SDNode<"X86ISD::PACKUS", SDTPack>;
    391 
    392 def X86Unpckl : SDNode<"X86ISD::UNPCKL", SDTShuff2Op>;  // Two-input shuffles: both inputs have the result type.
    393 def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>;
    394 
    395 def X86vpmaddubsw  : SDNode<"X86ISD::VPMADDUBSW" , SDTPack>;  // Both reuse the SDTPack profile: same-size result,
    396 def X86vpmaddwd    : SDNode<"X86ISD::VPMADDWD"   , SDTPack>;  // two identically typed vector inputs.
    397 
    398 def X86VPermilpv  : SDNode<"X86ISD::VPERMILPV", SDTShuff2OpM>;  // Second input matches the result in size and element count.
    399 def X86VPermilpi  : SDNode<"X86ISD::VPERMILPI", SDTShuff2OpI>;  // Second input is an i8.
    400 def X86VPermv     : SDNode<"X86ISD::VPERMV",  // First input is an integer type shaped like the result;
    401                            SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<1>,
    402                                                 SDTCisSameNumEltsAs<0,1>,
    403                                                 SDTCisSameSizeAs<0,1>,
    404                                                 SDTCisSameAs<0,2>]>>;  // second input has the result type.
    405 def X86VPermi     : SDNode<"X86ISD::VPERMI",    SDTShuff2OpI>;
    406 def X86VPermt2     : SDNode<"X86ISD::VPERMV3",  // Note: def name is VPermt2, opcode is VPERMV3.
    407                     SDTypeProfile<1, 3, [SDTCisVec<0>,
    408                                          SDTCisSameAs<0,1>, SDTCisInt<2>,  // Input 1 has the result type; input 2 is an integer
    409                                          SDTCisVec<2>, SDTCisSameNumEltsAs<0, 2>,  // vector shaped like the result;
    410                                          SDTCisSameSizeAs<0,2>,
    411                                          SDTCisSameAs<0,3>]>, []>;  // input 3 has the result type. No node properties.
    412 
    413 def X86VPermi2X   : SDNode<"X86ISD::VPERMIV3",  // Note: def name is VPermi2X, opcode is VPERMIV3.
    414                     SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<1>,  // Here the integer vector is the FIRST input
    415                                          SDTCisVec<1>, SDTCisSameNumEltsAs<0, 1>,  // (it is the second input in X86ISD::VPERMV3).
    416                                          SDTCisSameSizeAs<0,1>,
    417                                          SDTCisSameAs<0,2>,  // Inputs 2 and 3 have the result type.
    418                                          SDTCisSameAs<0,3>]>, []>;
    419 
    420 def X86vpternlog  : SDNode<"X86ISD::VPTERNLOG", SDTTernlog>;  // Three same-typed vectors plus an i8 input.
    421 
    422 def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;  // Two same-typed vectors plus an i8 input.
    423 
    424 def X86VFixupimm   : SDNode<"X86ISD::VFIXUPIMM", SDTFPTernaryOpImmRound>;
    425 def X86VFixupimmScalar   : SDNode<"X86ISD::VFIXUPIMMS", SDTFPTernaryOpImmRound>;  // Separate opcode, same 5-input profile.
    426 def X86VRange      : SDNode<"X86ISD::VRANGE",    SDTFPBinOpImmRound>;  // Two vectors plus two i32 inputs.
    427 def X86VReduce     : SDNode<"X86ISD::VREDUCE",   SDTFPUnaryOpImmRound>;  // One vector plus two i32 inputs.
    428 def X86VRndScale   : SDNode<"X86ISD::VRNDSCALE", SDTFPUnaryOpImmRound>;
    429 def X86VGetMant    : SDNode<"X86ISD::VGETMANT",  SDTFPUnaryOpImmRound>;
    430 def X86Vfpclass    : SDNode<"X86ISD::VFPCLASS",  // i1-element vector result from an FP vector and an i32 input.
    431                        SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>,
    432                                             SDTCisVec<1>, SDTCisFP<1>,
    433                                             SDTCisSameNumEltsAs<0,1>,  // One result bit per input element.
    434                                             SDTCisVT<2, i32>]>, []>;
    435 def X86Vfpclasss   : SDNode<"X86ISD::VFPCLASSS",  // Scalar form: plain i1 result from an FP input and an i32 input.
    436                        SDTypeProfile<1, 2, [SDTCisVT<0, i1>,
    437                                             SDTCisFP<1>, SDTCisVT<2, i32>]>,[]>;
    438 
    439 def X86SubVBroadcast : SDNode<"X86ISD::SUBV_BROADCAST",  // Vector result from a vector input
    440                     SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
    441                                          SDTCisSubVecOfVec<1, 0>]>, []>;  // whose type is a subvector of the result type.
    442 
    443 def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;  // Vector result; input type unconstrained by the profile.
    444 def X86VBroadcastm : SDNode<"X86ISD::VBROADCASTM", SDTVBroadcastm>;  // Integer vector result from an integer input.
    445 def X86Vinsert   : SDNode<"X86ISD::VINSERT",  SDTypeProfile<1, 3,  // Inputs: vector of the result type, one element, pointer-typed operand.
    446                               [SDTCisSameAs<0, 1>, SDTCisEltOfVec<2, 1>,
    447                                SDTCisPtrTy<3>]>, []>;
    448 def X86Vextract   : SDNode<"X86ISD::VEXTRACT",  SDTypeProfile<1, 2,  // Result is the element type of the vector input; second input is pointer-typed.
    449                               [SDTCisEltOfVec<0, 1>, SDTCisVec<1>,
    450                                SDTCisPtrTy<2>]>, []>;
    451 
    452 def X86Blendi    : SDNode<"X86ISD::BLENDI",   SDTBlend>;  // Two same-typed vectors plus an i8 input.
    453 
    454 def X86Addsub    : SDNode<"X86ISD::ADDSUB", SDTFPBinOp>;  // Two-input FP node; no node properties set.
    455 
    456 def X86faddRnd   : SDNode<"X86ISD::FADD_RND",  SDTFPBinOpRound>;  // All SDTFPBinOpRound users take two FP inputs
    457 def X86fsubRnd   : SDNode<"X86ISD::FSUB_RND",  SDTFPBinOpRound>;  // of the result type plus an i32 input.
    458 def X86fmulRnd   : SDNode<"X86ISD::FMUL_RND",  SDTFPBinOpRound>;
    459 def X86fdivRnd   : SDNode<"X86ISD::FDIV_RND",  SDTFPBinOpRound>;
    460 def X86fmaxRnd   : SDNode<"X86ISD::FMAX_RND",       SDTFPBinOpRound>;
    461 def X86scalef    : SDNode<"X86ISD::SCALEF",         SDTFPBinOpRound>;
    462 def X86scalefs   : SDNode<"X86ISD::SCALEFS",        SDTFPBinOpRound>;
    463 def X86fminRnd   : SDNode<"X86ISD::FMIN_RND",       SDTFPBinOpRound>;
    464 def X86fsqrtRnd     : SDNode<"X86ISD::FSQRT_RND",   SDTFPUnaryOpRound>;  // One FP input plus i32.
    465 def X86fsqrtRnds    : SDNode<"X86ISD::FSQRT_RND",   SDTFPBinOpRound>;  // Same FSQRT_RND opcode, two FP inputs plus i32.
    466 def X86fgetexpRnd   : SDNode<"X86ISD::FGETEXP_RND", SDTFPUnaryOpRound>;  // One FP input plus i32.
    467 def X86fgetexpRnds  : SDNode<"X86ISD::FGETEXP_RND", SDTFPBinOpRound>;  // Same FGETEXP_RND opcode, two FP inputs plus i32.
    468 
    469 def X86Fmadd     : SDNode<"X86ISD::FMADD",     SDTFma>;  // All six nodes use SDTFma: three inputs of the result type.
    470 def X86Fnmadd    : SDNode<"X86ISD::FNMADD",    SDTFma>;
    471 def X86Fmsub     : SDNode<"X86ISD::FMSUB",     SDTFma>;
    472 def X86Fnmsub    : SDNode<"X86ISD::FNMSUB",    SDTFma>;
    473 def X86Fmaddsub  : SDNode<"X86ISD::FMADDSUB",  SDTFma>;
    474 def X86Fmsubadd  : SDNode<"X86ISD::FMSUBADD",  SDTFma>;
    475 
    476 def X86FmaddRnd     : SDNode<"X86ISD::FMADD_RND",     SDTFmaRound>;  // _RND counterparts of the FMA nodes: SDTFmaRound adds
    477 def X86FnmaddRnd    : SDNode<"X86ISD::FNMADD_RND",    SDTFmaRound>;  // a trailing i32 input to the three same-typed inputs.
    478 def X86FmsubRnd     : SDNode<"X86ISD::FMSUB_RND",     SDTFmaRound>;
    479 def X86FnmsubRnd    : SDNode<"X86ISD::FNMSUB_RND",    SDTFmaRound>;
    480 def X86FmaddsubRnd  : SDNode<"X86ISD::FMADDSUB_RND",  SDTFmaRound>;
    481 def X86FmsubaddRnd  : SDNode<"X86ISD::FMSUBADD_RND",  SDTFmaRound>;
    482 
    483 def x86vpmadd52l     : SDNode<"X86ISD::VPMADD52L",     SDTFma>;  // Reuses SDTFma (three inputs of the result type).
    484 def x86vpmadd52h     : SDNode<"X86ISD::VPMADD52H",     SDTFma>;  // Note: these two defs start with a lowercase "x86", unlike their neighbours.
    485 
    486 def X86rsqrt28   : SDNode<"X86ISD::RSQRT28",  SDTFPUnaryOpRound>;  // One FP input of the result type plus an i32 input.
    487 def X86rcp28     : SDNode<"X86ISD::RCP28",    SDTFPUnaryOpRound>;
    488 def X86exp2      : SDNode<"X86ISD::EXP2",     SDTFPUnaryOpRound>;
    489 
    490 def X86rsqrt28s  : SDNode<"X86ISD::RSQRT28",   SDTFPBinOpRound>;  // "s" variants reuse the opcodes of the non-"s" nodes
    491 def X86rcp28s    : SDNode<"X86ISD::RCP28",     SDTFPBinOpRound>;  // but take one additional same-typed input.
    492 def X86RndScales : SDNode<"X86ISD::VRNDSCALE", SDTFPBinOpImmRound>;
    493 def X86Reduces   : SDNode<"X86ISD::VREDUCE",   SDTFPBinOpImmRound>;
    494 def X86GetMants  : SDNode<"X86ISD::VGETMANT",  SDTFPBinOpImmRound>;
    495 
    496 def SDT_PCMPISTRI : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,  // Two i32 results (indices 0 and 1).
    497                                          SDTCisVT<2, v16i8>, SDTCisVT<3, v16i8>,  // Inputs: two v16i8 vectors
    498                                          SDTCisVT<4, i8>]>;  // and an i8.
    499 def SDT_PCMPESTRI : SDTypeProfile<2, 5, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,  // Two i32 results (indices 0 and 1).
    500                                          SDTCisVT<2, v16i8>, SDTCisVT<3, i32>,  // Inputs alternate v16i8 vector, i32,
    501                                          SDTCisVT<4, v16i8>, SDTCisVT<5, i32>,  // v16i8 vector, i32,
    502                                          SDTCisVT<6, i8>]>;  // followed by an i8.
    503 
    504 def X86pcmpistri : SDNode<"X86ISD::PCMPISTRI", SDT_PCMPISTRI>;  // Two-result node, 3 inputs.
    505 def X86pcmpestri : SDNode<"X86ISD::PCMPESTRI", SDT_PCMPESTRI>;  // Two-result node, 5 inputs.
    506 
    507 def X86compress: SDNode<"X86ISD::COMPRESS", SDTypeProfile<1, 1,  // One vector input; result has the same type.
    508                               [SDTCisSameAs<0, 1>, SDTCisVec<1>]>, []>;
    509 def X86expand  : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 1,  // Same profile as X86compress.
    510                               [SDTCisSameAs<0, 1>, SDTCisVec<1>]>, []>;
    511 
    512 def SDTintToFPRound: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisFP<0>,  // FP vector result.
    513                                           SDTCisSameAs<0,1>, SDTCisInt<2>,  // Input 1 has the result type; input 2 is an integer.
    514                                           SDTCisVT<3, i32>]>;  // Input 3 is an i32.
    515 
    516 def SDTFloatToInt: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,  // Integer vector result from an FP vector input.
    517                                         SDTCisInt<0>, SDTCisFP<1>]>;
    518 
    519 def SDTFloatToIntRnd: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,  // Integer vector result from an FP vector input
    520                                            SDTCisInt<0>, SDTCisFP<1>,
    521                                            SDTCisVT<2, i32>]>;  // plus an i32 input.
    522 def SDTSFloatToIntRnd: SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisFP<1>,  // Integer result (not required to be a vector)
    523                                             SDTCisVec<1>, SDTCisVT<2, i32>]>;  // from an FP vector input plus an i32 input.
    524 def SDTVintToFPRound: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,  // FP vector result from an integer vector input
    525                                            SDTCisFP<0>, SDTCisInt<1>,
    526                                            SDTCisVT<2, i32>]>;  // plus an i32 input.
    527 
    528 // Scalar
        // Scalar int<->FP conversion nodes.  Every profile used here ends in an i32
        // operand; NOTE(review): given the "Rnd" naming this is presumably the
        // rounding-mode immediate -- confirm against the lowering code.
    529 def X86SintToFpRnd  : SDNode<"X86ISD::SINT_TO_FP_RND",  SDTintToFPRound>;
    530 def X86UintToFpRnd  : SDNode<"X86ISD::UINT_TO_FP_RND",  SDTintToFPRound>;
    531 
        // NOTE: these two share the X86ISD::FP_TO_[SU]INT_RND opcodes with the vector
        // X86cvtp2* nodes defined further down; only the type profile differs.
    532 def X86cvtts2IntRnd      : SDNode<"X86ISD::FP_TO_SINT_RND",  SDTSFloatToIntRnd>;
    533 def X86cvtts2UIntRnd     : SDNode<"X86ISD::FP_TO_UINT_RND",  SDTSFloatToIntRnd>;
    534 
    535 def  X86cvts2si  : SDNode<"X86ISD::SCALAR_FP_TO_SINT_RND", SDTSFloatToIntRnd>;
    536 def  X86cvts2usi : SDNode<"X86ISD::SCALAR_FP_TO_UINT_RND", SDTSFloatToIntRnd>;
    537 
    538 // Vector with rounding mode
        // NOTE: the first four nodes below reuse the generic ISD conversion opcodes
        // with profiles that carry an extra i32 operand.
    539 
    540 // cvtt fp-to-int stuff
    541 def X86VFpToSintRnd   : SDNode<"ISD::FP_TO_SINT",  SDTFloatToIntRnd>;
    542 def X86VFpToUintRnd   : SDNode<"ISD::FP_TO_UINT",  SDTFloatToIntRnd>;
    543 
    544 def X86VSintToFpRnd   : SDNode<"ISD::SINT_TO_FP",  SDTVintToFPRound>;
    545 def X86VUintToFpRnd   : SDNode<"ISD::UINT_TO_FP",  SDTVintToFPRound>;
    546 
    547 // cvt fp-to-int stuff
    548 def X86cvtp2IntRnd      : SDNode<"X86ISD::FP_TO_SINT_RND",  SDTFloatToIntRnd>;
    549 def X86cvtp2UIntRnd     : SDNode<"X86ISD::FP_TO_UINT_RND",  SDTFloatToIntRnd>;
    550 
    551 // Vector without rounding mode
        // Same X86ISD opcodes as X86cvtp2IntRnd / X86cvtp2UIntRnd, but with the
        // single-operand SDTFloatToInt profile (no trailing i32 operand).
    552 def X86cvtp2Int      : SDNode<"X86ISD::FP_TO_SINT_RND",  SDTFloatToInt>;
    553 def X86cvtp2UInt     : SDNode<"X86ISD::FP_TO_UINT_RND",  SDTFloatToInt>;
    554 
    555 def X86cvtph2ps     : SDNode<"ISD::FP16_TO_FP",
        // Result has f32 elements, operand 1 has i16 elements, operand 2 is an i32.
    556                               SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f32>,
    557                                                    SDTCVecEltisVT<1, i16>,
    558                                                    SDTCisVT<2, i32>]> >;
    559 
        // Result has i16 elements, operand 1 has f32 elements, and operands 2 and 3
        // are both i32.
    560 def X86cvtps2ph   : SDNode<"ISD::FP_TO_FP16",
    561                         SDTypeProfile<1, 3, [SDTCVecEltisVT<0, i16>,
    562                                              SDTCVecEltisVT<1, f32>,
    563                                              SDTCisVT<2, i32>,
    564                                              SDTCisVT<3, i32>]> >;
        // f32-element operand extended to an f64-element result (operand 1 is
        // constrained to be smaller than the result), plus an i32 operand.
    565 def X86vfpextRnd  : SDNode<"X86ISD::VFPEXT",
    566                         SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f64>,
    567                                              SDTCVecEltisVT<1, f32>,
    568                                              SDTCisOpSmallerThanOp<1, 0>,
    569                                              SDTCisVT<2, i32>]>>;
        // The reverse shape: f64-element operand, smaller f32-element result, plus
        // an i32 operand.
    570 def X86vfproundRnd: SDNode<"X86ISD::VFPROUND",
    571                         SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f32>,
    572                                              SDTCVecEltisVT<1, f64>,
    573                                              SDTCisOpSmallerThanOp<0, 1>,
    574                                              SDTCisVT<2, i32>]>>;
    575 
        // Uses the SDTIntTruncOp profile, which is declared outside this section.
    576 def X86cvt2mask   : SDNode<"X86ISD::CVT2MASK", SDTIntTruncOp>;
    577 
    578 //===----------------------------------------------------------------------===//
    579 // SSE Complex Patterns
    580 //===----------------------------------------------------------------------===//
    581 
    582 // These are 'extloads' from a scalar to the low element of a vector, zeroing
    583 // the top elements.  These are used for the SSE 'ss' and 'sd' instruction
    584 // forms.
        // Both patterns are matched by selectScalarSSELoad and declare five operands,
        // the same count as the MIOperandInfo of the ssmem / sdmem operands.
    585 def sse_load_f32 : ComplexPattern<v4f32, 5, "selectScalarSSELoad", [],
    586                                   [SDNPHasChain, SDNPMayLoad, SDNPMemOperand,
    587                                    SDNPWantRoot]>;
    588 def sse_load_f64 : ComplexPattern<v2f64, 5, "selectScalarSSELoad", [],
    589                                   [SDNPHasChain, SDNPMayLoad, SDNPMemOperand,
    590                                    SDNPWantRoot]>;
    591 
    592 def ssmem : Operand<v4f32> {
          // Memory operand typed as v4f32 but printed with printf32mem and parsed
          // with the 32-bit memory asm operand class.
    593   let PrintMethod = "printf32mem";
          // Five sub-operands; presumably base, scale, index, displacement and
          // segment -- confirm against the X86 memory operand definitions.
    594   let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, SEGMENT_REG);
    595   let ParserMatchClass = X86Mem32AsmOperand;
    596   let OperandType = "OPERAND_MEMORY";
    597 }
        // Memory operand typed as v2f64 but printed with printf64mem and parsed with
        // the 64-bit memory asm operand class; same sub-operand layout as ssmem.
    598 def sdmem : Operand<v2f64> {
    599   let PrintMethod = "printf64mem";
    600   let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, SEGMENT_REG);
    601   let ParserMatchClass = X86Mem64AsmOperand;
    602   let OperandType = "OPERAND_MEMORY";
    603 }
    604 
    605 //===----------------------------------------------------------------------===//
    606 // SSE pattern fragments
    607 //===----------------------------------------------------------------------===//
    608 
    609 // 128-bit load pattern fragments
    610 // NOTE: all 128-bit integer vector loads are promoted to v2i64
        // Each loadv<type> fragment matches a plain 'load' whose result is <type>.
    611 def loadv4f32    : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
    612 def loadv2f64    : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
    613 def loadv2i64    : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
    614 
    615 // 256-bit load pattern fragments
    616 // NOTE: all 256-bit integer vector loads are promoted to v4i64
    617 def loadv8f32    : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>;
    618 def loadv4f64    : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>;
    619 def loadv4i64    : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>;
    620 
    621 // 512-bit load pattern fragments
        // Unlike the 128-/256-bit groups, a fragment is defined for every integer
        // element width (i8, i16, i32 and i64).
    622 def loadv16f32   : PatFrag<(ops node:$ptr), (v16f32 (load node:$ptr))>;
    623 def loadv8f64    : PatFrag<(ops node:$ptr), (v8f64 (load node:$ptr))>;
    624 def loadv64i8    : PatFrag<(ops node:$ptr), (v64i8 (load node:$ptr))>;
    625 def loadv32i16   : PatFrag<(ops node:$ptr), (v32i16 (load node:$ptr))>;
    626 def loadv16i32   : PatFrag<(ops node:$ptr), (v16i32 (load node:$ptr))>;
    627 def loadv8i64    : PatFrag<(ops node:$ptr), (v8i64 (load node:$ptr))>;
    628 
    629 // 128-/256-/512-bit extload pattern fragments
        // NOTE: the result type is the f64 vector (v2f64 / v4f64 / v8f64); the
        // fragment name presumably refers to the f32 vector type read from memory.
    630 def extloadv2f32 : PatFrag<(ops node:$ptr), (v2f64 (extloadvf32 node:$ptr))>;
    631 def extloadv4f32 : PatFrag<(ops node:$ptr), (v4f64 (extloadvf32 node:$ptr))>;
    632 def extloadv8f32 : PatFrag<(ops node:$ptr), (v8f64 (extloadvf32 node:$ptr))>;
    633 
    634 // These are needed to match a scalar load that is used in a vector-only
    635 // math instruction such as the FP logical ops: andps, andnps, orps, xorps.
    636 // The memory operand is required to be a 128-bit load, so it must be converted
    637 // from a vector to a scalar.
        // Each fragment extracts element 0 of the corresponding 128-bit vector load
        // (loadv4f32 for f32, loadv2f64 for f64).
    638 def loadf32_128 : PatFrag<(ops node:$ptr),
    639   (f32 (extractelt (loadv4f32 node:$ptr), (iPTR 0)))>;
    640 def loadf64_128 : PatFrag<(ops node:$ptr),
    641   (f64 (extractelt (loadv2f64 node:$ptr), (iPTR 0)))>;
    642 
    643 // Like 'store', but always requires 128-bit vector alignment.
        // The predicate accepts stores whose alignment is at least 16 bytes.
    644 def alignedstore : PatFrag<(ops node:$val, node:$ptr),
    645                            (store node:$val, node:$ptr), [{
    646   return cast<StoreSDNode>(N)->getAlignment() >= 16;
    647 }]>;
    648 
    649 // Like 'store', but always requires 256-bit vector alignment.
        // The predicate accepts stores whose alignment is at least 32 bytes.
    650 def alignedstore256 : PatFrag<(ops node:$val, node:$ptr),
    651                               (store node:$val, node:$ptr), [{
    652   return cast<StoreSDNode>(N)->getAlignment() >= 32;
    653 }]>;
    654 
    655 // Like 'store', but always requires 512-bit vector alignment.
        // The predicate accepts stores whose alignment is at least 64 bytes.
    656 def alignedstore512 : PatFrag<(ops node:$val, node:$ptr),
    657                               (store node:$val, node:$ptr), [{
    658   return cast<StoreSDNode>(N)->getAlignment() >= 64;
    659 }]>;
    660 
    661 // Like 'load', but always requires 128-bit vector alignment.
        // The predicate accepts loads whose alignment is at least 16 bytes.
    662 def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
    663   return cast<LoadSDNode>(N)->getAlignment() >= 16;
    664 }]>;
    665 
    666 // Like 'load', but always requires 256-bit vector alignment.
        // The predicate accepts loads whose alignment is at least 32 bytes.
    667 def alignedload256 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
    668   return cast<LoadSDNode>(N)->getAlignment() >= 32;
    669 }]>;
    670 
    671 // Like 'load', but always requires 512-bit vector alignment.
        // The predicate accepts loads whose alignment is at least 64 bytes.
    672 def alignedload512 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
    673   return cast<LoadSDNode>(N)->getAlignment() >= 64;
    674 }]>;
    675 
        // Scalar f32 / f64 loads that must still pass the 16-byte 'alignedload'
        // predicate.
    676 def alignedloadfsf32 : PatFrag<(ops node:$ptr),
    677                                (f32 (alignedload node:$ptr))>;
    678 def alignedloadfsf64 : PatFrag<(ops node:$ptr),
    679                                (f64 (alignedload node:$ptr))>;
    680 
    681 // 128-bit aligned load pattern fragments
    682 // NOTE: all 128-bit integer vector loads are promoted to v2i64
        // Built on 'alignedload' (alignment of at least 16 bytes).
    683 def alignedloadv4f32 : PatFrag<(ops node:$ptr),
    684                                (v4f32 (alignedload node:$ptr))>;
    685 def alignedloadv2f64 : PatFrag<(ops node:$ptr),
    686                                (v2f64 (alignedload node:$ptr))>;
    687 def alignedloadv2i64 : PatFrag<(ops node:$ptr),
    688                                (v2i64 (alignedload node:$ptr))>;
    689 
    690 // 256-bit aligned load pattern fragments
    691 // NOTE: all 256-bit integer vector loads are promoted to v4i64
        // Built on 'alignedload256' (alignment of at least 32 bytes).
    692 def alignedloadv8f32 : PatFrag<(ops node:$ptr),
    693                                (v8f32 (alignedload256 node:$ptr))>;
    694 def alignedloadv4f64 : PatFrag<(ops node:$ptr),
    695                                (v4f64 (alignedload256 node:$ptr))>;
    696 def alignedloadv4i64 : PatFrag<(ops node:$ptr),
    697                                (v4i64 (alignedload256 node:$ptr))>;
    698 
    699 // 512-bit aligned load pattern fragments
        // Built on 'alignedload512' (alignment of at least 64 bytes).
    700 def alignedloadv16f32 : PatFrag<(ops node:$ptr),
    701                                 (v16f32 (alignedload512 node:$ptr))>;
    702 def alignedloadv16i32 : PatFrag<(ops node:$ptr),
    703                                 (v16i32 (alignedload512 node:$ptr))>;
    704 def alignedloadv8f64  : PatFrag<(ops node:$ptr),
    705                                 (v8f64  (alignedload512 node:$ptr))>;
    706 def alignedloadv8i64  : PatFrag<(ops node:$ptr),
    707                                 (v8i64  (alignedload512 node:$ptr))>;
    708 
    709 // Like 'load', but uses special alignment checks suitable for use in
    710 // memory operands in most SSE instructions, which are required to
    711 // be naturally aligned on some targets but not on others.  If the subtarget
    712 // allows unaligned accesses, match any load, though this may require
    713 // setting a feature bit in the processor (on startup, for example).
    714 // Opteron 10h and later implement such a feature.
        // Concretely: match when Subtarget->hasSSEUnalignedMem() is true, or when the
        // load's alignment is at least 16 bytes.
    715 def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
    716   return    Subtarget->hasSSEUnalignedMem()
    717          || cast<LoadSDNode>(N)->getAlignment() >= 16;
    718 }]>;
    719 
        // Scalar f32 / f64 loads that satisfy the 'memop' predicate.
    720 def memopfsf32 : PatFrag<(ops node:$ptr), (f32   (memop node:$ptr))>;
    721 def memopfsf64 : PatFrag<(ops node:$ptr), (f64   (memop node:$ptr))>;
    722 
    723 // 128-bit memop pattern fragments
    724 // NOTE: all 128-bit integer vector loads are promoted to v2i64
        // Each memopv<type> fragment is a 'memop' load whose result type is <type>.
    725 def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>;
    726 def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>;
    727 def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
    728 
    729 // These are needed to match a scalar memop that is used in a vector-only
    730 // math instruction such as the FP logical ops: andps, andnps, orps, xorps.
    731 // The memory operand is required to be a 128-bit load, so it must be converted
    732 // from a vector to a scalar.
        // Each fragment extracts element 0 of the corresponding 128-bit memop load
        // (memopv4f32 for f32, memopv2f64 for f64).
    733 def memopfsf32_128 : PatFrag<(ops node:$ptr),
    734   (f32 (extractelt (memopv4f32 node:$ptr), (iPTR 0)))>;
    735 def memopfsf64_128 : PatFrag<(ops node:$ptr),
    736   (f64 (extractelt (memopv2f64 node:$ptr), (iPTR 0)))>;
    737 
    738 
    739 // SSSE3 uses MMX registers for some instructions. They aren't aligned on a
    740 // 16-byte boundary.
    741 // FIXME: 8 byte alignment for mmx reads is not required
        // memop64 matches any load whose alignment is at least 8 bytes.
    742 def memop64 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
    743   return cast<LoadSDNode>(N)->getAlignment() >= 8;
    744 }]>;
    745 
        // x86mmx-typed load that satisfies the memop64 predicate.
    746 def memopmmx  : PatFrag<(ops node:$ptr), (x86mmx  (memop64 node:$ptr))>;
    747 
    748 def mgatherv4i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
    749   (masked_gather node:$src1, node:$src2, node:$src3) , [{
    750   // Match only gathers whose index or base pointer has type v4i32.
    751   auto *Gather = dyn_cast<MaskedGatherSDNode>(N);
    752   return Gather && (Gather->getIndex().getValueType() == MVT::v4i32 ||
    753                     Gather->getBasePtr().getValueType() == MVT::v4i32);
    754 }]>;
    755 
    756 def mgatherv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
    757   (masked_gather node:$src1, node:$src2, node:$src3) , [{
    758   // Match only gathers whose index or base pointer has type v8i32.
    759   auto *Gather = dyn_cast<MaskedGatherSDNode>(N);
    760   return Gather && (Gather->getIndex().getValueType() == MVT::v8i32 ||
    761                     Gather->getBasePtr().getValueType() == MVT::v8i32);
    762 }]>;
    763 
    764 def mgatherv2i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
    765   (masked_gather node:$src1, node:$src2, node:$src3) , [{
    766   // Match only gathers whose index or base pointer has type v2i64.
    767   auto *Gather = dyn_cast<MaskedGatherSDNode>(N);
    768   return Gather && (Gather->getIndex().getValueType() == MVT::v2i64 ||
    769                     Gather->getBasePtr().getValueType() == MVT::v2i64);
    770 }]>;
    771 def mgatherv4i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
    772   (masked_gather node:$src1, node:$src2, node:$src3) , [{
    773   // Match only gathers whose index or base pointer has type v4i64.
    774   auto *Gather = dyn_cast<MaskedGatherSDNode>(N);
    775   return Gather && (Gather->getIndex().getValueType() == MVT::v4i64 ||
    776                     Gather->getBasePtr().getValueType() == MVT::v4i64);
    777 }]>;
    778 def mgatherv8i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
    779   (masked_gather node:$src1, node:$src2, node:$src3) , [{
    780   // Match only gathers whose index or base pointer has type v8i64.
    781   auto *Gather = dyn_cast<MaskedGatherSDNode>(N);
    782   return Gather && (Gather->getIndex().getValueType() == MVT::v8i64 ||
    783                     Gather->getBasePtr().getValueType() == MVT::v8i64);
    784 }]>;
    785 def mgatherv16i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
    786   (masked_gather node:$src1, node:$src2, node:$src3) , [{
    787   // Match only gathers whose index or base pointer has type v16i32.
    788   auto *Gather = dyn_cast<MaskedGatherSDNode>(N);
    789   return Gather && (Gather->getIndex().getValueType() == MVT::v16i32 ||
    790                     Gather->getBasePtr().getValueType() == MVT::v16i32);
    791 }]>;
    792 
    793 def mscatterv2i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
    794   (masked_scatter node:$src1, node:$src2, node:$src3) , [{
    795   // Match only scatters whose index or base pointer has type v2i64.
    796   auto *Scatter = dyn_cast<MaskedScatterSDNode>(N);
    797   return Scatter && (Scatter->getIndex().getValueType() == MVT::v2i64 ||
    798                      Scatter->getBasePtr().getValueType() == MVT::v2i64);
    799 }]>;
    800 
    801 def mscatterv4i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
    802   (masked_scatter node:$src1, node:$src2, node:$src3) , [{
    803   // Match only scatters whose index or base pointer has type v4i32.
    804   auto *Scatter = dyn_cast<MaskedScatterSDNode>(N);
    805   return Scatter && (Scatter->getIndex().getValueType() == MVT::v4i32 ||
    806                      Scatter->getBasePtr().getValueType() == MVT::v4i32);
    807 }]>;
    808 
    809 def mscatterv4i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
    810   (masked_scatter node:$src1, node:$src2, node:$src3) , [{
    811   // Match only scatters whose index or base pointer has type v4i64.
    812   auto *Scatter = dyn_cast<MaskedScatterSDNode>(N);
    813   return Scatter && (Scatter->getIndex().getValueType() == MVT::v4i64 ||
    814                      Scatter->getBasePtr().getValueType() == MVT::v4i64);
    815 }]>;
    816 
    817 def mscatterv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
    818   (masked_scatter node:$src1, node:$src2, node:$src3) , [{
    819   // Match only scatters whose index or base pointer has type v8i32.
    820   auto *Scatter = dyn_cast<MaskedScatterSDNode>(N);
    821   return Scatter && (Scatter->getIndex().getValueType() == MVT::v8i32 ||
    822                      Scatter->getBasePtr().getValueType() == MVT::v8i32);
    823 }]>;
    824 
    825 def mscatterv8i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
    826   (masked_scatter node:$src1, node:$src2, node:$src3) , [{
    827   // Match only scatters whose index or base pointer has type v8i64.
    828   auto *Scatter = dyn_cast<MaskedScatterSDNode>(N);
    829   return Scatter && (Scatter->getIndex().getValueType() == MVT::v8i64 ||
    830                      Scatter->getBasePtr().getValueType() == MVT::v8i64);
    831 }]>;
    832 def mscatterv16i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
    833   (masked_scatter node:$src1, node:$src2, node:$src3) , [{
    834   // Match only scatters whose index or base pointer has type v16i32.
    835   auto *Scatter = dyn_cast<MaskedScatterSDNode>(N);
    836   return Scatter && (Scatter->getIndex().getValueType() == MVT::v16i32 ||
    837                      Scatter->getBasePtr().getValueType() == MVT::v16i32);
    838 }]>;
    839 
    840 // 128-bit bitconvert pattern fragments
        // Each bc_<type> fragment matches a bitconvert whose result type is <type>.
    841 def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>;
    842 def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>;
    843 def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>;
    844 def bc_v8i16 : PatFrag<(ops node:$in), (v8i16 (bitconvert node:$in))>;
    845 def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>;
    846 def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>;
    847 
    848 // 256-bit bitconvert pattern fragments
    849 def bc_v32i8 : PatFrag<(ops node:$in), (v32i8 (bitconvert node:$in))>;
    850 def bc_v16i16 : PatFrag<(ops node:$in), (v16i16 (bitconvert node:$in))>;
    851 def bc_v8i32 : PatFrag<(ops node:$in), (v8i32 (bitconvert node:$in))>;
    852 def bc_v4i64 : PatFrag<(ops node:$in), (v4i64 (bitconvert node:$in))>;
    853 def bc_v8f32 : PatFrag<(ops node:$in), (v8f32 (bitconvert node:$in))>;
    854 
    855 // 512-bit bitconvert pattern fragments
    856 def bc_v16i32 : PatFrag<(ops node:$in), (v16i32 (bitconvert node:$in))>;
    857 def bc_v8i64 : PatFrag<(ops node:$in), (v8i64 (bitconvert node:$in))>;
    858 def bc_v8f64 : PatFrag<(ops node:$in), (v8f64 (bitconvert node:$in))>;
    859 def bc_v16f32 : PatFrag<(ops node:$in), (v16f32 (bitconvert node:$in))>;
    860 
    861 def vzmovl_v2i64 : PatFrag<(ops node:$src),
        // vzmovl_v2i64 / vzmovl_v4i32: bitconvert of X86vzmovl applied to a vector
        // built with scalar_to_vector from an i64 / i32 load of $src.
    862                            (bitconvert (v2i64 (X86vzmovl
    863                              (v2i64 (scalar_to_vector (loadi64 node:$src))))))>;
    864 def vzmovl_v4i32 : PatFrag<(ops node:$src),
    865                            (bitconvert (v4i32 (X86vzmovl
    866                              (v4i32 (scalar_to_vector (loadi32 node:$src))))))>;
    867 
        // Bitconvert of a v2i64 X86vzload of $src.
    868 def vzload_v2i64 : PatFrag<(ops node:$src),
    869                            (bitconvert (v2i64 (X86vzload node:$src)))>;
    870 
    871 
    872 def fp32imm0 : PatLeaf<(f32 fpimm), [{
          // Leaf matching an f32 immediate whose value is exactly +0.0.
          // NOTE(review): presumably -0.0 is rejected by isExactlyValue -- confirm.
    873   return N->isExactlyValue(+0.0);
    874 }]>;
    875 
        // I8Imm - Re-emit an immediate as an i8 immediate holding its low 8 bits.
    876 def I8Imm : SDNodeXForm<imm, [{
    877   // Transformation function: get the low 8 bits.
    878   return getI8Imm((uint8_t)N->getZExtValue(), SDLoc(N));
    879 }]>;
    880 
        // Leaves matching specific i32 immediates: the literal 8, and the value of
        // X86::STATIC_ROUNDING::CUR_DIRECTION.  NOTE(review): the names suggest these
        // are rounding-control encodings -- confirm against the STATIC_ROUNDING enum.
    881 def FROUND_NO_EXC : ImmLeaf<i32, [{ return Imm == 8; }]>;
    882 def FROUND_CURRENT : ImmLeaf<i32, [{
    883   return Imm == X86::STATIC_ROUNDING::CUR_DIRECTION;
    884 }]>;
    885 
    886 // BYTE_imm - Transform bit immediates into byte immediates.
    887 def BYTE_imm  : SDNodeXForm<imm, [{
    888   // Transformation function: imm >> 3
          // The zero-extended value is shifted and re-emitted as an i32 immediate.
    889   return getI32Imm(N->getZExtValue() >> 3, SDLoc(N));
    890 }]>;
    891 
    892 // EXTRACT_get_vextract128_imm xform function: convert extract_subvector index
    893 // to VEXTRACTF128/VEXTRACTI128 imm.
        // The value comes from X86::getExtractVEXTRACT128Immediate and is emitted as
        // an i8 immediate.
    894 def EXTRACT_get_vextract128_imm : SDNodeXForm<extract_subvector, [{
    895   return getI8Imm(X86::getExtractVEXTRACT128Immediate(N), SDLoc(N));
    896 }]>;
    897 
    898 // INSERT_get_vinsert128_imm xform function: convert insert_subvector index to
    899 // VINSERTF128/VINSERTI128 imm.
        // The value comes from X86::getInsertVINSERT128Immediate and is emitted as an
        // i8 immediate.
    900 def INSERT_get_vinsert128_imm : SDNodeXForm<insert_subvector, [{
    901   return getI8Imm(X86::getInsertVINSERT128Immediate(N), SDLoc(N));
    902 }]>;
    903 
    904 // EXTRACT_get_vextract256_imm xform function: convert extract_subvector index
    905 // to VEXTRACTF64x4 imm.
        // The value comes from X86::getExtractVEXTRACT256Immediate and is emitted as
        // an i8 immediate.
    906 def EXTRACT_get_vextract256_imm : SDNodeXForm<extract_subvector, [{
    907   return getI8Imm(X86::getExtractVEXTRACT256Immediate(N), SDLoc(N));
    908 }]>;
    909 
    910 // INSERT_get_vinsert256_imm xform function: convert insert_subvector index to
    911 // VINSERTF64x4 imm.
        // The value comes from X86::getInsertVINSERT256Immediate and is emitted as an
        // i8 immediate.
    912 def INSERT_get_vinsert256_imm : SDNodeXForm<insert_subvector, [{
    913   return getI8Imm(X86::getInsertVINSERT256Immediate(N), SDLoc(N));
    914 }]>;
    915 
    916 def vextract128_extract : PatFrag<(ops node:$bigvec, node:$index),
        // Matches an extract_subvector accepted by X86::isVEXTRACT128Index; the
        // attached transform is EXTRACT_get_vextract128_imm.
    917                                    (extract_subvector node:$bigvec,
    918                                                       node:$index), [{
    919   return X86::isVEXTRACT128Index(N);
    920 }], EXTRACT_get_vextract128_imm>;
    921 
        // Matches an insert_subvector accepted by X86::isVINSERT128Index; the
        // attached transform is INSERT_get_vinsert128_imm.
    922 def vinsert128_insert : PatFrag<(ops node:$bigvec, node:$smallvec,
    923                                       node:$index),
    924                                  (insert_subvector node:$bigvec, node:$smallvec,
    925                                                    node:$index), [{
    926   return X86::isVINSERT128Index(N);
    927 }], INSERT_get_vinsert128_imm>;
    928 
    929 
        // Matches an extract_subvector accepted by X86::isVEXTRACT256Index; the
        // attached transform is EXTRACT_get_vextract256_imm.
    930 def vextract256_extract : PatFrag<(ops node:$bigvec, node:$index),
    931                                    (extract_subvector node:$bigvec,
    932                                                       node:$index), [{
    933   return X86::isVEXTRACT256Index(N);
    934 }], EXTRACT_get_vextract256_imm>;
    935 
        // Matches an insert_subvector accepted by X86::isVINSERT256Index; the
        // attached transform is INSERT_get_vinsert256_imm.
    936 def vinsert256_insert : PatFrag<(ops node:$bigvec, node:$smallvec,
    937                                       node:$index),
    938                                  (insert_subvector node:$bigvec, node:$smallvec,
    939                                                    node:$index), [{
    940   return X86::isVINSERT256Index(N);
    941 }], INSERT_get_vinsert256_imm>;
    942 
    943 def masked_load_aligned128 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
    944                          (masked_load node:$src1, node:$src2, node:$src3), [{
    945   // Accept only masked loads aligned to at least 16 bytes.
    946   auto *MLoad = dyn_cast<MaskedLoadSDNode>(N);
    947   return MLoad && MLoad->getAlignment() >= 16;
    948 }]>;
    949 
    950 def masked_load_aligned256 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
    951                          (masked_load node:$src1, node:$src2, node:$src3), [{
    952   // Accept only masked loads aligned to at least 32 bytes.
    953   auto *MLoad = dyn_cast<MaskedLoadSDNode>(N);
    954   return MLoad && MLoad->getAlignment() >= 32;
    955 }]>;
    956 
    957 def masked_load_aligned512 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
    958                          (masked_load node:$src1, node:$src2, node:$src3), [{
    959   // Accept only masked loads aligned to at least 64 bytes.
    960   auto *MLoad = dyn_cast<MaskedLoadSDNode>(N);
    961   return MLoad && MLoad->getAlignment() >= 64;
    962 }]>;
    963 
        // Accepts any masked load node; no alignment requirement is checked.
    964 def masked_load_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
    965                          (masked_load node:$src1, node:$src2, node:$src3), [{
    966   return isa<MaskedLoadSDNode>(N);
    967 }]>;
    968 
    969 // Masked store fragments.
    970 // X86mstore can't be implemented in core DAG files because some targets
    971 // do not support vector types (llvm-tblgen will fail).
        // X86mstore matches a masked_store that is not a truncating store; the
        // truncating case is handled by X86mtruncstore.
    972 def X86mstore : PatFrag<(ops node:$src1, node:$src2, node:$src3),
    973                         (masked_store node:$src1, node:$src2, node:$src3), [{
    974   return !cast<MaskedStoreSDNode>(N)->isTruncatingStore();
    975 }]>;
    976 
    977 def masked_store_aligned128 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
    978                          (X86mstore node:$src1, node:$src2, node:$src3), [{
    979   // Accept only non-truncating masked stores aligned to at least 16 bytes.
    980   auto *MStore = dyn_cast<MaskedStoreSDNode>(N);
    981   return MStore && MStore->getAlignment() >= 16;
    982 }]>;
    983 
    984 def masked_store_aligned256 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
    985                          (X86mstore node:$src1, node:$src2, node:$src3), [{
    986   // Accept only non-truncating masked stores aligned to at least 32 bytes.
    987   auto *MStore = dyn_cast<MaskedStoreSDNode>(N);
    988   return MStore && MStore->getAlignment() >= 32;
    989 }]>;
    990 
    991 def masked_store_aligned512 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
    992                          (X86mstore node:$src1, node:$src2, node:$src3), [{
    993   // Accept only non-truncating masked stores aligned to at least 64 bytes.
    994   auto *MStore = dyn_cast<MaskedStoreSDNode>(N);
    995   return MStore && MStore->getAlignment() >= 64;
    996 }]>;
    997 
        // Accepts any non-truncating masked store; no alignment requirement is
        // checked.
    998 def masked_store_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
    999                          (X86mstore node:$src1, node:$src2, node:$src3), [{
   1000   return isa<MaskedStoreSDNode>(N);
   1001 }]>;
   1002 
   1003 // Masked truncstore fragments.
   1004 // X86mtruncstore can't be implemented in core DAG files because some targets
   1005 // do not support vector types (llvm-tblgen will fail).
        // X86mtruncstore matches a masked_store that is a truncating store.
   1006 def X86mtruncstore : PatFrag<(ops node:$src1, node:$src2, node:$src3),
   1007                              (masked_store node:$src1, node:$src2, node:$src3), [{
   1008     return cast<MaskedStoreSDNode>(N)->isTruncatingStore();
   1009 }]>;
        // The masked_truncstorevi<N> fragments additionally require the scalar type
        // of the store's memory VT to be i<N> (i8, i16 or i32).
   1010 def masked_truncstorevi8 :
   1011   PatFrag<(ops node:$src1, node:$src2, node:$src3),
   1012           (X86mtruncstore node:$src1, node:$src2, node:$src3), [{
   1013   return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
   1014 }]>;
   1015 def masked_truncstorevi16 :
   1016   PatFrag<(ops node:$src1, node:$src2, node:$src3),
   1017           (X86mtruncstore node:$src1, node:$src2, node:$src3), [{
   1018   return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
   1019 }]>;
   1020 def masked_truncstorevi32 :
   1021   PatFrag<(ops node:$src1, node:$src2, node:$src3),
   1022           (X86mtruncstore node:$src1, node:$src2, node:$src3), [{
   1023   return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
   1024 }]>;
   1025