Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -fast-isel -mtriple=i686-unknown-unknown -mattr=+avx,+fma4,+xop | FileCheck %s --check-prefix=ALL --check-prefix=X32
      3 ; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+xop | FileCheck %s --check-prefix=ALL --check-prefix=X64
      4 
      5 ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/xop-builtins.c
      6 
      ; Fast-isel selection test for @llvm.x86.xop.vpmacssww. All three <2 x i64>
      ; arguments are bitcast to <8 x i16>, passed to the intrinsic, and the result
      ; is bitcast back. The i686 and x86_64 runs both expect one vpmacssww + return.
      7 define <2 x i64> @test_mm_maccs_epi16(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
      8 ; X32-LABEL: test_mm_maccs_epi16:
      9 ; X32:       # BB#0:
     10 ; X32-NEXT:    vpmacssww %xmm2, %xmm1, %xmm0, %xmm0
     11 ; X32-NEXT:    retl
     12 ;
     13 ; X64-LABEL: test_mm_maccs_epi16:
     14 ; X64:       # BB#0:
     15 ; X64-NEXT:    vpmacssww %xmm2, %xmm1, %xmm0, %xmm0
     16 ; X64-NEXT:    retq
     17   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
     18   %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
     19   %arg2 = bitcast <2 x i64> %a2 to <8 x i16>
     20   %res = call <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16> %arg0, <8 x i16> %arg1, <8 x i16> %arg2)
     21   %bc = bitcast <8 x i16> %res to <2 x i64>
     22   ret <2 x i64> %bc
     23 }
     24 declare <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
     25 
     ; Fast-isel selection test for @llvm.x86.xop.vpmacsww. All three <2 x i64>
     ; arguments are bitcast to <8 x i16>, passed to the intrinsic, and the result
     ; is bitcast back. The i686 and x86_64 runs both expect one vpmacsww + return.
     26 define <2 x i64> @test_mm_macc_epi16(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
     27 ; X32-LABEL: test_mm_macc_epi16:
     28 ; X32:       # BB#0:
     29 ; X32-NEXT:    vpmacsww %xmm2, %xmm1, %xmm0, %xmm0
     30 ; X32-NEXT:    retl
     31 ;
     32 ; X64-LABEL: test_mm_macc_epi16:
     33 ; X64:       # BB#0:
     34 ; X64-NEXT:    vpmacsww %xmm2, %xmm1, %xmm0, %xmm0
     35 ; X64-NEXT:    retq
     36   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
     37   %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
     38   %arg2 = bitcast <2 x i64> %a2 to <8 x i16>
     39   %res = call <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16> %arg0, <8 x i16> %arg1, <8 x i16> %arg2)
     40   %bc = bitcast <8 x i16> %res to <2 x i64>
     41   ret <2 x i64> %bc
     42 }
     43 declare <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
     44 
     ; Fast-isel selection test for @llvm.x86.xop.vpmacsswd. %a0 and %a1 are
     ; bitcast to <8 x i16>, while %a2 and the result use <4 x i32>. The i686 and
     ; x86_64 runs both expect one vpmacsswd + return.
     45 define <2 x i64> @test_mm_maccsd_epi16(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
     46 ; X32-LABEL: test_mm_maccsd_epi16:
     47 ; X32:       # BB#0:
     48 ; X32-NEXT:    vpmacsswd %xmm2, %xmm1, %xmm0, %xmm0
     49 ; X32-NEXT:    retl
     50 ;
     51 ; X64-LABEL: test_mm_maccsd_epi16:
     52 ; X64:       # BB#0:
     53 ; X64-NEXT:    vpmacsswd %xmm2, %xmm1, %xmm0, %xmm0
     54 ; X64-NEXT:    retq
     55   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
     56   %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
     57   %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
     58   %res = call <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16> %arg0, <8 x i16> %arg1, <4 x i32> %arg2)
     59   %bc = bitcast <4 x i32> %res to <2 x i64>
     60   ret <2 x i64> %bc
     61 }
     62 declare <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
     63 
     ; Fast-isel selection test for @llvm.x86.xop.vpmacswd. %a0 and %a1 are
     ; bitcast to <8 x i16>, while %a2 and the result use <4 x i32>. The i686 and
     ; x86_64 runs both expect one vpmacswd + return.
     64 define <2 x i64> @test_mm_maccd_epi16(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
     65 ; X32-LABEL: test_mm_maccd_epi16:
     66 ; X32:       # BB#0:
     67 ; X32-NEXT:    vpmacswd %xmm2, %xmm1, %xmm0, %xmm0
     68 ; X32-NEXT:    retl
     69 ;
     70 ; X64-LABEL: test_mm_maccd_epi16:
     71 ; X64:       # BB#0:
     72 ; X64-NEXT:    vpmacswd %xmm2, %xmm1, %xmm0, %xmm0
     73 ; X64-NEXT:    retq
     74   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
     75   %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
     76   %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
     77   %res = call <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16> %arg0, <8 x i16> %arg1, <4 x i32> %arg2)
     78   %bc = bitcast <4 x i32> %res to <2 x i64>
     79   ret <2 x i64> %bc
     80 }
     81 declare <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
     82 
     ; Fast-isel selection test for @llvm.x86.xop.vpmacssdd. All three <2 x i64>
     ; arguments are bitcast to <4 x i32>, passed to the intrinsic, and the result
     ; is bitcast back. The i686 and x86_64 runs both expect one vpmacssdd + return.
     83 define <2 x i64> @test_mm_maccs_epi32(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
     84 ; X32-LABEL: test_mm_maccs_epi32:
     85 ; X32:       # BB#0:
     86 ; X32-NEXT:    vpmacssdd %xmm2, %xmm1, %xmm0, %xmm0
     87 ; X32-NEXT:    retl
     88 ;
     89 ; X64-LABEL: test_mm_maccs_epi32:
     90 ; X64:       # BB#0:
     91 ; X64-NEXT:    vpmacssdd %xmm2, %xmm1, %xmm0, %xmm0
     92 ; X64-NEXT:    retq
     93   %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
     94   %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
     95   %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
     96   %res = call <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2)
     97   %bc = bitcast <4 x i32> %res to <2 x i64>
     98   ret <2 x i64> %bc
     99 }
    100 declare <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
    101 
    ; Fast-isel selection test for @llvm.x86.xop.vpmacsdd. All three <2 x i64>
    ; arguments are bitcast to <4 x i32>, passed to the intrinsic, and the result
    ; is bitcast back. The i686 and x86_64 runs both expect one vpmacsdd + return.
    102 define <2 x i64> @test_mm_macc_epi32(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
    103 ; X32-LABEL: test_mm_macc_epi32:
    104 ; X32:       # BB#0:
    105 ; X32-NEXT:    vpmacsdd %xmm2, %xmm1, %xmm0, %xmm0
    106 ; X32-NEXT:    retl
    107 ;
    108 ; X64-LABEL: test_mm_macc_epi32:
    109 ; X64:       # BB#0:
    110 ; X64-NEXT:    vpmacsdd %xmm2, %xmm1, %xmm0, %xmm0
    111 ; X64-NEXT:    retq
    112   %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
    113   %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
    114   %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
    115   %res = call <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2)
    116   %bc = bitcast <4 x i32> %res to <2 x i64>
    117   ret <2 x i64> %bc
    118 }
    119 declare <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
    120 
    ; Fast-isel selection test for @llvm.x86.xop.vpmacssdql. %a0 and %a1 are
    ; bitcast to <4 x i32>; %a2 and the result are already <2 x i64>, so neither
    ; needs a bitcast. The i686 and x86_64 runs both expect one vpmacssdql + return.
    121 define <2 x i64> @test_mm_maccslo_epi32(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
    122 ; X32-LABEL: test_mm_maccslo_epi32:
    123 ; X32:       # BB#0:
    124 ; X32-NEXT:    vpmacssdql %xmm2, %xmm1, %xmm0, %xmm0
    125 ; X32-NEXT:    retl
    126 ;
    127 ; X64-LABEL: test_mm_maccslo_epi32:
    128 ; X64:       # BB#0:
    129 ; X64-NEXT:    vpmacssdql %xmm2, %xmm1, %xmm0, %xmm0
    130 ; X64-NEXT:    retq
    131   %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
    132   %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
    133   %res = call <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32> %arg0, <4 x i32> %arg1, <2 x i64> %a2)
    134   ret <2 x i64> %res
    135 }
    136 declare <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
    137 
    ; Fast-isel selection test for @llvm.x86.xop.vpmacsdql. %a0 and %a1 are
    ; bitcast to <4 x i32>; %a2 and the result are already <2 x i64>, so neither
    ; needs a bitcast. The i686 and x86_64 runs both expect one vpmacsdql + return.
    138 define <2 x i64> @test_mm_macclo_epi32(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
    139 ; X32-LABEL: test_mm_macclo_epi32:
    140 ; X32:       # BB#0:
    141 ; X32-NEXT:    vpmacsdql %xmm2, %xmm1, %xmm0, %xmm0
    142 ; X32-NEXT:    retl
    143 ;
    144 ; X64-LABEL: test_mm_macclo_epi32:
    145 ; X64:       # BB#0:
    146 ; X64-NEXT:    vpmacsdql %xmm2, %xmm1, %xmm0, %xmm0
    147 ; X64-NEXT:    retq
    148   %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
    149   %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
    150   %res = call <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32> %arg0, <4 x i32> %arg1, <2 x i64> %a2)
    151   ret <2 x i64> %res
    152 }
    153 declare <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
    154 
    ; Fast-isel selection test for @llvm.x86.xop.vpmacssdqh. %a0 and %a1 are
    ; bitcast to <4 x i32>; %a2 and the result are already <2 x i64>, so neither
    ; needs a bitcast. The i686 and x86_64 runs both expect one vpmacssdqh + return.
    155 define <2 x i64> @test_mm_maccshi_epi32(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
    156 ; X32-LABEL: test_mm_maccshi_epi32:
    157 ; X32:       # BB#0:
    158 ; X32-NEXT:    vpmacssdqh %xmm2, %xmm1, %xmm0, %xmm0
    159 ; X32-NEXT:    retl
    160 ;
    161 ; X64-LABEL: test_mm_maccshi_epi32:
    162 ; X64:       # BB#0:
    163 ; X64-NEXT:    vpmacssdqh %xmm2, %xmm1, %xmm0, %xmm0
    164 ; X64-NEXT:    retq
    165   %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
    166   %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
    167   %res = call <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32> %arg0, <4 x i32> %arg1, <2 x i64> %a2)
    168   ret <2 x i64> %res
    169 }
    170 declare <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
    171 
    ; Fast-isel selection test for @llvm.x86.xop.vpmacsdqh. %a0 and %a1 are
    ; bitcast to <4 x i32>; %a2 and the result are already <2 x i64>, so neither
    ; needs a bitcast. The i686 and x86_64 runs both expect one vpmacsdqh + return.
    172 define <2 x i64> @test_mm_macchi_epi32(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
    173 ; X32-LABEL: test_mm_macchi_epi32:
    174 ; X32:       # BB#0:
    175 ; X32-NEXT:    vpmacsdqh %xmm2, %xmm1, %xmm0, %xmm0
    176 ; X32-NEXT:    retl
    177 ;
    178 ; X64-LABEL: test_mm_macchi_epi32:
    179 ; X64:       # BB#0:
    180 ; X64-NEXT:    vpmacsdqh %xmm2, %xmm1, %xmm0, %xmm0
    181 ; X64-NEXT:    retq
    182   %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
    183   %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
    184   %res = call <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32> %arg0, <4 x i32> %arg1, <2 x i64> %a2)
    185   ret <2 x i64> %res
    186 }
    187 declare <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
    188 
    ; Fast-isel selection test for @llvm.x86.xop.vpmadcsswd. %a0 and %a1 are
    ; bitcast to <8 x i16>, while %a2 and the result use <4 x i32>. The i686 and
    ; x86_64 runs both expect one vpmadcsswd + return.
    189 define <2 x i64> @test_mm_maddsd_epi16(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
    190 ; X32-LABEL: test_mm_maddsd_epi16:
    191 ; X32:       # BB#0:
    192 ; X32-NEXT:    vpmadcsswd %xmm2, %xmm1, %xmm0, %xmm0
    193 ; X32-NEXT:    retl
    194 ;
    195 ; X64-LABEL: test_mm_maddsd_epi16:
    196 ; X64:       # BB#0:
    197 ; X64-NEXT:    vpmadcsswd %xmm2, %xmm1, %xmm0, %xmm0
    198 ; X64-NEXT:    retq
    199   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
    200   %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
    201   %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
    202   %res = call <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16> %arg0, <8 x i16> %arg1, <4 x i32> %arg2)
    203   %bc = bitcast <4 x i32> %res to <2 x i64>
    204   ret <2 x i64> %bc
    205 }
    206 declare <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
    207 
    ; Fast-isel selection test for @llvm.x86.xop.vpmadcswd. %a0 and %a1 are
    ; bitcast to <8 x i16>, while %a2 and the result use <4 x i32>. The i686 and
    ; x86_64 runs both expect one vpmadcswd + return.
    208 define <2 x i64> @test_mm_maddd_epi16(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
    209 ; X32-LABEL: test_mm_maddd_epi16:
    210 ; X32:       # BB#0:
    211 ; X32-NEXT:    vpmadcswd %xmm2, %xmm1, %xmm0, %xmm0
    212 ; X32-NEXT:    retl
    213 ;
    214 ; X64-LABEL: test_mm_maddd_epi16:
    215 ; X64:       # BB#0:
    216 ; X64-NEXT:    vpmadcswd %xmm2, %xmm1, %xmm0, %xmm0
    217 ; X64-NEXT:    retq
    218   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
    219   %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
    220   %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
    221   %res = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %arg0, <8 x i16> %arg1, <4 x i32> %arg2)
    222   %bc = bitcast <4 x i32> %res to <2 x i64>
    223   ret <2 x i64> %bc
    224 }
    225 declare <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
    226 
    ; Fast-isel selection test for @llvm.x86.xop.vphaddbw. %a0 is bitcast to
    ; <16 x i8>, and the <8 x i16> result is bitcast back to <2 x i64>. The i686
    ; and x86_64 runs both expect one vphaddbw + return.
    227 define <2 x i64> @test_mm_haddw_epi8(<2 x i64> %a0) {
    228 ; X32-LABEL: test_mm_haddw_epi8:
    229 ; X32:       # BB#0:
    230 ; X32-NEXT:    vphaddbw %xmm0, %xmm0
    231 ; X32-NEXT:    retl
    232 ;
    233 ; X64-LABEL: test_mm_haddw_epi8:
    234 ; X64:       # BB#0:
    235 ; X64-NEXT:    vphaddbw %xmm0, %xmm0
    236 ; X64-NEXT:    retq
    237   %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
    238   %res = call <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8> %arg0)
    239   %bc = bitcast <8 x i16> %res to <2 x i64>
    240   ret <2 x i64> %bc
    241 }
    242 declare <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8>) nounwind readnone
    243 
    ; Fast-isel selection test for @llvm.x86.xop.vphaddbd. %a0 is bitcast to
    ; <16 x i8>, and the <4 x i32> result is bitcast back to <2 x i64>. The i686
    ; and x86_64 runs both expect one vphaddbd + return.
    244 define <2 x i64> @test_mm_haddd_epi8(<2 x i64> %a0) {
    245 ; X32-LABEL: test_mm_haddd_epi8:
    246 ; X32:       # BB#0:
    247 ; X32-NEXT:    vphaddbd %xmm0, %xmm0
    248 ; X32-NEXT:    retl
    249 ;
    250 ; X64-LABEL: test_mm_haddd_epi8:
    251 ; X64:       # BB#0:
    252 ; X64-NEXT:    vphaddbd %xmm0, %xmm0
    253 ; X64-NEXT:    retq
    254   %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
    255   %res = call <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8> %arg0)
    256   %bc = bitcast <4 x i32> %res to <2 x i64>
    257   ret <2 x i64> %bc
    258 }
    259 declare <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8>) nounwind readnone
    260 
    ; Fast-isel selection test for @llvm.x86.xop.vphaddbq. %a0 is bitcast to
    ; <16 x i8>; the intrinsic already returns <2 x i64>, so no result bitcast.
    ; The i686 and x86_64 runs both expect one vphaddbq + return.
    261 define <2 x i64> @test_mm_haddq_epi8(<2 x i64> %a0) {
    262 ; X32-LABEL: test_mm_haddq_epi8:
    263 ; X32:       # BB#0:
    264 ; X32-NEXT:    vphaddbq %xmm0, %xmm0
    265 ; X32-NEXT:    retl
    266 ;
    267 ; X64-LABEL: test_mm_haddq_epi8:
    268 ; X64:       # BB#0:
    269 ; X64-NEXT:    vphaddbq %xmm0, %xmm0
    270 ; X64-NEXT:    retq
    271   %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
    272   %res = call <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8> %arg0)
    273   ret <2 x i64> %res
    274 }
    275 declare <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8>) nounwind readnone
    276 
    ; Fast-isel selection test for @llvm.x86.xop.vphaddwd. %a0 is bitcast to
    ; <8 x i16>, and the <4 x i32> result is bitcast back to <2 x i64>. The i686
    ; and x86_64 runs both expect one vphaddwd + return.
    277 define <2 x i64> @test_mm_haddd_epi16(<2 x i64> %a0) {
    278 ; X32-LABEL: test_mm_haddd_epi16:
    279 ; X32:       # BB#0:
    280 ; X32-NEXT:    vphaddwd %xmm0, %xmm0
    281 ; X32-NEXT:    retl
    282 ;
    283 ; X64-LABEL: test_mm_haddd_epi16:
    284 ; X64:       # BB#0:
    285 ; X64-NEXT:    vphaddwd %xmm0, %xmm0
    286 ; X64-NEXT:    retq
    287   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
    288   %res = call <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16> %arg0)
    289   %bc = bitcast <4 x i32> %res to <2 x i64>
    290   ret <2 x i64> %bc
    291 }
    292 declare <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16>) nounwind readnone
    293 
    ; Fast-isel selection test for @llvm.x86.xop.vphaddwq. %a0 is bitcast to
    ; <8 x i16>; the intrinsic already returns <2 x i64>, so no result bitcast.
    ; The i686 and x86_64 runs both expect one vphaddwq + return.
    294 define <2 x i64> @test_mm_haddq_epi16(<2 x i64> %a0) {
    295 ; X32-LABEL: test_mm_haddq_epi16:
    296 ; X32:       # BB#0:
    297 ; X32-NEXT:    vphaddwq %xmm0, %xmm0
    298 ; X32-NEXT:    retl
    299 ;
    300 ; X64-LABEL: test_mm_haddq_epi16:
    301 ; X64:       # BB#0:
    302 ; X64-NEXT:    vphaddwq %xmm0, %xmm0
    303 ; X64-NEXT:    retq
    304   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
    305   %res = call <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16> %arg0)
    306   ret <2 x i64> %res
    307 }
    308 declare <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16>) nounwind readnone
    309 
    ; Fast-isel selection test for @llvm.x86.xop.vphadddq. %a0 is bitcast to
    ; <4 x i32>; the intrinsic already returns <2 x i64>, so no result bitcast.
    ; The i686 and x86_64 runs both expect one vphadddq + return.
    310 define <2 x i64> @test_mm_haddq_epi32(<2 x i64> %a0) {
    311 ; X32-LABEL: test_mm_haddq_epi32:
    312 ; X32:       # BB#0:
    313 ; X32-NEXT:    vphadddq %xmm0, %xmm0
    314 ; X32-NEXT:    retl
    315 ;
    316 ; X64-LABEL: test_mm_haddq_epi32:
    317 ; X64:       # BB#0:
    318 ; X64-NEXT:    vphadddq %xmm0, %xmm0
    319 ; X64-NEXT:    retq
    320   %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
    321   %res = call <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32> %arg0)
    322   ret <2 x i64> %res
    323 }
    324 declare <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32>) nounwind readnone
    325 
    ; Fast-isel selection test for @llvm.x86.xop.vphaddubw. %a0 is bitcast to
    ; <16 x i8>, and the <8 x i16> result is bitcast back to <2 x i64>. The i686
    ; and x86_64 runs both expect one vphaddubw + return.
    326 define <2 x i64> @test_mm_haddw_epu8(<2 x i64> %a0) {
    327 ; X32-LABEL: test_mm_haddw_epu8:
    328 ; X32:       # BB#0:
    329 ; X32-NEXT:    vphaddubw %xmm0, %xmm0
    330 ; X32-NEXT:    retl
    331 ;
    332 ; X64-LABEL: test_mm_haddw_epu8:
    333 ; X64:       # BB#0:
    334 ; X64-NEXT:    vphaddubw %xmm0, %xmm0
    335 ; X64-NEXT:    retq
    336   %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
    337   %res = call <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8> %arg0)
    338   %bc = bitcast <8 x i16> %res to <2 x i64>
    339   ret <2 x i64> %bc
    340 }
    341 declare <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8>) nounwind readnone
    342 
    ; Fast-isel selection test for @llvm.x86.xop.vphaddubd. %a0 is bitcast to
    ; <16 x i8>, and the <4 x i32> result is bitcast back to <2 x i64>. The i686
    ; and x86_64 runs both expect one vphaddubd + return.
    343 define <2 x i64> @test_mm_haddd_epu8(<2 x i64> %a0) {
    344 ; X32-LABEL: test_mm_haddd_epu8:
    345 ; X32:       # BB#0:
    346 ; X32-NEXT:    vphaddubd %xmm0, %xmm0
    347 ; X32-NEXT:    retl
    348 ;
    349 ; X64-LABEL: test_mm_haddd_epu8:
    350 ; X64:       # BB#0:
    351 ; X64-NEXT:    vphaddubd %xmm0, %xmm0
    352 ; X64-NEXT:    retq
    353   %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
    354   %res = call <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8> %arg0)
    355   %bc = bitcast <4 x i32> %res to <2 x i64>
    356   ret <2 x i64> %bc
    357 }
    358 declare <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8>) nounwind readnone
    359 
    ; Fast-isel selection test for @llvm.x86.xop.vphaddubq. %a0 is bitcast to
    ; <16 x i8>; the intrinsic already returns <2 x i64>, so no result bitcast.
    ; The i686 and x86_64 runs both expect one vphaddubq + return.
    360 define <2 x i64> @test_mm_haddq_epu8(<2 x i64> %a0) {
    361 ; X32-LABEL: test_mm_haddq_epu8:
    362 ; X32:       # BB#0:
    363 ; X32-NEXT:    vphaddubq %xmm0, %xmm0
    364 ; X32-NEXT:    retl
    365 ;
    366 ; X64-LABEL: test_mm_haddq_epu8:
    367 ; X64:       # BB#0:
    368 ; X64-NEXT:    vphaddubq %xmm0, %xmm0
    369 ; X64-NEXT:    retq
    370   %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
    371   %res = call <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8> %arg0)
    372   ret <2 x i64> %res
    373 }
    374 declare <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8>) nounwind readnone
    375 
    ; Fast-isel selection test for @llvm.x86.xop.vphadduwd. %a0 is bitcast to
    ; <8 x i16>, and the <4 x i32> result is bitcast back to <2 x i64>. The i686
    ; and x86_64 runs both expect one vphadduwd + return.
    376 define <2 x i64> @test_mm_haddd_epu16(<2 x i64> %a0) {
    377 ; X32-LABEL: test_mm_haddd_epu16:
    378 ; X32:       # BB#0:
    379 ; X32-NEXT:    vphadduwd %xmm0, %xmm0
    380 ; X32-NEXT:    retl
    381 ;
    382 ; X64-LABEL: test_mm_haddd_epu16:
    383 ; X64:       # BB#0:
    384 ; X64-NEXT:    vphadduwd %xmm0, %xmm0
    385 ; X64-NEXT:    retq
    386   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
    387   %res = call <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16> %arg0)
    388   %bc = bitcast <4 x i32> %res to <2 x i64>
    389   ret <2 x i64> %bc
    390 }
    391 declare <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16>) nounwind readnone
    392 
    393 
    ; Fast-isel selection test for @llvm.x86.xop.vphadduwq. %a0 is bitcast to
    ; <8 x i16>; the intrinsic already returns <2 x i64>, so no result bitcast.
    ; The i686 and x86_64 runs both expect one vphadduwq + return.
    394 define <2 x i64> @test_mm_haddq_epu16(<2 x i64> %a0) {
    395 ; X32-LABEL: test_mm_haddq_epu16:
    396 ; X32:       # BB#0:
    397 ; X32-NEXT:    vphadduwq %xmm0, %xmm0
    398 ; X32-NEXT:    retl
    399 ;
    400 ; X64-LABEL: test_mm_haddq_epu16:
    401 ; X64:       # BB#0:
    402 ; X64-NEXT:    vphadduwq %xmm0, %xmm0
    403 ; X64-NEXT:    retq
    404   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
    405   %res = call <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16> %arg0)
    406   ret <2 x i64> %res
    407 }
    408 declare <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16>) nounwind readnone
    409 
    ; Fast-isel selection test for @llvm.x86.xop.vphaddudq. %a0 is bitcast to
    ; <4 x i32>; the intrinsic already returns <2 x i64>, so no result bitcast.
    ; The i686 and x86_64 runs both expect one vphaddudq + return.
    410 define <2 x i64> @test_mm_haddq_epu32(<2 x i64> %a0) {
    411 ; X32-LABEL: test_mm_haddq_epu32:
    412 ; X32:       # BB#0:
    413 ; X32-NEXT:    vphaddudq %xmm0, %xmm0
    414 ; X32-NEXT:    retl
    415 ;
    416 ; X64-LABEL: test_mm_haddq_epu32:
    417 ; X64:       # BB#0:
    418 ; X64-NEXT:    vphaddudq %xmm0, %xmm0
    419 ; X64-NEXT:    retq
    420   %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
    421   %res = call <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32> %arg0)
    422   ret <2 x i64> %res
    423 }
    424 declare <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32>) nounwind readnone
    425 
    ; Fast-isel selection test for @llvm.x86.xop.vphsubbw. %a0 is bitcast to
    ; <16 x i8>, and the <8 x i16> result is bitcast back to <2 x i64>. The i686
    ; and x86_64 runs both expect one vphsubbw + return.
    426 define <2 x i64> @test_mm_hsubw_epi8(<2 x i64> %a0) {
    427 ; X32-LABEL: test_mm_hsubw_epi8:
    428 ; X32:       # BB#0:
    429 ; X32-NEXT:    vphsubbw %xmm0, %xmm0
    430 ; X32-NEXT:    retl
    431 ;
    432 ; X64-LABEL: test_mm_hsubw_epi8:
    433 ; X64:       # BB#0:
    434 ; X64-NEXT:    vphsubbw %xmm0, %xmm0
    435 ; X64-NEXT:    retq
    436   %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
    437   %res = call <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8> %arg0)
    438   %bc = bitcast <8 x i16> %res to <2 x i64>
    439   ret <2 x i64> %bc
    440 }
    441 declare <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8>) nounwind readnone
    442 
    ; Fast-isel selection test for @llvm.x86.xop.vphsubwd. %a0 is bitcast to
    ; <8 x i16>, and the <4 x i32> result is bitcast back to <2 x i64>. The i686
    ; and x86_64 runs both expect one vphsubwd + return.
    443 define <2 x i64> @test_mm_hsubd_epi16(<2 x i64> %a0) {
    444 ; X32-LABEL: test_mm_hsubd_epi16:
    445 ; X32:       # BB#0:
    446 ; X32-NEXT:    vphsubwd %xmm0, %xmm0
    447 ; X32-NEXT:    retl
    448 ;
    449 ; X64-LABEL: test_mm_hsubd_epi16:
    450 ; X64:       # BB#0:
    451 ; X64-NEXT:    vphsubwd %xmm0, %xmm0
    452 ; X64-NEXT:    retq
    453   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
    454   %res = call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %arg0)
    455   %bc = bitcast <4 x i32> %res to <2 x i64>
    456   ret <2 x i64> %bc
    457 }
    458 declare <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16>) nounwind readnone
    459 
    ; Fast-isel selection test for @llvm.x86.xop.vphsubdq. %a0 is bitcast to
    ; <4 x i32>; the intrinsic already returns <2 x i64>, so no result bitcast.
    ; The i686 and x86_64 runs both expect one vphsubdq + return.
    460 define <2 x i64> @test_mm_hsubq_epi32(<2 x i64> %a0) {
    461 ; X32-LABEL: test_mm_hsubq_epi32:
    462 ; X32:       # BB#0:
    463 ; X32-NEXT:    vphsubdq %xmm0, %xmm0
    464 ; X32-NEXT:    retl
    465 ;
    466 ; X64-LABEL: test_mm_hsubq_epi32:
    467 ; X64:       # BB#0:
    468 ; X64-NEXT:    vphsubdq %xmm0, %xmm0
    469 ; X64-NEXT:    retq
    470   %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
    471   %res = call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %arg0)
    472   ret <2 x i64> %res
    473 }
    474 declare <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32>) nounwind readnone
    475 
    ; Codegen test for the 128-bit @llvm.x86.xop.vpcmov call on <2 x i64> operands
    ; (no bitcasts needed). Unlike the 256-bit variant in this file, the expected
    ; output here is not a vpcmov instruction but a five-instruction sequence:
    ; vpcmpeqd, vpxor, vpand, vpand, vpor.
    ; NOTE(review): presumably the 128-bit intrinsic is expanded to generic
    ; and/or/xor logic before instruction selection -- confirm against the
    ; intrinsic upgrade/lowering code before treating this as intentional.
    476 define <2 x i64> @test_mm_cmov_si128(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
    477 ; X32-LABEL: test_mm_cmov_si128:
    478 ; X32:       # BB#0:
    479 ; X32-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
    480 ; X32-NEXT:    vpxor %xmm3, %xmm2, %xmm3
    481 ; X32-NEXT:    vpand %xmm2, %xmm0, %xmm0
    482 ; X32-NEXT:    vpand %xmm3, %xmm1, %xmm1
    483 ; X32-NEXT:    vpor %xmm1, %xmm0, %xmm0
    484 ; X32-NEXT:    retl
    485 ;
    486 ; X64-LABEL: test_mm_cmov_si128:
    487 ; X64:       # BB#0:
    488 ; X64-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
    489 ; X64-NEXT:    vpxor %xmm3, %xmm2, %xmm3
    490 ; X64-NEXT:    vpand %xmm2, %xmm0, %xmm0
    491 ; X64-NEXT:    vpand %xmm3, %xmm1, %xmm1
    492 ; X64-NEXT:    vpor %xmm1, %xmm0, %xmm0
    493 ; X64-NEXT:    retq
    494   %res = call <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2)
    495   ret <2 x i64> %res
    496 }
    497 declare <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64>, <2 x i64>, <2 x i64>) nounwind readnone
    498 
    ; Fast-isel selection test for the 256-bit @llvm.x86.xop.vpcmov.256 call on
    ; <4 x i64> operands (no bitcasts needed). The i686 and x86_64 runs both
    ; expect one vpcmov on ymm registers + return.
    499 define <4 x i64> @test_mm256_cmov_si256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
    500 ; X32-LABEL: test_mm256_cmov_si256:
    501 ; X32:       # BB#0:
    502 ; X32-NEXT:    vpcmov %ymm2, %ymm1, %ymm0, %ymm0
    503 ; X32-NEXT:    retl
    504 ;
    505 ; X64-LABEL: test_mm256_cmov_si256:
    506 ; X64:       # BB#0:
    507 ; X64-NEXT:    vpcmov %ymm2, %ymm1, %ymm0, %ymm0
    508 ; X64-NEXT:    retq
    509   %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2)
    510   ret <4 x i64> %res
    511 }
    512 declare <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64>, <4 x i64>, <4 x i64>) nounwind readnone
    513 
    ; Fast-isel selection test for @llvm.x86.xop.vpperm. All three <2 x i64>
    ; arguments are bitcast to <16 x i8>, passed to the intrinsic, and the result
    ; is bitcast back. The i686 and x86_64 runs both expect one vpperm + return.
    514 define <2 x i64> @test_mm_perm_epi8(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
    515 ; X32-LABEL: test_mm_perm_epi8:
    516 ; X32:       # BB#0:
    517 ; X32-NEXT:    vpperm %xmm2, %xmm1, %xmm0, %xmm0
    518 ; X32-NEXT:    retl
    519 ;
    520 ; X64-LABEL: test_mm_perm_epi8:
    521 ; X64:       # BB#0:
    522 ; X64-NEXT:    vpperm %xmm2, %xmm1, %xmm0, %xmm0
    523 ; X64-NEXT:    retq
    524   %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
    525   %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
    526   %arg2 = bitcast <2 x i64> %a2 to <16 x i8>
    527   %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %arg0, <16 x i8> %arg1, <16 x i8> %arg2)
    528   %bc = bitcast <16 x i8> %res to <2 x i64>
    529   ret <2 x i64> %bc
    530 }
    531 declare <16 x i8> @llvm.x86.xop.vpperm(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
    532 
    ; Fast-isel selection test for the two-vector form @llvm.x86.xop.vprotb.
    ; Both <2 x i64> arguments are bitcast to <16 x i8> and the result is bitcast
    ; back. The i686 and x86_64 runs both expect one register-form vprotb + return.
    533 define <2 x i64> @test_mm_rot_epi8(<2 x i64> %a0, <2 x i64> %a1) {
    534 ; X32-LABEL: test_mm_rot_epi8:
    535 ; X32:       # BB#0:
    536 ; X32-NEXT:    vprotb %xmm1, %xmm0, %xmm0
    537 ; X32-NEXT:    retl
    538 ;
    539 ; X64-LABEL: test_mm_rot_epi8:
    540 ; X64:       # BB#0:
    541 ; X64-NEXT:    vprotb %xmm1, %xmm0, %xmm0
    542 ; X64-NEXT:    retq
    543   %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
    544   %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
    545   %res = call <16 x i8> @llvm.x86.xop.vprotb(<16 x i8> %arg0, <16 x i8> %arg1)
    546   %bc = bitcast <16 x i8> %res to <2 x i64>
    547   ret <2 x i64> %bc
    548 }
    549 declare <16 x i8> @llvm.x86.xop.vprotb(<16 x i8>, <16 x i8>) nounwind readnone
    550 
    ; Fast-isel selection test for the two-vector form @llvm.x86.xop.vprotw.
    ; Both <2 x i64> arguments are bitcast to <8 x i16> and the result is bitcast
    ; back. The i686 and x86_64 runs both expect one register-form vprotw + return.
    551 define <2 x i64> @test_mm_rot_epi16(<2 x i64> %a0, <2 x i64> %a1) {
    552 ; X32-LABEL: test_mm_rot_epi16:
    553 ; X32:       # BB#0:
    554 ; X32-NEXT:    vprotw %xmm1, %xmm0, %xmm0
    555 ; X32-NEXT:    retl
    556 ;
    557 ; X64-LABEL: test_mm_rot_epi16:
    558 ; X64:       # BB#0:
    559 ; X64-NEXT:    vprotw %xmm1, %xmm0, %xmm0
    560 ; X64-NEXT:    retq
    561   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
    562   %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
    563   %res = call <8 x i16> @llvm.x86.xop.vprotw(<8 x i16> %arg0, <8 x i16> %arg1)
    564   %bc = bitcast <8 x i16> %res to <2 x i64>
    565   ret <2 x i64> %bc
    566 }
    567 declare <8 x i16> @llvm.x86.xop.vprotw(<8 x i16>, <8 x i16>) nounwind readnone
    568 
    ; Fast-isel selection test for the two-vector form @llvm.x86.xop.vprotd.
    ; Both <2 x i64> arguments are bitcast to <4 x i32> and the result is bitcast
    ; back. The i686 and x86_64 runs both expect one register-form vprotd + return.
    569 define <2 x i64> @test_mm_rot_epi32(<2 x i64> %a0, <2 x i64> %a1) {
    570 ; X32-LABEL: test_mm_rot_epi32:
    571 ; X32:       # BB#0:
    572 ; X32-NEXT:    vprotd %xmm1, %xmm0, %xmm0
    573 ; X32-NEXT:    retl
    574 ;
    575 ; X64-LABEL: test_mm_rot_epi32:
    576 ; X64:       # BB#0:
    577 ; X64-NEXT:    vprotd %xmm1, %xmm0, %xmm0
    578 ; X64-NEXT:    retq
    579   %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
    580   %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
    581   %res = call <4 x i32> @llvm.x86.xop.vprotd(<4 x i32> %arg0, <4 x i32> %arg1)
    582   %bc = bitcast <4 x i32> %res to <2 x i64>
    583   ret <2 x i64> %bc
    584 }
    585 declare <4 x i32> @llvm.x86.xop.vprotd(<4 x i32>, <4 x i32>) nounwind readnone
    586 
    ; Fast-isel selection test for the two-vector form @llvm.x86.xop.vprotq.
    ; Operands and result are already <2 x i64>, so there are no bitcasts.
    ; The i686 and x86_64 runs both expect one register-form vprotq + return.
    587 define <2 x i64> @test_mm_rot_epi64(<2 x i64> %a0, <2 x i64> %a1) {
    588 ; X32-LABEL: test_mm_rot_epi64:
    589 ; X32:       # BB#0:
    590 ; X32-NEXT:    vprotq %xmm1, %xmm0, %xmm0
    591 ; X32-NEXT:    retl
    592 ;
    593 ; X64-LABEL: test_mm_rot_epi64:
    594 ; X64:       # BB#0:
    595 ; X64-NEXT:    vprotq %xmm1, %xmm0, %xmm0
    596 ; X64-NEXT:    retq
    597   %res = call <2 x i64> @llvm.x86.xop.vprotq(<2 x i64> %a0, <2 x i64> %a1)
    598   ret <2 x i64> %res
    599 }
    600 declare <2 x i64> @llvm.x86.xop.vprotq(<2 x i64>, <2 x i64>) nounwind readnone
    601 
    ; Fast-isel selection test for the immediate form @llvm.x86.xop.vprotbi.
    ; %a0 is bitcast to <16 x i8> and the i8 constant 1 is the second operand.
    ; The i686 and x86_64 runs both expect vprotb with immediate $1 + return.
    602 define <2 x i64> @test_mm_roti_epi8(<2 x i64> %a0) {
    603 ; X32-LABEL: test_mm_roti_epi8:
    604 ; X32:       # BB#0:
    605 ; X32-NEXT:    vprotb $1, %xmm0, %xmm0
    606 ; X32-NEXT:    retl
    607 ;
    608 ; X64-LABEL: test_mm_roti_epi8:
    609 ; X64:       # BB#0:
    610 ; X64-NEXT:    vprotb $1, %xmm0, %xmm0
    611 ; X64-NEXT:    retq
    612   %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
    613   %res = call <16 x i8> @llvm.x86.xop.vprotbi(<16 x i8> %arg0, i8 1)
    614   %bc = bitcast <16 x i8> %res to <2 x i64>
    615   ret <2 x i64> %bc
    616 }
    617 declare <16 x i8> @llvm.x86.xop.vprotbi(<16 x i8>, i8) nounwind readnone
    618 
    ; Fast-isel selection test for the immediate form @llvm.x86.xop.vprotwi.
    ; %a0 is bitcast to <8 x i16> and the i8 constant 50 is the second operand.
    ; The expected vprotw carries the immediate unchanged as $50, even though it
    ; is larger than the 16-bit element width.
    619 define <2 x i64> @test_mm_roti_epi16(<2 x i64> %a0) {
    620 ; X32-LABEL: test_mm_roti_epi16:
    621 ; X32:       # BB#0:
    622 ; X32-NEXT:    vprotw $50, %xmm0, %xmm0
    623 ; X32-NEXT:    retl
    624 ;
    625 ; X64-LABEL: test_mm_roti_epi16:
    626 ; X64:       # BB#0:
    627 ; X64-NEXT:    vprotw $50, %xmm0, %xmm0
    628 ; X64-NEXT:    retq
    629   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
    630   %res = call <8 x i16> @llvm.x86.xop.vprotwi(<8 x i16> %arg0, i8 50)
    631   %bc = bitcast <8 x i16> %res to <2 x i64>
    632   ret <2 x i64> %bc
    633 }
    634 declare <8 x i16> @llvm.x86.xop.vprotwi(<8 x i16>, i8) nounwind readnone
    635 
    ; Fast-isel selection test for the immediate form @llvm.x86.xop.vprotdi.
    ; %a0 is bitcast to <4 x i32> and the i8 constant -30 is the second operand.
    ; The expected vprotd prints that immediate as $226, i.e. the same 8-bit
    ; pattern read as unsigned (256 - 30).
    636 define <2 x i64> @test_mm_roti_epi32(<2 x i64> %a0) {
    637 ; X32-LABEL: test_mm_roti_epi32:
    638 ; X32:       # BB#0:
    639 ; X32-NEXT:    vprotd $226, %xmm0, %xmm0
    640 ; X32-NEXT:    retl
    641 ;
    642 ; X64-LABEL: test_mm_roti_epi32:
    643 ; X64:       # BB#0:
    644 ; X64-NEXT:    vprotd $226, %xmm0, %xmm0
    645 ; X64-NEXT:    retq
    646   %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
    647   %res = call <4 x i32> @llvm.x86.xop.vprotdi(<4 x i32> %arg0, i8 -30)
    648   %bc = bitcast <4 x i32> %res to <2 x i64>
    649   ret <2 x i64> %bc
    650 }
    651 declare <4 x i32> @llvm.x86.xop.vprotdi(<4 x i32>, i8) nounwind readnone
    652 
    ; Fast-isel selection test for the immediate form @llvm.x86.xop.vprotqi.
    ; %a0 is used directly as <2 x i64> and the i8 constant 100 is the second
    ; operand. The expected vprotq carries the immediate unchanged as $100, even
    ; though it is larger than the 64-bit element width.
    653 define <2 x i64> @test_mm_roti_epi64(<2 x i64> %a0) {
    654 ; X32-LABEL: test_mm_roti_epi64:
    655 ; X32:       # BB#0:
    656 ; X32-NEXT:    vprotq $100, %xmm0, %xmm0
    657 ; X32-NEXT:    retl
    658 ;
    659 ; X64-LABEL: test_mm_roti_epi64:
    660 ; X64:       # BB#0:
    661 ; X64-NEXT:    vprotq $100, %xmm0, %xmm0
    662 ; X64-NEXT:    retq
    663   %res = call <2 x i64> @llvm.x86.xop.vprotqi(<2 x i64> %a0, i8 100)
    664   ret <2 x i64> %res
    665 }
    666 declare <2 x i64> @llvm.x86.xop.vprotqi(<2 x i64>, i8) nounwind readnone
    667 
    ; Fast-isel selection test for @llvm.x86.xop.vpshlb. Both <2 x i64>
    ; arguments are bitcast to <16 x i8> and the result is bitcast back.
    ; The i686 and x86_64 runs both expect one vpshlb + return.
    668 define <2 x i64> @test_mm_shl_epi8(<2 x i64> %a0, <2 x i64> %a1) {
    669 ; X32-LABEL: test_mm_shl_epi8:
    670 ; X32:       # BB#0:
    671 ; X32-NEXT:    vpshlb %xmm1, %xmm0, %xmm0
    672 ; X32-NEXT:    retl
    673 ;
    674 ; X64-LABEL: test_mm_shl_epi8:
    675 ; X64:       # BB#0:
    676 ; X64-NEXT:    vpshlb %xmm1, %xmm0, %xmm0
    677 ; X64-NEXT:    retq
    678   %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
    679   %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
    680   %res = call <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8> %arg0, <16 x i8> %arg1)
    681   %bc = bitcast <16 x i8> %res to <2 x i64>
    682   ret <2 x i64> %bc
    683 }
    684 declare <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8>, <16 x i8>) nounwind readnone
    685 
    ; Fast-isel selection test for @llvm.x86.xop.vpshlw. Both <2 x i64>
    ; arguments are bitcast to <8 x i16> and the result is bitcast back.
    ; The i686 and x86_64 runs both expect one vpshlw + return.
    686 define <2 x i64> @test_mm_shl_epi16(<2 x i64> %a0, <2 x i64> %a1) {
    687 ; X32-LABEL: test_mm_shl_epi16:
    688 ; X32:       # BB#0:
    689 ; X32-NEXT:    vpshlw %xmm1, %xmm0, %xmm0
    690 ; X32-NEXT:    retl
    691 ;
    692 ; X64-LABEL: test_mm_shl_epi16:
    693 ; X64:       # BB#0:
    694 ; X64-NEXT:    vpshlw %xmm1, %xmm0, %xmm0
    695 ; X64-NEXT:    retq
    696   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
    697   %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
    698   %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %arg0, <8 x i16> %arg1)
    699   %bc = bitcast <8 x i16> %res to <2 x i64>
    700   ret <2 x i64> %bc
    701 }
    702 declare <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16>, <8 x i16>) nounwind readnone
    703 
    ; Fast-isel selection test for @llvm.x86.xop.vpshld. Both <2 x i64>
    ; arguments are bitcast to <4 x i32> and the result is bitcast back.
    ; The i686 and x86_64 runs both expect one vpshld + return.
    704 define <2 x i64> @test_mm_shl_epi32(<2 x i64> %a0, <2 x i64> %a1) {
    705 ; X32-LABEL: test_mm_shl_epi32:
    706 ; X32:       # BB#0:
    707 ; X32-NEXT:    vpshld %xmm1, %xmm0, %xmm0
    708 ; X32-NEXT:    retl
    709 ;
    710 ; X64-LABEL: test_mm_shl_epi32:
    711 ; X64:       # BB#0:
    712 ; X64-NEXT:    vpshld %xmm1, %xmm0, %xmm0
    713 ; X64-NEXT:    retq
    714   %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
    715   %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
    716   %res = call <4 x i32> @llvm.x86.xop.vpshld(<4 x i32> %arg0, <4 x i32> %arg1)
    717   %bc = bitcast <4 x i32> %res to <2 x i64>
    718   ret <2 x i64> %bc
    719 }
    720 declare <4 x i32> @llvm.x86.xop.vpshld(<4 x i32>, <4 x i32>) nounwind readnone
    721 
    ; Fast-isel selection test for @llvm.x86.xop.vpshlq. Operands and result
    ; are already <2 x i64>, so there are no bitcasts.
    ; The i686 and x86_64 runs both expect one vpshlq + return.
    722 define <2 x i64> @test_mm_shl_epi64(<2 x i64> %a0, <2 x i64> %a1) {
    723 ; X32-LABEL: test_mm_shl_epi64:
    724 ; X32:       # BB#0:
    725 ; X32-NEXT:    vpshlq %xmm1, %xmm0, %xmm0
    726 ; X32-NEXT:    retl
    727 ;
    728 ; X64-LABEL: test_mm_shl_epi64:
    729 ; X64:       # BB#0:
    730 ; X64-NEXT:    vpshlq %xmm1, %xmm0, %xmm0
    731 ; X64-NEXT:    retq
    732   %res = call <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64> %a0, <2 x i64> %a1)
    733   ret <2 x i64> %res
    734 }
    735 declare <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64>, <2 x i64>) nounwind readnone
    736 
    ; Fast-isel selection test for @llvm.x86.xop.vpshab. Both <2 x i64>
    ; arguments are bitcast to <16 x i8> and the result is bitcast back.
    ; The i686 and x86_64 runs both expect one vpshab + return.
    737 define <2 x i64> @test_mm_sha_epi8(<2 x i64> %a0, <2 x i64> %a1) {
    738 ; X32-LABEL: test_mm_sha_epi8:
    739 ; X32:       # BB#0:
    740 ; X32-NEXT:    vpshab %xmm1, %xmm0, %xmm0
    741 ; X32-NEXT:    retl
    742 ;
    743 ; X64-LABEL: test_mm_sha_epi8:
    744 ; X64:       # BB#0:
    745 ; X64-NEXT:    vpshab %xmm1, %xmm0, %xmm0
    746 ; X64-NEXT:    retq
    747   %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
    748   %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
    749   %res = call <16 x i8> @llvm.x86.xop.vpshab(<16 x i8> %arg0, <16 x i8> %arg1)
    750   %bc = bitcast <16 x i8> %res to <2 x i64>
    751   ret <2 x i64> %bc
    752 }
    753 declare <16 x i8> @llvm.x86.xop.vpshab(<16 x i8>, <16 x i8>) nounwind readnone
    754 
    755 define <2 x i64> @test_mm_sha_epi16(<2 x i64> %a0, <2 x i64> %a1) {
        ; Fast-isel test for @llvm.x86.xop.vpshaw. The i686 and x86_64 runs are both
        ; expected to emit exactly one "vpshaw %xmm1, %xmm0, %xmm0" and then return.
    756 ; X32-LABEL: test_mm_sha_epi16:
    757 ; X32:       # BB#0:
    758 ; X32-NEXT:    vpshaw %xmm1, %xmm0, %xmm0
    759 ; X32-NEXT:    retl
    760 ;
    761 ; X64-LABEL: test_mm_sha_epi16:
    762 ; X64:       # BB#0:
    763 ; X64-NEXT:    vpshaw %xmm1, %xmm0, %xmm0
    764 ; X64-NEXT:    retq
        ; View both <2 x i64> arguments as <8 x i16>, call the intrinsic, then cast
        ; the <8 x i16> result back to the <2 x i64> return type.
    765   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
    766   %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
    767   %res = call <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16> %arg0, <8 x i16> %arg1)
    768   %bc = bitcast <8 x i16> %res to <2 x i64>
    769   ret <2 x i64> %bc
    770 }
        ; Prototype of the intrinsic under test: two <8 x i16> operands, <8 x i16> result.
    771 declare <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16>, <8 x i16>) nounwind readnone
    772 
    773 define <2 x i64> @test_mm_sha_epi32(<2 x i64> %a0, <2 x i64> %a1) {
        ; Fast-isel test for @llvm.x86.xop.vpshad. The i686 and x86_64 runs are both
        ; expected to emit exactly one "vpshad %xmm1, %xmm0, %xmm0" and then return.
    774 ; X32-LABEL: test_mm_sha_epi32:
    775 ; X32:       # BB#0:
    776 ; X32-NEXT:    vpshad %xmm1, %xmm0, %xmm0
    777 ; X32-NEXT:    retl
    778 ;
    779 ; X64-LABEL: test_mm_sha_epi32:
    780 ; X64:       # BB#0:
    781 ; X64-NEXT:    vpshad %xmm1, %xmm0, %xmm0
    782 ; X64-NEXT:    retq
        ; View both <2 x i64> arguments as <4 x i32>, call the intrinsic, then cast
        ; the <4 x i32> result back to the <2 x i64> return type.
    783   %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
    784   %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
    785   %res = call <4 x i32> @llvm.x86.xop.vpshad(<4 x i32> %arg0, <4 x i32> %arg1)
    786   %bc = bitcast <4 x i32> %res to <2 x i64>
    787   ret <2 x i64> %bc
    788 }
        ; Prototype of the intrinsic under test: two <4 x i32> operands, <4 x i32> result.
    789 declare <4 x i32> @llvm.x86.xop.vpshad(<4 x i32>, <4 x i32>) nounwind readnone
    790 
    791 define <2 x i64> @test_mm_sha_epi64(<2 x i64> %a0, <2 x i64> %a1) {
        ; Fast-isel test for @llvm.x86.xop.vpshaq. The i686 and x86_64 runs are both
        ; expected to emit exactly one "vpshaq %xmm1, %xmm0, %xmm0" and then return.
    792 ; X32-LABEL: test_mm_sha_epi64:
    793 ; X32:       # BB#0:
    794 ; X32-NEXT:    vpshaq %xmm1, %xmm0, %xmm0
    795 ; X32-NEXT:    retl
    796 ;
    797 ; X64-LABEL: test_mm_sha_epi64:
    798 ; X64:       # BB#0:
    799 ; X64-NEXT:    vpshaq %xmm1, %xmm0, %xmm0
    800 ; X64-NEXT:    retq
        ; No casts are needed: the intrinsic already takes and returns <2 x i64>, so
        ; the arguments are forwarded unchanged and the result is returned directly.
    801   %res = call <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64> %a0, <2 x i64> %a1)
    802   ret <2 x i64> %res
    803 }
        ; Prototype of the intrinsic under test: two <2 x i64> operands, <2 x i64> result.
    804 declare <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64>, <2 x i64>) nounwind readnone
    805 
    806 define <2 x i64> @test_mm_com_epu8(<2 x i64> %a0, <2 x i64> %a1) {
        ; Fast-isel test for @llvm.x86.xop.vpcomub with an immediate of 0. The i686 and
        ; x86_64 runs are both expected to emit exactly one
        ; "vpcomltub %xmm1, %xmm0, %xmm0" and then return.
        ; NOTE(review): presumably immediate 0 selects the "lt" condition, which is why
        ; the expected mnemonic is vpcomltub - confirm against the XOP documentation.
    807 ; X32-LABEL: test_mm_com_epu8:
    808 ; X32:       # BB#0:
    809 ; X32-NEXT:    vpcomltub %xmm1, %xmm0, %xmm0
    810 ; X32-NEXT:    retl
    811 ;
    812 ; X64-LABEL: test_mm_com_epu8:
    813 ; X64:       # BB#0:
    814 ; X64-NEXT:    vpcomltub %xmm1, %xmm0, %xmm0
    815 ; X64-NEXT:    retq
        ; View both <2 x i64> arguments as <16 x i8>, call the intrinsic with the
        ; constant i8 0, then cast the <16 x i8> result back to <2 x i64>.
    816   %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
    817   %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
    818   %res = call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %arg0, <16 x i8> %arg1, i8 0)
    819   %bc = bitcast <16 x i8> %res to <2 x i64>
    820   ret <2 x i64> %bc
    821 }
        ; Prototype of the intrinsic under test: two <16 x i8> operands plus an i8
        ; immediate, <16 x i8> result.
    822 declare <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8>, <16 x i8>, i8) nounwind readnone
    823 
    824 define <2 x i64> @test_mm_com_epu16(<2 x i64> %a0, <2 x i64> %a1) {
        ; Fast-isel test for @llvm.x86.xop.vpcomuw with an immediate of 0. The i686 and
        ; x86_64 runs are both expected to emit exactly one
        ; "vpcomltuw %xmm1, %xmm0, %xmm0" and then return.
        ; NOTE(review): presumably immediate 0 selects the "lt" condition, which is why
        ; the expected mnemonic is vpcomltuw - confirm against the XOP documentation.
    825 ; X32-LABEL: test_mm_com_epu16:
    826 ; X32:       # BB#0:
    827 ; X32-NEXT:    vpcomltuw %xmm1, %xmm0, %xmm0
    828 ; X32-NEXT:    retl
    829 ;
    830 ; X64-LABEL: test_mm_com_epu16:
    831 ; X64:       # BB#0:
    832 ; X64-NEXT:    vpcomltuw %xmm1, %xmm0, %xmm0
    833 ; X64-NEXT:    retq
        ; View both <2 x i64> arguments as <8 x i16>, call the intrinsic with the
        ; constant i8 0, then cast the <8 x i16> result back to <2 x i64>.
    834   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
    835   %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
    836   %res = call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %arg0, <8 x i16> %arg1, i8 0)
    837   %bc = bitcast <8 x i16> %res to <2 x i64>
    838   ret <2 x i64> %bc
    839 }
        ; Prototype of the intrinsic under test: two <8 x i16> operands plus an i8
        ; immediate, <8 x i16> result.
    840 declare <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16>, <8 x i16>, i8) nounwind readnone
    841 
    842 define <2 x i64> @test_mm_com_epu32(<2 x i64> %a0, <2 x i64> %a1) {
        ; Fast-isel test for @llvm.x86.xop.vpcomud with an immediate of 0. The i686 and
        ; x86_64 runs are both expected to emit exactly one
        ; "vpcomltud %xmm1, %xmm0, %xmm0" and then return.
        ; NOTE(review): presumably immediate 0 selects the "lt" condition, which is why
        ; the expected mnemonic is vpcomltud - confirm against the XOP documentation.
    843 ; X32-LABEL: test_mm_com_epu32:
    844 ; X32:       # BB#0:
    845 ; X32-NEXT:    vpcomltud %xmm1, %xmm0, %xmm0
    846 ; X32-NEXT:    retl
    847 ;
    848 ; X64-LABEL: test_mm_com_epu32:
    849 ; X64:       # BB#0:
    850 ; X64-NEXT:    vpcomltud %xmm1, %xmm0, %xmm0
    851 ; X64-NEXT:    retq
        ; View both <2 x i64> arguments as <4 x i32>, call the intrinsic with the
        ; constant i8 0, then cast the <4 x i32> result back to <2 x i64>.
    852   %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
    853   %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
    854   %res = call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %arg0, <4 x i32> %arg1, i8 0)
    855   %bc = bitcast <4 x i32> %res to <2 x i64>
    856   ret <2 x i64> %bc
    857 }
        ; Prototype of the intrinsic under test: two <4 x i32> operands plus an i8
        ; immediate, <4 x i32> result.
    858 declare <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32>, <4 x i32>, i8) nounwind readnone
    859 
    860 define <2 x i64> @test_mm_com_epu64(<2 x i64> %a0, <2 x i64> %a1) {
        ; Fast-isel test for @llvm.x86.xop.vpcomuq with an immediate of 0. The i686 and
        ; x86_64 runs are both expected to emit exactly one
        ; "vpcomltuq %xmm1, %xmm0, %xmm0" and then return.
        ; NOTE(review): presumably immediate 0 selects the "lt" condition, which is why
        ; the expected mnemonic is vpcomltuq - confirm against the XOP documentation.
    861 ; X32-LABEL: test_mm_com_epu64:
    862 ; X32:       # BB#0:
    863 ; X32-NEXT:    vpcomltuq %xmm1, %xmm0, %xmm0
    864 ; X32-NEXT:    retl
    865 ;
    866 ; X64-LABEL: test_mm_com_epu64:
    867 ; X64:       # BB#0:
    868 ; X64-NEXT:    vpcomltuq %xmm1, %xmm0, %xmm0
    869 ; X64-NEXT:    retq
        ; No casts are needed: the intrinsic already takes and returns <2 x i64>, so
        ; the arguments are forwarded unchanged together with the constant i8 0.
    870   %res = call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %a0, <2 x i64> %a1, i8 0)
    871   ret <2 x i64> %res
    872 }
        ; Prototype of the intrinsic under test: two <2 x i64> operands plus an i8
        ; immediate, <2 x i64> result.
    873 declare <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64>, <2 x i64>, i8) nounwind readnone
    874 
    875 define <2 x i64> @test_mm_com_epi8(<2 x i64> %a0, <2 x i64> %a1) {
        ; Fast-isel test for @llvm.x86.xop.vpcomb with an immediate of 0. The i686 and
        ; x86_64 runs are both expected to emit exactly one
        ; "vpcomltb %xmm1, %xmm0, %xmm0" and then return.
        ; NOTE(review): presumably immediate 0 selects the "lt" condition, which is why
        ; the expected mnemonic is vpcomltb - confirm against the XOP documentation.
    876 ; X32-LABEL: test_mm_com_epi8:
    877 ; X32:       # BB#0:
    878 ; X32-NEXT:    vpcomltb %xmm1, %xmm0, %xmm0
    879 ; X32-NEXT:    retl
    880 ;
    881 ; X64-LABEL: test_mm_com_epi8:
    882 ; X64:       # BB#0:
    883 ; X64-NEXT:    vpcomltb %xmm1, %xmm0, %xmm0
    884 ; X64-NEXT:    retq
        ; View both <2 x i64> arguments as <16 x i8>, call the intrinsic with the
        ; constant i8 0, then cast the <16 x i8> result back to <2 x i64>.
    885   %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
    886   %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
    887   %res = call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %arg0, <16 x i8> %arg1, i8 0)
    888   %bc = bitcast <16 x i8> %res to <2 x i64>
    889   ret <2 x i64> %bc
    890 }
        ; Prototype of the intrinsic under test: two <16 x i8> operands plus an i8
        ; immediate, <16 x i8> result.
    891 declare <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8>, <16 x i8>, i8) nounwind readnone
    892 
    893 define <2 x i64> @test_mm_com_epi16(<2 x i64> %a0, <2 x i64> %a1) {
        ; Fast-isel test for @llvm.x86.xop.vpcomw with an immediate of 0. The i686 and
        ; x86_64 runs are both expected to emit exactly one
        ; "vpcomltw %xmm1, %xmm0, %xmm0" and then return.
        ; NOTE(review): presumably immediate 0 selects the "lt" condition, which is why
        ; the expected mnemonic is vpcomltw - confirm against the XOP documentation.
    894 ; X32-LABEL: test_mm_com_epi16:
    895 ; X32:       # BB#0:
    896 ; X32-NEXT:    vpcomltw %xmm1, %xmm0, %xmm0
    897 ; X32-NEXT:    retl
    898 ;
    899 ; X64-LABEL: test_mm_com_epi16:
    900 ; X64:       # BB#0:
    901 ; X64-NEXT:    vpcomltw %xmm1, %xmm0, %xmm0
    902 ; X64-NEXT:    retq
        ; View both <2 x i64> arguments as <8 x i16>, call the intrinsic with the
        ; constant i8 0, then cast the <8 x i16> result back to <2 x i64>.
    903   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
    904   %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
    905   %res = call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %arg0, <8 x i16> %arg1, i8 0)
    906   %bc = bitcast <8 x i16> %res to <2 x i64>
    907   ret <2 x i64> %bc
    908 }
        ; Prototype of the intrinsic under test: two <8 x i16> operands plus an i8
        ; immediate, <8 x i16> result.
    909 declare <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16>, <8 x i16>, i8) nounwind readnone
    910 
    911 define <2 x i64> @test_mm_com_epi32(<2 x i64> %a0, <2 x i64> %a1) {
        ; Fast-isel test for @llvm.x86.xop.vpcomd with an immediate of 0. The i686 and
        ; x86_64 runs are both expected to emit exactly one
        ; "vpcomltd %xmm1, %xmm0, %xmm0" and then return.
        ; NOTE(review): presumably immediate 0 selects the "lt" condition, which is why
        ; the expected mnemonic is vpcomltd - confirm against the XOP documentation.
    912 ; X32-LABEL: test_mm_com_epi32:
    913 ; X32:       # BB#0:
    914 ; X32-NEXT:    vpcomltd %xmm1, %xmm0, %xmm0
    915 ; X32-NEXT:    retl
    916 ;
    917 ; X64-LABEL: test_mm_com_epi32:
    918 ; X64:       # BB#0:
    919 ; X64-NEXT:    vpcomltd %xmm1, %xmm0, %xmm0
    920 ; X64-NEXT:    retq
        ; View both <2 x i64> arguments as <4 x i32>, call the intrinsic with the
        ; constant i8 0, then cast the <4 x i32> result back to <2 x i64>.
    921   %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
    922   %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
    923   %res = call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %arg0, <4 x i32> %arg1, i8 0)
    924   %bc = bitcast <4 x i32> %res to <2 x i64>
    925   ret <2 x i64> %bc
    926 }
        ; Prototype of the intrinsic under test: two <4 x i32> operands plus an i8
        ; immediate, <4 x i32> result.
    927 declare <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32>, <4 x i32>, i8) nounwind readnone
    928 
    929 define <2 x i64> @test_mm_com_epi64(<2 x i64> %a0, <2 x i64> %a1) {
        ; Fast-isel test for @llvm.x86.xop.vpcomq with an immediate of 0. The i686 and
        ; x86_64 runs are both expected to emit exactly one
        ; "vpcomltq %xmm1, %xmm0, %xmm0" and then return.
        ; NOTE(review): presumably immediate 0 selects the "lt" condition, which is why
        ; the expected mnemonic is vpcomltq - confirm against the XOP documentation.
    930 ; X32-LABEL: test_mm_com_epi64:
    931 ; X32:       # BB#0:
    932 ; X32-NEXT:    vpcomltq %xmm1, %xmm0, %xmm0
    933 ; X32-NEXT:    retl
    934 ;
    935 ; X64-LABEL: test_mm_com_epi64:
    936 ; X64:       # BB#0:
    937 ; X64-NEXT:    vpcomltq %xmm1, %xmm0, %xmm0
    938 ; X64-NEXT:    retq
        ; No casts are needed: the intrinsic already takes and returns <2 x i64>, so
        ; the arguments are forwarded unchanged together with the constant i8 0.
    939   %res = call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %a0, <2 x i64> %a1, i8 0)
    940   ret <2 x i64> %res
    941 }
        ; Prototype of the intrinsic under test: two <2 x i64> operands plus an i8
        ; immediate, <2 x i64> result.
    942 declare <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64>, <2 x i64>, i8) nounwind readnone
    943 
    944 define <2 x double> @test_mm_permute2_pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> %a2) {
        ; Fast-isel test for the 128-bit @llvm.x86.xop.vpermil2pd intrinsic. The i686
        ; and x86_64 runs are both expected to emit exactly one
        ; "vpermil2pd $0, %xmm2, %xmm1, %xmm0, %xmm0" and then return; the i8 0
        ; immediate passed below shows up as the leading $0 operand.
    945 ; X32-LABEL: test_mm_permute2_pd:
    946 ; X32:       # BB#0:
    947 ; X32-NEXT:    vpermil2pd $0, %xmm2, %xmm1, %xmm0, %xmm0
    948 ; X32-NEXT:    retl
    949 ;
    950 ; X64-LABEL: test_mm_permute2_pd:
    951 ; X64:       # BB#0:
    952 ; X64-NEXT:    vpermil2pd $0, %xmm2, %xmm1, %xmm0, %xmm0
    953 ; X64-NEXT:    retq
        ; All three arguments already have the types the intrinsic expects, so they
        ; are forwarded unchanged and the <2 x double> result is returned directly.
    954   %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> %a2, i8 0)
    955   ret <2 x double> %res
    956 }
        ; Prototype of the intrinsic under test: two <2 x double> operands, a <2 x i64>
        ; operand and an i8 immediate, <2 x double> result.
    957 declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x i64>, i8) nounwind readnone
    958 
    959 define <4 x double> @test_mm256_permute2_pd(<4 x double> %a0, <4 x double> %a1, <4 x i64> %a2) {
        ; Fast-isel test for the 256-bit @llvm.x86.xop.vpermil2pd.256 intrinsic. The
        ; i686 and x86_64 runs are both expected to emit exactly one
        ; "vpermil2pd $0, %ymm2, %ymm1, %ymm0, %ymm0" and then return; the i8 0
        ; immediate passed below shows up as the leading $0 operand.
    960 ; X32-LABEL: test_mm256_permute2_pd:
    961 ; X32:       # BB#0:
    962 ; X32-NEXT:    vpermil2pd $0, %ymm2, %ymm1, %ymm0, %ymm0
    963 ; X32-NEXT:    retl
    964 ;
    965 ; X64-LABEL: test_mm256_permute2_pd:
    966 ; X64:       # BB#0:
    967 ; X64-NEXT:    vpermil2pd $0, %ymm2, %ymm1, %ymm0, %ymm0
    968 ; X64-NEXT:    retq
        ; All three arguments already have the types the intrinsic expects, so they
        ; are forwarded unchanged and the <4 x double> result is returned directly.
    969   %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x i64> %a2, i8 0)
    970   ret <4 x double> %res
    971 }
        ; Prototype of the intrinsic under test: two <4 x double> operands, a <4 x i64>
        ; operand and an i8 immediate, <4 x double> result.
    972 declare <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x i64>, i8) nounwind readnone
    973 
    974 define <4 x float> @test_mm_permute2_ps(<4 x float> %a0, <4 x float> %a1, <2 x i64> %a2) {
        ; Fast-isel test for the 128-bit @llvm.x86.xop.vpermil2ps intrinsic. The i686
        ; and x86_64 runs are both expected to emit exactly one
        ; "vpermil2ps $0, %xmm2, %xmm1, %xmm0, %xmm0" and then return; the i8 0
        ; immediate passed below shows up as the leading $0 operand.
    975 ; X32-LABEL: test_mm_permute2_ps:
    976 ; X32:       # BB#0:
    977 ; X32-NEXT:    vpermil2ps $0, %xmm2, %xmm1, %xmm0, %xmm0
    978 ; X32-NEXT:    retl
    979 ;
    980 ; X64-LABEL: test_mm_permute2_ps:
    981 ; X64:       # BB#0:
    982 ; X64-NEXT:    vpermil2ps $0, %xmm2, %xmm1, %xmm0, %xmm0
    983 ; X64-NEXT:    retq
        ; Only the third argument needs a cast: it arrives as <2 x i64> but the
        ; intrinsic takes <4 x i32>. The two float vectors are forwarded unchanged.
    984   %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
    985   %res = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x i32> %arg2, i8 0)
    986   ret <4 x float> %res
    987 }
        ; Prototype of the intrinsic under test: two <4 x float> operands, a <4 x i32>
        ; operand and an i8 immediate, <4 x float> result.
    988 declare <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>, <4 x i32>, i8) nounwind readnone
    989 
    990 define <8 x float> @test_mm256_permute2_ps(<8 x float> %a0, <8 x float> %a1, <4 x i64> %a2) {
        ; Fast-isel test for the 256-bit @llvm.x86.xop.vpermil2ps.256 intrinsic. The
        ; i686 and x86_64 runs are both expected to emit exactly one
        ; "vpermil2ps $0, %ymm2, %ymm1, %ymm0, %ymm0" and then return; the i8 0
        ; immediate passed below shows up as the leading $0 operand.
    991 ; X32-LABEL: test_mm256_permute2_ps:
    992 ; X32:       # BB#0:
    993 ; X32-NEXT:    vpermil2ps $0, %ymm2, %ymm1, %ymm0, %ymm0
    994 ; X32-NEXT:    retl
    995 ;
    996 ; X64-LABEL: test_mm256_permute2_ps:
    997 ; X64:       # BB#0:
    998 ; X64-NEXT:    vpermil2ps $0, %ymm2, %ymm1, %ymm0, %ymm0
    999 ; X64-NEXT:    retq
        ; Only the third argument needs a cast: it arrives as <4 x i64> but the
        ; intrinsic takes <8 x i32>. The two float vectors are forwarded unchanged.
   1000   %arg2 = bitcast <4 x i64> %a2 to <8 x i32>
   1001   %res = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a1, <8 x i32> %arg2, i8 0)
   1002   ret <8 x float> %res
   1003 }
        ; Prototype of the intrinsic under test: two <8 x float> operands, an <8 x i32>
        ; operand and an i8 immediate, <8 x float> result.
   1004 declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x i32>, i8) nounwind readnone
   1005 
   1006 define <4 x float> @test_mm_frcz_ss(<4 x float> %a0) {
        ; Fast-isel test for @llvm.x86.xop.vfrcz.ss. The i686 and x86_64 runs are both
        ; expected to emit exactly one "vfrczss %xmm0, %xmm0" and then return.
   1007 ; X32-LABEL: test_mm_frcz_ss:
   1008 ; X32:       # BB#0:
   1009 ; X32-NEXT:    vfrczss %xmm0, %xmm0
   1010 ; X32-NEXT:    retl
   1011 ;
   1012 ; X64-LABEL: test_mm_frcz_ss:
   1013 ; X64:       # BB#0:
   1014 ; X64-NEXT:    vfrczss %xmm0, %xmm0
   1015 ; X64-NEXT:    retq
        ; The single <4 x float> argument is passed straight to the intrinsic and the
        ; <4 x float> result is returned directly.
   1016   %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0)
   1017   ret <4 x float> %res
   1018 }
        ; Prototype of the intrinsic under test: one <4 x float> operand, <4 x float> result.
   1019 declare <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>) nounwind readnone
   1020 
   1021 define <2 x double> @test_mm_frcz_sd(<2 x double> %a0) {
        ; Fast-isel test for @llvm.x86.xop.vfrcz.sd. The i686 and x86_64 runs are both
        ; expected to emit exactly one "vfrczsd %xmm0, %xmm0" and then return.
   1022 ; X32-LABEL: test_mm_frcz_sd:
   1023 ; X32:       # BB#0:
   1024 ; X32-NEXT:    vfrczsd %xmm0, %xmm0
   1025 ; X32-NEXT:    retl
   1026 ;
   1027 ; X64-LABEL: test_mm_frcz_sd:
   1028 ; X64:       # BB#0:
   1029 ; X64-NEXT:    vfrczsd %xmm0, %xmm0
   1030 ; X64-NEXT:    retq
        ; The single <2 x double> argument is passed straight to the intrinsic and the
        ; <2 x double> result is returned directly.
   1031   %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0)
   1032   ret <2 x double> %res
   1033 }
        ; Prototype of the intrinsic under test: one <2 x double> operand, <2 x double> result.
   1034 declare <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double>) nounwind readnone
   1035 
   1036 define <4 x float> @test_mm_frcz_ps(<4 x float> %a0) {
        ; Fast-isel test for the 128-bit @llvm.x86.xop.vfrcz.ps intrinsic. The i686 and
        ; x86_64 runs are both expected to emit exactly one "vfrczps %xmm0, %xmm0" and
        ; then return.
   1037 ; X32-LABEL: test_mm_frcz_ps:
   1038 ; X32:       # BB#0:
   1039 ; X32-NEXT:    vfrczps %xmm0, %xmm0
   1040 ; X32-NEXT:    retl
   1041 ;
   1042 ; X64-LABEL: test_mm_frcz_ps:
   1043 ; X64:       # BB#0:
   1044 ; X64-NEXT:    vfrczps %xmm0, %xmm0
   1045 ; X64-NEXT:    retq
        ; The single <4 x float> argument is passed straight to the intrinsic and the
        ; <4 x float> result is returned directly.
   1046   %res = call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %a0)
   1047   ret <4 x float> %res
   1048 }
        ; Prototype of the intrinsic under test: one <4 x float> operand, <4 x float> result.
   1049 declare <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float>) nounwind readnone
   1050 
   1051 define <2 x double> @test_mm_frcz_pd(<2 x double> %a0) {
        ; Fast-isel test for the 128-bit @llvm.x86.xop.vfrcz.pd intrinsic. The i686 and
        ; x86_64 runs are both expected to emit exactly one "vfrczpd %xmm0, %xmm0" and
        ; then return.
   1052 ; X32-LABEL: test_mm_frcz_pd:
   1053 ; X32:       # BB#0:
   1054 ; X32-NEXT:    vfrczpd %xmm0, %xmm0
   1055 ; X32-NEXT:    retl
   1056 ;
   1057 ; X64-LABEL: test_mm_frcz_pd:
   1058 ; X64:       # BB#0:
   1059 ; X64-NEXT:    vfrczpd %xmm0, %xmm0
   1060 ; X64-NEXT:    retq
        ; The single <2 x double> argument is passed straight to the intrinsic and the
        ; <2 x double> result is returned directly.
   1061   %res = call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %a0)
   1062   ret <2 x double> %res
   1063 }
        ; Prototype of the intrinsic under test: one <2 x double> operand, <2 x double> result.
   1064 declare <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double>) nounwind readnone
   1065 
   1066 define <8 x float> @test_mm256_frcz_ps(<8 x float> %a0) {
        ; Fast-isel test for the 256-bit @llvm.x86.xop.vfrcz.ps.256 intrinsic. The i686
        ; and x86_64 runs are both expected to emit exactly one "vfrczps %ymm0, %ymm0"
        ; and then return.
   1067 ; X32-LABEL: test_mm256_frcz_ps:
   1068 ; X32:       # BB#0:
   1069 ; X32-NEXT:    vfrczps %ymm0, %ymm0
   1070 ; X32-NEXT:    retl
   1071 ;
   1072 ; X64-LABEL: test_mm256_frcz_ps:
   1073 ; X64:       # BB#0:
   1074 ; X64-NEXT:    vfrczps %ymm0, %ymm0
   1075 ; X64-NEXT:    retq
        ; The single <8 x float> argument is passed straight to the intrinsic and the
        ; <8 x float> result is returned directly.
   1076   %res = call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %a0)
   1077   ret <8 x float> %res
   1078 }
        ; Prototype of the intrinsic under test: one <8 x float> operand, <8 x float> result.
   1079 declare <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float>) nounwind readnone
   1080 
   1081 define <4 x double> @test_mm256_frcz_pd(<4 x double> %a0) {
        ; Fast-isel test for the 256-bit @llvm.x86.xop.vfrcz.pd.256 intrinsic. The i686
        ; and x86_64 runs are both expected to emit exactly one "vfrczpd %ymm0, %ymm0"
        ; and then return.
   1082 ; X32-LABEL: test_mm256_frcz_pd:
   1083 ; X32:       # BB#0:
   1084 ; X32-NEXT:    vfrczpd %ymm0, %ymm0
   1085 ; X32-NEXT:    retl
   1086 ;
   1087 ; X64-LABEL: test_mm256_frcz_pd:
   1088 ; X64:       # BB#0:
   1089 ; X64-NEXT:    vfrczpd %ymm0, %ymm0
   1090 ; X64-NEXT:    retq
        ; The single <4 x double> argument is passed straight to the intrinsic and the
        ; <4 x double> result is returned directly.
   1091   %res = call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %a0)
   1092   ret <4 x double> %res
   1093 }
        ; Prototype of the intrinsic under test: one <4 x double> operand, <4 x double> result.
   1094 declare <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double>) nounwind readnone
   1095 
   1096 
   1097 
   1098 
   1099 
   1100 
   1101 
   1102 
   1103 
   1104 
   1105 
   1106 
   1107 
   1108 
   1109 
   1110 
   1111 
   1112