Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mattr=+avx,+fma4,+xop | FileCheck %s
      2 
      3 define <2 x double> @test_int_x86_xop_vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
          ; Require an xmm operand after the mnemonic; the bare mnemonic is a substring of this function's name.
      4   ; CHECK: vpermil2pd{{.*}}xmm
      5   %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 1)
      6   ret <2 x double> %res
      7 }
      8 define <2 x double> @test_int_x86_xop_vpermil2pd_mr(<2 x double> %a0, <2 x double>* %a1, <2 x double> %a2) {
          ; %a1 is loaded below and no vmovaps may precede the matched instruction. Require an xmm
          ; operand after the mnemonic; the bare mnemonic is a substring of this function's name.
      9   ; CHECK-NOT: vmovaps
     10   ; CHECK: vpermil2pd{{.*}}xmm
     11   %vec = load <2 x double>* %a1
     12   %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %vec, <2 x double> %a2, i8 1)
     13   ret <2 x double> %res
     14 }
     15 define <2 x double> @test_int_x86_xop_vpermil2pd_rm(<2 x double> %a0, <2 x double> %a1, <2 x double>* %a2) {
          ; %a2 is loaded below and no vmovaps may precede the matched instruction. Require an xmm
          ; operand after the mnemonic; the bare mnemonic is a substring of this function's name.
     16   ; CHECK-NOT: vmovaps
     17   ; CHECK: vpermil2pd{{.*}}xmm
     18   %vec = load <2 x double>* %a2
     19   %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %vec, i8 1)
     20   ret <2 x double> %res
     21 }
     22 declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
     23 
     24 define <4 x double> @test_int_x86_xop_vpermil2pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
     25   ; Mnemonic and a ymm operand must share one line; the bare mnemonic is a substring of this function's name.
     26   ; CHECK: vpermil2pd{{.*}}ymm
     27   %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 2)
     28   ret <4 x double> %res
     29 }
     30 define <4 x double> @test_int_x86_xop_vpermil2pd_256_mr(<4 x double> %a0, <4 x double>* %a1, <4 x double> %a2) {
     31   ; %a1 is loaded below; no vmovaps may precede the match, and mnemonic plus ymm operand must share one line.
     32   ; CHECK-NOT: vmovaps
     33   ; CHECK: vpermil2pd{{.*}}ymm
     34   %vec = load <4 x double>* %a1
     35   %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %vec, <4 x double> %a2, i8 2)
     36   ret <4 x double> %res
     37 }
     38 define <4 x double> @test_int_x86_xop_vpermil2pd_256_rm(<4 x double> %a0, <4 x double> %a1, <4 x double>* %a2) {
     39   ; %a2 is loaded below; no vmovaps may precede the match, and mnemonic plus ymm operand must share one line.
     40   ; CHECK-NOT: vmovaps
     41   ; CHECK: vpermil2pd{{.*}}ymm
     42   %vec = load <4 x double>* %a2
     43   %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %vec, i8 2)
     44   ret <4 x double> %res
     45 }
     46 declare <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
     47 
     48 define <4 x float> @test_int_x86_xop_vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
          ; Require an xmm operand after the mnemonic; the bare mnemonic is a substring of this function's name.
     49   ; CHECK: vpermil2ps{{.*}}xmm
     50   %res = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 3)
     51   ret <4 x float> %res
     52 }
     53 declare <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
     54 
     55 define <8 x float> @test_int_x86_xop_vpermil2ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
     56   ; Mnemonic and a ymm operand must share one line; the bare mnemonic is a substring of this function's name.
     57   ; CHECK: vpermil2ps{{.*}}ymm
     58   %res = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 4)
     59   ret <8 x float> %res
     60 }
     61 declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
     62 
     63 define <2 x i64> @test_int_x86_xop_vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
          ; Require an xmm operand after the mnemonic; the bare mnemonic is a substring of this function's name.
     64   ; CHECK: vpcmov{{.*}}xmm
     65   %res = call <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2)
     66   ret <2 x i64> %res
     67 }
     68 declare <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64>, <2 x i64>, <2 x i64>) nounwind readnone
     69 
     70 define <4 x i64> @test_int_x86_xop_vpcmov_256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
     71   ; Mnemonic and a ymm operand must share one line; the bare mnemonic is a substring of this function's name.
     72   ; CHECK: vpcmov{{.*}}ymm
     73   %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2)
     74   ret <4 x i64> %res
     75 }
     76 define <4 x i64> @test_int_x86_xop_vpcmov_256_mr(<4 x i64> %a0, <4 x i64>* %a1, <4 x i64> %a2) {
     77   ; %a1 is loaded below; no vmovaps may precede the match, and mnemonic plus ymm operand must share one line.
     78   ; CHECK-NOT: vmovaps
     79   ; CHECK: vpcmov{{.*}}ymm
     80   %vec = load <4 x i64>* %a1
     81   %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %vec, <4 x i64> %a2)
     82   ret <4 x i64> %res
     83 }
     84 define <4 x i64> @test_int_x86_xop_vpcmov_256_rm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64>* %a2) {
     85   ; %a2 is loaded below; no vmovaps may precede the match, and mnemonic plus ymm operand must share one line.
     86   ; CHECK-NOT: vmovaps
     87   ; CHECK: vpcmov{{.*}}ymm
     88   %vec = load <4 x i64>* %a2
     89   %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %vec)
     90   ret <4 x i64> %res
     91 }
     92 declare <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64>, <4 x i64>, <4 x i64>) nounwind readnone
     93 
     94 define <16 x i8> @test_int_x86_xop_vpcomeqb(<16 x i8> %a0, <16 x i8> %a1) {
     95   ; CHECK: vpcomb
     96   %res = call <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8> %a0, <16 x i8> %a1) ; arguments forwarded unchanged
     97   ret <16 x i8> %res
     98 }
     99 define <16 x i8> @test_int_x86_xop_vpcomeqb_mem(<16 x i8> %a0, <16 x i8>* %a1) {
    100   ; CHECK-NOT: vmovaps
    101   ; CHECK: vpcomb
    102   %vec = load <16 x i8>* %a1
    103   %res = call <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8> %a0, <16 x i8> %vec) ; second operand is the value loaded above
    104   ret <16 x i8> %res
    105 }
    106 declare <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8>, <16 x i8>) nounwind readnone
    107 
    108 define <8 x i16> @test_int_x86_xop_vpcomeqw(<8 x i16> %a0, <8 x i16> %a1) {
    109   ; CHECK: vpcomw
    110   %cmp = call <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16> %a0, <8 x i16> %a1) ; arguments forwarded unchanged
    111   ret <8 x i16> %cmp
    112 }
    113 declare <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16>, <8 x i16>) nounwind readnone
    114 
    115 define <4 x i32> @test_int_x86_xop_vpcomeqd(<4 x i32> %a0, <4 x i32> %a1) {
    116   ; CHECK: vpcomd
    117   %cmp = call <4 x i32> @llvm.x86.xop.vpcomeqd(<4 x i32> %a0, <4 x i32> %a1) ; arguments forwarded unchanged
    118   ret <4 x i32> %cmp
    119 }
    120 declare <4 x i32> @llvm.x86.xop.vpcomeqd(<4 x i32>, <4 x i32>) nounwind readnone
    121 
    122 define <2 x i64> @test_int_x86_xop_vpcomeqq(<2 x i64> %a0, <2 x i64> %a1) {
    123   ; CHECK: vpcomq
    124   %cmp = call <2 x i64> @llvm.x86.xop.vpcomeqq(<2 x i64> %a0, <2 x i64> %a1) ; arguments forwarded unchanged
    125   ret <2 x i64> %cmp
    126 }
    127 declare <2 x i64> @llvm.x86.xop.vpcomeqq(<2 x i64>, <2 x i64>) nounwind readnone
    128 
    129 define <16 x i8> @test_int_x86_xop_vpcomequb(<16 x i8> %a0, <16 x i8> %a1) {
    130   ; CHECK: vpcomub
    131   %cmp = call <16 x i8> @llvm.x86.xop.vpcomequb(<16 x i8> %a0, <16 x i8> %a1) ; arguments forwarded unchanged
    132   ret <16 x i8> %cmp
    133 }
    134 declare <16 x i8> @llvm.x86.xop.vpcomequb(<16 x i8>, <16 x i8>) nounwind readnone
    135 
    136 define <4 x i32> @test_int_x86_xop_vpcomequd(<4 x i32> %a0, <4 x i32> %a1) {
    137   ; CHECK: vpcomud
    138   %cmp = call <4 x i32> @llvm.x86.xop.vpcomequd(<4 x i32> %a0, <4 x i32> %a1) ; arguments forwarded unchanged
    139   ret <4 x i32> %cmp
    140 }
    141 declare <4 x i32> @llvm.x86.xop.vpcomequd(<4 x i32>, <4 x i32>) nounwind readnone
    142 
    143 define <2 x i64> @test_int_x86_xop_vpcomequq(<2 x i64> %a0, <2 x i64> %a1) {
    144   ; CHECK: vpcomuq
    145   %cmp = call <2 x i64> @llvm.x86.xop.vpcomequq(<2 x i64> %a0, <2 x i64> %a1) ; arguments forwarded unchanged
    146   ret <2 x i64> %cmp
    147 }
    148 declare <2 x i64> @llvm.x86.xop.vpcomequq(<2 x i64>, <2 x i64>) nounwind readnone
    149 
    150 define <8 x i16> @test_int_x86_xop_vpcomequw(<8 x i16> %a0, <8 x i16> %a1) {
    151   ; CHECK: vpcomuw
    152   %cmp = call <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16> %a0, <8 x i16> %a1) ; arguments forwarded unchanged
    153   ret <8 x i16> %cmp
    154 }
    155 declare <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16>, <8 x i16>) nounwind readnone
    156 
    157 define <16 x i8> @test_int_x86_xop_vpcomfalseb(<16 x i8> %a0, <16 x i8> %a1) {
    158   ; CHECK: vpcomb
    159   %cmp = call <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8> %a0, <16 x i8> %a1) ; arguments forwarded unchanged
    160   ret <16 x i8> %cmp
    161 }
    162 declare <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8>, <16 x i8>) nounwind readnone
    163 
    164 define <4 x i32> @test_int_x86_xop_vpcomfalsed(<4 x i32> %a0, <4 x i32> %a1) {
    165   ; CHECK: vpcomd
    166   %cmp = call <4 x i32> @llvm.x86.xop.vpcomfalsed(<4 x i32> %a0, <4 x i32> %a1) ; arguments forwarded unchanged
    167   ret <4 x i32> %cmp
    168 }
    169 declare <4 x i32> @llvm.x86.xop.vpcomfalsed(<4 x i32>, <4 x i32>) nounwind readnone
    170 
    171 define <2 x i64> @test_int_x86_xop_vpcomfalseq(<2 x i64> %a0, <2 x i64> %a1) {
    172   ; CHECK: vpcomq
    173   %cmp = call <2 x i64> @llvm.x86.xop.vpcomfalseq(<2 x i64> %a0, <2 x i64> %a1) ; arguments forwarded unchanged
    174   ret <2 x i64> %cmp
    175 }
    176 declare <2 x i64> @llvm.x86.xop.vpcomfalseq(<2 x i64>, <2 x i64>) nounwind readnone
    177 
    178 define <16 x i8> @test_int_x86_xop_vpcomfalseub(<16 x i8> %a0, <16 x i8> %a1) {
    179   ; CHECK: vpcomub
    180   %cmp = call <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8> %a0, <16 x i8> %a1) ; arguments forwarded unchanged
    181   ret <16 x i8> %cmp
    182 }
    183 declare <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8>, <16 x i8>) nounwind readnone
    184 
    185 define <4 x i32> @test_int_x86_xop_vpcomfalseud(<4 x i32> %a0, <4 x i32> %a1) {
    186   ; CHECK: vpcomud
    187   %cmp = call <4 x i32> @llvm.x86.xop.vpcomfalseud(<4 x i32> %a0, <4 x i32> %a1) ; arguments forwarded unchanged
    188   ret <4 x i32> %cmp
    189 }
    190 declare <4 x i32> @llvm.x86.xop.vpcomfalseud(<4 x i32>, <4 x i32>) nounwind readnone
    191 
    192 define <2 x i64> @test_int_x86_xop_vpcomfalseuq(<2 x i64> %a0, <2 x i64> %a1) {
    193   ; CHECK: vpcomuq
    194   %cmp = call <2 x i64> @llvm.x86.xop.vpcomfalseuq(<2 x i64> %a0, <2 x i64> %a1) ; arguments forwarded unchanged
    195   ret <2 x i64> %cmp
    196 }
    197 declare <2 x i64> @llvm.x86.xop.vpcomfalseuq(<2 x i64>, <2 x i64>) nounwind readnone
    198 
    199 define <8 x i16> @test_int_x86_xop_vpcomfalseuw(<8 x i16> %a0, <8 x i16> %a1) {
    200   ; CHECK: vpcomuw
    201   %cmp = call <8 x i16> @llvm.x86.xop.vpcomfalseuw(<8 x i16> %a0, <8 x i16> %a1) ; arguments forwarded unchanged
    202   ret <8 x i16> %cmp
    203 }
    204 declare <8 x i16> @llvm.x86.xop.vpcomfalseuw(<8 x i16>, <8 x i16>) nounwind readnone
    205 
    206 define <8 x i16> @test_int_x86_xop_vpcomfalsew(<8 x i16> %a0, <8 x i16> %a1) {
    207   ; CHECK: vpcomw
    208   %cmp = call <8 x i16> @llvm.x86.xop.vpcomfalsew(<8 x i16> %a0, <8 x i16> %a1) ; arguments forwarded unchanged
    209   ret <8 x i16> %cmp
    210 }
    211 declare <8 x i16> @llvm.x86.xop.vpcomfalsew(<8 x i16>, <8 x i16>) nounwind readnone
    212 
    213 define <16 x i8> @test_int_x86_xop_vpcomgeb(<16 x i8> %a0, <16 x i8> %a1) {
    214   ; CHECK: vpcomb
    215   %cmp = call <16 x i8> @llvm.x86.xop.vpcomgeb(<16 x i8> %a0, <16 x i8> %a1) ; arguments forwarded unchanged
    216   ret <16 x i8> %cmp
    217 }
    218 declare <16 x i8> @llvm.x86.xop.vpcomgeb(<16 x i8>, <16 x i8>) nounwind readnone
    219 
    220 define <4 x i32> @test_int_x86_xop_vpcomged(<4 x i32> %a0, <4 x i32> %a1) {
    221   ; CHECK: vpcomd
    222   %cmp = call <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32> %a0, <4 x i32> %a1) ; arguments forwarded unchanged
    223   ret <4 x i32> %cmp
    224 }
    225 declare <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32>, <4 x i32>) nounwind readnone
    226 
    227 define <2 x i64> @test_int_x86_xop_vpcomgeq(<2 x i64> %a0, <2 x i64> %a1) {
    228   ; CHECK: vpcomq
    229   %cmp = call <2 x i64> @llvm.x86.xop.vpcomgeq(<2 x i64> %a0, <2 x i64> %a1) ; arguments forwarded unchanged
    230   ret <2 x i64> %cmp
    231 }
    232 declare <2 x i64> @llvm.x86.xop.vpcomgeq(<2 x i64>, <2 x i64>) nounwind readnone
    233 
    234 define <16 x i8> @test_int_x86_xop_vpcomgeub(<16 x i8> %a0, <16 x i8> %a1) {
    235   ; CHECK: vpcomub
    236   %cmp = call <16 x i8> @llvm.x86.xop.vpcomgeub(<16 x i8> %a0, <16 x i8> %a1) ; arguments forwarded unchanged
    237   ret <16 x i8> %cmp
    238 }
    239 declare <16 x i8> @llvm.x86.xop.vpcomgeub(<16 x i8>, <16 x i8>) nounwind readnone
    240 
    241 define <4 x i32> @test_int_x86_xop_vpcomgeud(<4 x i32> %a0, <4 x i32> %a1) {
    242   ; CHECK: vpcomud
    243   %cmp = call <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32> %a0, <4 x i32> %a1) ; arguments forwarded unchanged
    244   ret <4 x i32> %cmp
    245 }
    246 declare <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32>, <4 x i32>) nounwind readnone
    247 
    248 define <2 x i64> @test_int_x86_xop_vpcomgeuq(<2 x i64> %a0, <2 x i64> %a1) {
    249   ; CHECK: vpcomuq
    250   %cmp = call <2 x i64> @llvm.x86.xop.vpcomgeuq(<2 x i64> %a0, <2 x i64> %a1) ; arguments forwarded unchanged
    251   ret <2 x i64> %cmp
    252 }
    253 declare <2 x i64> @llvm.x86.xop.vpcomgeuq(<2 x i64>, <2 x i64>) nounwind readnone
    254 
    255 define <8 x i16> @test_int_x86_xop_vpcomgeuw(<8 x i16> %a0, <8 x i16> %a1) {
    256   ; CHECK: vpcomuw
    257   %cmp = call <8 x i16> @llvm.x86.xop.vpcomgeuw(<8 x i16> %a0, <8 x i16> %a1) ; arguments forwarded unchanged
    258   ret <8 x i16> %cmp
    259 }
    260 declare <8 x i16> @llvm.x86.xop.vpcomgeuw(<8 x i16>, <8 x i16>) nounwind readnone
    261 
    262 define <8 x i16> @test_int_x86_xop_vpcomgew(<8 x i16> %a0, <8 x i16> %a1) {
    263   ; CHECK: vpcomw
    264   %cmp = call <8 x i16> @llvm.x86.xop.vpcomgew(<8 x i16> %a0, <8 x i16> %a1) ; arguments forwarded unchanged
    265   ret <8 x i16> %cmp
    266 }
    267 declare <8 x i16> @llvm.x86.xop.vpcomgew(<8 x i16>, <8 x i16>) nounwind readnone
    268 
    269 define <16 x i8> @test_int_x86_xop_vpcomgtb(<16 x i8> %a0, <16 x i8> %a1) {
    270   ; CHECK: vpcomb
    271   %cmp = call <16 x i8> @llvm.x86.xop.vpcomgtb(<16 x i8> %a0, <16 x i8> %a1) ; arguments forwarded unchanged
    272   ret <16 x i8> %cmp
    273 }
    274 declare <16 x i8> @llvm.x86.xop.vpcomgtb(<16 x i8>, <16 x i8>) nounwind readnone
    275 
    276 define <4 x i32> @test_int_x86_xop_vpcomgtd(<4 x i32> %a0, <4 x i32> %a1) {
    277   ; CHECK: vpcomd
    278   %cmp = call <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32> %a0, <4 x i32> %a1) ; arguments forwarded unchanged
    279   ret <4 x i32> %cmp
    280 }
    281 declare <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32>, <4 x i32>) nounwind readnone
    282 
    283 define <2 x i64> @test_int_x86_xop_vpcomgtq(<2 x i64> %a0, <2 x i64> %a1) {
    284   ; CHECK: vpcomq
    285   %cmp = call <2 x i64> @llvm.x86.xop.vpcomgtq(<2 x i64> %a0, <2 x i64> %a1) ; arguments forwarded unchanged
    286   ret <2 x i64> %cmp
    287 }
    288 declare <2 x i64> @llvm.x86.xop.vpcomgtq(<2 x i64>, <2 x i64>) nounwind readnone
    289 
    290 define <16 x i8> @test_int_x86_xop_vpcomgtub(<16 x i8> %a0, <16 x i8> %a1) {
    291   ; CHECK: vpcomub
    292   %cmp = call <16 x i8> @llvm.x86.xop.vpcomgtub(<16 x i8> %a0, <16 x i8> %a1) ; arguments forwarded unchanged
    293   ret <16 x i8> %cmp
    294 }
    295 declare <16 x i8> @llvm.x86.xop.vpcomgtub(<16 x i8>, <16 x i8>) nounwind readnone
    296 
    297 define <4 x i32> @test_int_x86_xop_vpcomgtud(<4 x i32> %a0, <4 x i32> %a1) {
    298   ; CHECK: vpcomud
    299   %cmp = call <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32> %a0, <4 x i32> %a1) ; arguments forwarded unchanged
    300   ret <4 x i32> %cmp
    301 }
    302 declare <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32>, <4 x i32>) nounwind readnone
    303 
    304 define <2 x i64> @test_int_x86_xop_vpcomgtuq(<2 x i64> %a0, <2 x i64> %a1) {
    305   ; CHECK: vpcomuq
    306   %cmp = call <2 x i64> @llvm.x86.xop.vpcomgtuq(<2 x i64> %a0, <2 x i64> %a1) ; arguments forwarded unchanged
    307   ret <2 x i64> %cmp
    308 }
    309 declare <2 x i64> @llvm.x86.xop.vpcomgtuq(<2 x i64>, <2 x i64>) nounwind readnone
    310 
    311 define <8 x i16> @test_int_x86_xop_vpcomgtuw(<8 x i16> %a0, <8 x i16> %a1) {
    312   ; CHECK: vpcomuw
    313   %cmp = call <8 x i16> @llvm.x86.xop.vpcomgtuw(<8 x i16> %a0, <8 x i16> %a1) ; arguments forwarded unchanged
    314   ret <8 x i16> %cmp
    315 }
    316 declare <8 x i16> @llvm.x86.xop.vpcomgtuw(<8 x i16>, <8 x i16>) nounwind readnone
    317 
    318 define <8 x i16> @test_int_x86_xop_vpcomgtw(<8 x i16> %a0, <8 x i16> %a1) {
    319   ; CHECK: vpcomw
    320   %cmp = call <8 x i16> @llvm.x86.xop.vpcomgtw(<8 x i16> %a0, <8 x i16> %a1) ; arguments forwarded unchanged
    321   ret <8 x i16> %cmp
    322 }
    323 declare <8 x i16> @llvm.x86.xop.vpcomgtw(<8 x i16>, <8 x i16>) nounwind readnone
    324 
    325 define <16 x i8> @test_int_x86_xop_vpcomleb(<16 x i8> %a0, <16 x i8> %a1) {
    326   ; CHECK: vpcomb
    327   %cmp = call <16 x i8> @llvm.x86.xop.vpcomleb(<16 x i8> %a0, <16 x i8> %a1) ; arguments forwarded unchanged
    328   ret <16 x i8> %cmp
    329 }
    330 declare <16 x i8> @llvm.x86.xop.vpcomleb(<16 x i8>, <16 x i8>) nounwind readnone
    331 
    332 define <4 x i32> @test_int_x86_xop_vpcomled(<4 x i32> %a0, <4 x i32> %a1) {
    333   ; CHECK: vpcomd
    334   %cmp = call <4 x i32> @llvm.x86.xop.vpcomled(<4 x i32> %a0, <4 x i32> %a1) ; arguments forwarded unchanged
    335   ret <4 x i32> %cmp
    336 }
    337 declare <4 x i32> @llvm.x86.xop.vpcomled(<4 x i32>, <4 x i32>) nounwind readnone
    338 
    339 define <2 x i64> @test_int_x86_xop_vpcomleq(<2 x i64> %a0, <2 x i64> %a1) {
    340   ; CHECK: vpcomq
    341   %cmp = call <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64> %a0, <2 x i64> %a1) ; arguments forwarded unchanged
    342   ret <2 x i64> %cmp
    343 }
    344 declare <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64>, <2 x i64>) nounwind readnone
    345 
    346 define <16 x i8> @test_int_x86_xop_vpcomleub(<16 x i8> %a0, <16 x i8> %a1) {
    347   ; CHECK: vpcomub
    348   %cmp = call <16 x i8> @llvm.x86.xop.vpcomleub(<16 x i8> %a0, <16 x i8> %a1) ; arguments forwarded unchanged
    349   ret <16 x i8> %cmp
    350 }
    351 declare <16 x i8> @llvm.x86.xop.vpcomleub(<16 x i8>, <16 x i8>) nounwind readnone
    352 
    353 define <4 x i32> @test_int_x86_xop_vpcomleud(<4 x i32> %a0, <4 x i32> %a1) {
    354   ; CHECK: vpcomud
    355   %cmp = call <4 x i32> @llvm.x86.xop.vpcomleud(<4 x i32> %a0, <4 x i32> %a1) ; arguments forwarded unchanged
    356   ret <4 x i32> %cmp
    357 }
    358 declare <4 x i32> @llvm.x86.xop.vpcomleud(<4 x i32>, <4 x i32>) nounwind readnone
    359 
    360 define <2 x i64> @test_int_x86_xop_vpcomleuq(<2 x i64> %a0, <2 x i64> %a1) {
    361   ; CHECK: vpcomuq
    362   %cmp = call <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64> %a0, <2 x i64> %a1) ; arguments forwarded unchanged
    363   ret <2 x i64> %cmp
    364 }
    365 declare <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64>, <2 x i64>) nounwind readnone
    366 
    367 define <8 x i16> @test_int_x86_xop_vpcomleuw(<8 x i16> %a0, <8 x i16> %a1) {
    368   ; CHECK: vpcomuw
    369   %cmp = call <8 x i16> @llvm.x86.xop.vpcomleuw(<8 x i16> %a0, <8 x i16> %a1) ; arguments forwarded unchanged
    370   ret <8 x i16> %cmp
    371 }
    372 declare <8 x i16> @llvm.x86.xop.vpcomleuw(<8 x i16>, <8 x i16>) nounwind readnone
    373 
    374 define <8 x i16> @test_int_x86_xop_vpcomlew(<8 x i16> %a0, <8 x i16> %a1) {
    375   ; CHECK: vpcomw
    376   %cmp = call <8 x i16> @llvm.x86.xop.vpcomlew(<8 x i16> %a0, <8 x i16> %a1) ; arguments forwarded unchanged
    377   ret <8 x i16> %cmp
    378 }
    379 declare <8 x i16> @llvm.x86.xop.vpcomlew(<8 x i16>, <8 x i16>) nounwind readnone
    380 
    381 define <16 x i8> @test_int_x86_xop_vpcomltb(<16 x i8> %a0, <16 x i8> %a1) {
    382   ; CHECK: vpcomb
    383   %cmp = call <16 x i8> @llvm.x86.xop.vpcomltb(<16 x i8> %a0, <16 x i8> %a1) ; arguments forwarded unchanged
    384   ret <16 x i8> %cmp
    385 }
    386 declare <16 x i8> @llvm.x86.xop.vpcomltb(<16 x i8>, <16 x i8>) nounwind readnone
    387 
    388 define <4 x i32> @test_int_x86_xop_vpcomltd(<4 x i32> %a0, <4 x i32> %a1) {
    389   ; CHECK: vpcomd
    390   %cmp = call <4 x i32> @llvm.x86.xop.vpcomltd(<4 x i32> %a0, <4 x i32> %a1) ; arguments forwarded unchanged
    391   ret <4 x i32> %cmp
    392 }
    393 declare <4 x i32> @llvm.x86.xop.vpcomltd(<4 x i32>, <4 x i32>) nounwind readnone
    394 
    395 define <2 x i64> @test_int_x86_xop_vpcomltq(<2 x i64> %a0, <2 x i64> %a1) {
    396   ; CHECK: vpcomq
    397   %cmp = call <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64> %a0, <2 x i64> %a1) ; arguments forwarded unchanged
    398   ret <2 x i64> %cmp
    399 }
    400 declare <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64>, <2 x i64>) nounwind readnone
    401 
    402 define <16 x i8> @test_int_x86_xop_vpcomltub(<16 x i8> %a0, <16 x i8> %a1) {
    403   ; CHECK: vpcomub
    404   %cmp = call <16 x i8> @llvm.x86.xop.vpcomltub(<16 x i8> %a0, <16 x i8> %a1) ; arguments forwarded unchanged
    405   ret <16 x i8> %cmp
    406 }
    407 declare <16 x i8> @llvm.x86.xop.vpcomltub(<16 x i8>, <16 x i8>) nounwind readnone
    408 
    409 define <4 x i32> @test_int_x86_xop_vpcomltud(<4 x i32> %a0, <4 x i32> %a1) {
    410   ; CHECK: vpcomud
    411   %cmp = call <4 x i32> @llvm.x86.xop.vpcomltud(<4 x i32> %a0, <4 x i32> %a1) ; arguments forwarded unchanged
    412   ret <4 x i32> %cmp
    413 }
    414 declare <4 x i32> @llvm.x86.xop.vpcomltud(<4 x i32>, <4 x i32>) nounwind readnone
    415 
    416 define <2 x i64> @test_int_x86_xop_vpcomltuq(<2 x i64> %a0, <2 x i64> %a1) {
    417   ; CHECK: vpcomuq
    418   %cmp = call <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64> %a0, <2 x i64> %a1) ; arguments forwarded unchanged
    419   ret <2 x i64> %cmp
    420 }
    421 declare <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64>, <2 x i64>) nounwind readnone
    422 
    423 define <8 x i16> @test_int_x86_xop_vpcomltuw(<8 x i16> %a0, <8 x i16> %a1) {
    424   ; CHECK: vpcomuw
    425   %cmp = call <8 x i16> @llvm.x86.xop.vpcomltuw(<8 x i16> %a0, <8 x i16> %a1) ; arguments forwarded unchanged
    426   ret <8 x i16> %cmp
    427 }
    428 declare <8 x i16> @llvm.x86.xop.vpcomltuw(<8 x i16>, <8 x i16>) nounwind readnone
    429 
    430 define <8 x i16> @test_int_x86_xop_vpcomltw(<8 x i16> %a0, <8 x i16> %a1) {
    431   ; CHECK: vpcomw
    432   %cmp = call <8 x i16> @llvm.x86.xop.vpcomltw(<8 x i16> %a0, <8 x i16> %a1) ; arguments forwarded unchanged
    433   ret <8 x i16> %cmp
    434 }
    435 declare <8 x i16> @llvm.x86.xop.vpcomltw(<8 x i16>, <8 x i16>) nounwind readnone
    436 
    437 define <16 x i8> @test_int_x86_xop_vpcomneb(<16 x i8> %a0, <16 x i8> %a1) {
    438   ; CHECK: vpcomb
    439   %cmp = call <16 x i8> @llvm.x86.xop.vpcomneb(<16 x i8> %a0, <16 x i8> %a1) ; arguments forwarded unchanged
    440   ret <16 x i8> %cmp
    441 }
    442 declare <16 x i8> @llvm.x86.xop.vpcomneb(<16 x i8>, <16 x i8>) nounwind readnone
    443 
    444 define <4 x i32> @test_int_x86_xop_vpcomned(<4 x i32> %a0, <4 x i32> %a1) {
    445   ; CHECK: vpcomd
    446   %cmp = call <4 x i32> @llvm.x86.xop.vpcomned(<4 x i32> %a0, <4 x i32> %a1) ; arguments forwarded unchanged
    447   ret <4 x i32> %cmp
    448 }
    449 declare <4 x i32> @llvm.x86.xop.vpcomned(<4 x i32>, <4 x i32>) nounwind readnone
    450 
    451 define <2 x i64> @test_int_x86_xop_vpcomneq(<2 x i64> %a0, <2 x i64> %a1) {
    452   ; CHECK: vpcomq
    453   %cmp = call <2 x i64> @llvm.x86.xop.vpcomneq(<2 x i64> %a0, <2 x i64> %a1) ; arguments forwarded unchanged
    454   ret <2 x i64> %cmp
    455 }
    456 declare <2 x i64> @llvm.x86.xop.vpcomneq(<2 x i64>, <2 x i64>) nounwind readnone
    457 
    458 define <16 x i8> @test_int_x86_xop_vpcomneub(<16 x i8> %a0, <16 x i8> %a1) {
    459   ; CHECK: vpcomub
    460   %cmp = call <16 x i8> @llvm.x86.xop.vpcomneub(<16 x i8> %a0, <16 x i8> %a1) ; arguments forwarded unchanged
    461   ret <16 x i8> %cmp
    462 }
    463 declare <16 x i8> @llvm.x86.xop.vpcomneub(<16 x i8>, <16 x i8>) nounwind readnone
    464 
    465 define <4 x i32> @test_int_x86_xop_vpcomneud(<4 x i32> %a0, <4 x i32> %a1) {
    466   ; CHECK: vpcomud
    467   %cmp = call <4 x i32> @llvm.x86.xop.vpcomneud(<4 x i32> %a0, <4 x i32> %a1) ; arguments forwarded unchanged
    468   ret <4 x i32> %cmp
    469 }
    470 declare <4 x i32> @llvm.x86.xop.vpcomneud(<4 x i32>, <4 x i32>) nounwind readnone
    471 
    472 define <2 x i64> @test_int_x86_xop_vpcomneuq(<2 x i64> %a0, <2 x i64> %a1) {
    473   ; CHECK: vpcomuq
    474   %cmp = call <2 x i64> @llvm.x86.xop.vpcomneuq(<2 x i64> %a0, <2 x i64> %a1) ; arguments forwarded unchanged
    475   ret <2 x i64> %cmp
    476 }
    477 declare <2 x i64> @llvm.x86.xop.vpcomneuq(<2 x i64>, <2 x i64>) nounwind readnone
    478 
    479 define <8 x i16> @test_int_x86_xop_vpcomneuw(<8 x i16> %a0, <8 x i16> %a1) {
    480   ; CHECK: vpcomuw
    481   %cmp = call <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16> %a0, <8 x i16> %a1) ; arguments forwarded unchanged
    482   ret <8 x i16> %cmp
    483 }
    484 declare <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16>, <8 x i16>) nounwind readnone
    485 
    486 define <8 x i16> @test_int_x86_xop_vpcomnew(<8 x i16> %a0, <8 x i16> %a1) {
    487   ; CHECK: vpcomw
    488   %cmp = call <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16> %a0, <8 x i16> %a1) ; arguments forwarded unchanged
    489   ret <8 x i16> %cmp
    490 }
    491 declare <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16>, <8 x i16>) nounwind readnone
    492 
    493 define <16 x i8> @test_int_x86_xop_vpcomtrueb(<16 x i8> %a0, <16 x i8> %a1) {
    494   ; CHECK: vpcomb
    495   %cmp = call <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8> %a0, <16 x i8> %a1) ; arguments forwarded unchanged
    496   ret <16 x i8> %cmp
    497 }
    498 declare <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8>, <16 x i8>) nounwind readnone
    499 
    500 define <4 x i32> @test_int_x86_xop_vpcomtrued(<4 x i32> %a0, <4 x i32> %a1) {
    501   ; CHECK: vpcomd
    502   %cmp = call <4 x i32> @llvm.x86.xop.vpcomtrued(<4 x i32> %a0, <4 x i32> %a1) ; arguments forwarded unchanged
    503   ret <4 x i32> %cmp
    504 }
    505 declare <4 x i32> @llvm.x86.xop.vpcomtrued(<4 x i32>, <4 x i32>) nounwind readnone
    506 
    507 define <2 x i64> @test_int_x86_xop_vpcomtrueq(<2 x i64> %a0, <2 x i64> %a1) {
    508   ; CHECK: vpcomq
    509   %cmp = call <2 x i64> @llvm.x86.xop.vpcomtrueq(<2 x i64> %a0, <2 x i64> %a1) ; arguments forwarded unchanged
    510   ret <2 x i64> %cmp
    511 }
    512 declare <2 x i64> @llvm.x86.xop.vpcomtrueq(<2 x i64>, <2 x i64>) nounwind readnone
    513 
    514 define <16 x i8> @test_int_x86_xop_vpcomtrueub(<16 x i8> %a0, <16 x i8> %a1) {
    515   ; CHECK: vpcomub
    516   %cmp = call <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8> %a0, <16 x i8> %a1) ; arguments forwarded unchanged
    517   ret <16 x i8> %cmp
    518 }
    519 declare <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8>, <16 x i8>) nounwind readnone
    520 
    521 define <4 x i32> @test_int_x86_xop_vpcomtrueud(<4 x i32> %a0, <4 x i32> %a1) {
    522   ; CHECK: vpcomud
    523   %cmp = call <4 x i32> @llvm.x86.xop.vpcomtrueud(<4 x i32> %a0, <4 x i32> %a1) ; arguments forwarded unchanged
    524   ret <4 x i32> %cmp
    525 }
    526 declare <4 x i32> @llvm.x86.xop.vpcomtrueud(<4 x i32>, <4 x i32>) nounwind readnone
    527 
    528 define <2 x i64> @test_int_x86_xop_vpcomtrueuq(<2 x i64> %a0, <2 x i64> %a1) {
    529   ; CHECK: vpcomuq
    530   %cmp = call <2 x i64> @llvm.x86.xop.vpcomtrueuq(<2 x i64> %a0, <2 x i64> %a1) ; arguments forwarded unchanged
    531   ret <2 x i64> %cmp
    532 }
    533 declare <2 x i64> @llvm.x86.xop.vpcomtrueuq(<2 x i64>, <2 x i64>) nounwind readnone
    534 
    535 define <8 x i16> @test_int_x86_xop_vpcomtrueuw(<8 x i16> %a0, <8 x i16> %a1) {
    536   ; CHECK: vpcomuw
    537   %cmp = call <8 x i16> @llvm.x86.xop.vpcomtrueuw(<8 x i16> %a0, <8 x i16> %a1) ; arguments forwarded unchanged
    538   ret <8 x i16> %cmp
    539 }
    540 declare <8 x i16> @llvm.x86.xop.vpcomtrueuw(<8 x i16>, <8 x i16>) nounwind readnone
    541 
    542 define <8 x i16> @test_int_x86_xop_vpcomtruew(<8 x i16> %a0, <8 x i16> %a1) {
    543   ; CHECK: vpcomw
    544   %cmp = call <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16> %a0, <8 x i16> %a1) ; arguments forwarded unchanged
    545   ret <8 x i16> %cmp
    546 }
    547 declare <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16>, <8 x i16>) nounwind readnone
    548 
    549 define <4 x i32> @test_int_x86_xop_vphaddbd(<16 x i8> %a0) {
          ; Require an xmm operand after the mnemonic; the bare mnemonic is a substring of this function's name.
    550   ; CHECK: vphaddbd{{.*}}xmm
    551   %res = call <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8> %a0)
    552   ret <4 x i32> %res
    553 }
    554 declare <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8>) nounwind readnone
    555 
    556 define <2 x i64> @test_int_x86_xop_vphaddbq(<16 x i8> %a0) {
          ; Require an xmm operand after the mnemonic; the bare mnemonic is a substring of this function's name.
    557   ; CHECK: vphaddbq{{.*}}xmm
    558   %res = call <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8> %a0)
    559   ret <2 x i64> %res
    560 }
    561 declare <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8>) nounwind readnone
    562 
    563 define <8 x i16> @test_int_x86_xop_vphaddbw(<16 x i8> %a0) {
          ; Require an xmm operand after the mnemonic; the bare mnemonic is a substring of this function's name.
    564   ; CHECK: vphaddbw{{.*}}xmm
    565   %res = call <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8> %a0)
    566   ret <8 x i16> %res
    567 }
    568 declare <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8>) nounwind readnone
    569 
    570 define <2 x i64> @test_int_x86_xop_vphadddq(<4 x i32> %a0) {
          ; Require an xmm operand after the mnemonic; the bare mnemonic is a substring of this function's name.
    571   ; CHECK: vphadddq{{.*}}xmm
    572   %res = call <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32> %a0)
    573   ret <2 x i64> %res
    574 }
    575 declare <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32>) nounwind readnone
    576 
    577 define <4 x i32> @test_int_x86_xop_vphaddubd(<16 x i8> %a0) {
          ; Require an xmm operand after the mnemonic; the bare mnemonic is a substring of this function's name.
    578   ; CHECK: vphaddubd{{.*}}xmm
    579   %res = call <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8> %a0)
    580   ret <4 x i32> %res
    581 }
    582 declare <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8>) nounwind readnone
    583 
    584 define <2 x i64> @test_int_x86_xop_vphaddubq(<16 x i8> %a0) {
          ; Require an xmm operand after the mnemonic; the bare mnemonic is a substring of this function's name.
    585   ; CHECK: vphaddubq{{.*}}xmm
    586   %res = call <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8> %a0)
    587   ret <2 x i64> %res
    588 }
    589 declare <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8>) nounwind readnone
    590 
    591 define <8 x i16> @test_int_x86_xop_vphaddubw(<16 x i8> %a0) {
          ; Require an xmm operand after the mnemonic; the bare mnemonic is a substring of this function's name.
    592   ; CHECK: vphaddubw{{.*}}xmm
    593   %res = call <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8> %a0)
    594   ret <8 x i16> %res
    595 }
    596 declare <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8>) nounwind readnone
    597 
    598 define <2 x i64> @test_int_x86_xop_vphaddudq(<4 x i32> %a0) {
          ; Require an xmm operand after the mnemonic; the bare mnemonic is a substring of this function's name.
    599   ; CHECK: vphaddudq{{.*}}xmm
    600   %res = call <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32> %a0)
    601   ret <2 x i64> %res
    602 }
    603 declare <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32>) nounwind readnone
    604 
    605 define <4 x i32> @test_int_x86_xop_vphadduwd(<8 x i16> %a0) {
          ; Require an xmm operand after the mnemonic; the bare mnemonic is a substring of this function's name.
    606   ; CHECK: vphadduwd{{.*}}xmm
    607   %res = call <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16> %a0)
    608   ret <4 x i32> %res
    609 }
    610 declare <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16>) nounwind readnone
    611 
    612 define <2 x i64> @test_int_x86_xop_vphadduwq(<8 x i16> %a0) {
          ; Require an xmm operand after the mnemonic; the bare mnemonic is a substring of this function's name.
    613   ; CHECK: vphadduwq{{.*}}xmm
    614   %res = call <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16> %a0)
    615   ret <2 x i64> %res
    616 }
    617 declare <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16>) nounwind readnone
    618 
    619 define <4 x i32> @test_int_x86_xop_vphaddwd(<8 x i16> %a0) {
          ; Require an xmm operand after the mnemonic; the bare mnemonic is a substring of this function's name.
    620   ; CHECK: vphaddwd{{.*}}xmm
    621   %res = call <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16> %a0)
    622   ret <4 x i32> %res
    623 }
    624 declare <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16>) nounwind readnone
    625 
    626 define <2 x i64> @test_int_x86_xop_vphaddwq(<8 x i16> %a0) {
          ; Require an xmm operand after the mnemonic; the bare mnemonic is a substring of this function's name.
    627   ; CHECK: vphaddwq{{.*}}xmm
    628   %res = call <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16> %a0)
    629   ret <2 x i64> %res
    630 }
    631 declare <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16>) nounwind readnone
    632 
    633 define <8 x i16> @test_int_x86_xop_vphsubbw(<16 x i8> %a0) {
          ; Require an xmm operand after the mnemonic; the bare mnemonic is a substring of this function's name.
    634   ; CHECK: vphsubbw{{.*}}xmm
    635   %res = call <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8> %a0)
    636   ret <8 x i16> %res
    637 }
    638 declare <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8>) nounwind readnone
    639 
    640 define <2 x i64> @test_int_x86_xop_vphsubdq(<4 x i32> %a0) {
          ; Require an xmm operand after the mnemonic; the bare mnemonic is a substring of this function's name.
    641   ; CHECK: vphsubdq{{.*}}xmm
    642   %res = call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %a0)
    643   ret <2 x i64> %res
    644 }
    645 define <2 x i64> @test_int_x86_xop_vphsubdq_mem(<4 x i32>* %a0) {
          ; %a0 is loaded below and no vmovaps may precede the matched instruction. Require an xmm
          ; operand after the mnemonic; the bare mnemonic is a substring of this function's name.
    646   ; CHECK-NOT: vmovaps
    647   ; CHECK: vphsubdq{{.*}}xmm
    648   %vec = load <4 x i32>* %a0
    649   %res = call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %vec)
    650   ret <2 x i64> %res
    651 }
    652 declare <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32>) nounwind readnone
    653 
    654 define <4 x i32> @test_int_x86_xop_vphsubwd(<8 x i16> %a0) {
          ; Require an xmm operand after the mnemonic; the bare mnemonic is a substring of this function's name.
    655   ; CHECK: vphsubwd{{.*}}xmm
    656   %res = call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %a0)
    657   ret <4 x i32> %res
    658 }
    659 define <4 x i32> @test_int_x86_xop_vphsubwd_mem(<8 x i16>* %a0) {
          ; %a0 is loaded below and no vmovaps may precede the matched instruction. Require an xmm
          ; operand after the mnemonic; the bare mnemonic is a substring of this function's name.
    660   ; CHECK-NOT: vmovaps
    661   ; CHECK: vphsubwd{{.*}}xmm
    662   %vec = load <8 x i16>* %a0
    663   %res = call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %vec)
    664   ret <4 x i32> %res
    665 }
    666 declare <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16>) nounwind readnone
    667 
    ; Codegen test for @llvm.x86.xop.vpmacsdd with all-register operands.
    ; The mnemonic must be followed by an XMM register so that the symbol
    ; name (which contains the mnemonic) cannot satisfy the pattern.
    define <4 x i32> @test_int_x86_xop_vpmacsdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
      ; CHECK: test_int_x86_xop_vpmacsdd:
      ; CHECK: vpmacsdd %xmm
      %res = call <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2)
      ret <4 x i32> %res
    }
    declare <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
    674 
    ; Codegen test for @llvm.x86.xop.vpmacsdqh with all-register operands.
    ; The mnemonic must be followed by an XMM register so that the symbol
    ; name (which contains the mnemonic) cannot satisfy the pattern.
    define <2 x i64> @test_int_x86_xop_vpmacsdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
      ; CHECK: test_int_x86_xop_vpmacsdqh:
      ; CHECK: vpmacsdqh %xmm
      %res = call <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2)
      ret <2 x i64> %res
    }
    declare <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
    681 
    ; Codegen test for @llvm.x86.xop.vpmacsdql with all-register operands.
    ; The mnemonic must be followed by an XMM register so that the symbol
    ; name (which contains the mnemonic) cannot satisfy the pattern.
    define <2 x i64> @test_int_x86_xop_vpmacsdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
      ; CHECK: test_int_x86_xop_vpmacsdql:
      ; CHECK: vpmacsdql %xmm
      %res = call <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2)
      ret <2 x i64> %res
    }
    declare <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
    688 
    ; Codegen test for @llvm.x86.xop.vpmacssdd with all-register operands.
    ; The mnemonic must be followed by an XMM register so that the symbol
    ; name (which contains the mnemonic) cannot satisfy the pattern.
    define <4 x i32> @test_int_x86_xop_vpmacssdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
      ; CHECK: test_int_x86_xop_vpmacssdd:
      ; CHECK: vpmacssdd %xmm
      %res = call <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2)
      ret <4 x i32> %res
    }
    declare <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
    695 
    ; Codegen test for @llvm.x86.xop.vpmacssdqh with all-register operands.
    ; The mnemonic must be followed by an XMM register so that the symbol
    ; name (which contains the mnemonic) cannot satisfy the pattern.
    define <2 x i64> @test_int_x86_xop_vpmacssdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
      ; CHECK: test_int_x86_xop_vpmacssdqh:
      ; CHECK: vpmacssdqh %xmm
      %res = call <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2)
      ret <2 x i64> %res
    }
    declare <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
    702 
    ; Codegen test for @llvm.x86.xop.vpmacssdql with all-register operands.
    ; The mnemonic must be followed by an XMM register so that the symbol
    ; name (which contains the mnemonic) cannot satisfy the pattern.
    define <2 x i64> @test_int_x86_xop_vpmacssdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
      ; CHECK: test_int_x86_xop_vpmacssdql:
      ; CHECK: vpmacssdql %xmm
      %res = call <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2)
      ret <2 x i64> %res
    }
    declare <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
    709 
    ; Codegen test for @llvm.x86.xop.vpmacsswd with all-register operands.
    ; The mnemonic must be followed by an XMM register so that the symbol
    ; name (which contains the mnemonic) cannot satisfy the pattern.
    define <4 x i32> @test_int_x86_xop_vpmacsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
      ; CHECK: test_int_x86_xop_vpmacsswd:
      ; CHECK: vpmacsswd %xmm
      %res = call <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2)
      ret <4 x i32> %res
    }
    declare <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
    716 
    ; Codegen test for @llvm.x86.xop.vpmacssww with all-register operands.
    ; The mnemonic must be followed by an XMM register so that the symbol
    ; name (which contains the mnemonic) cannot satisfy the pattern.
    define <8 x i16> @test_int_x86_xop_vpmacssww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) {
      ; CHECK: test_int_x86_xop_vpmacssww:
      ; CHECK: vpmacssww %xmm
      %res = call <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2)
      ret <8 x i16> %res
    }
    declare <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
    723 
    ; Codegen test for @llvm.x86.xop.vpmacswd with all-register operands.
    ; The mnemonic must be followed by an XMM register so that the symbol
    ; name (which contains the mnemonic) cannot satisfy the pattern.
    define <4 x i32> @test_int_x86_xop_vpmacswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
      ; CHECK: test_int_x86_xop_vpmacswd:
      ; CHECK: vpmacswd %xmm
      %res = call <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2)
      ret <4 x i32> %res
    }
    declare <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
    730 
    ; Codegen test for @llvm.x86.xop.vpmacsww with all-register operands.
    ; The mnemonic must be followed by an XMM register so that the symbol
    ; name (which contains the mnemonic) cannot satisfy the pattern.
    define <8 x i16> @test_int_x86_xop_vpmacsww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) {
      ; CHECK: test_int_x86_xop_vpmacsww:
      ; CHECK: vpmacsww %xmm
      %res = call <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2)
      ret <8 x i16> %res
    }
    declare <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
    737 
    ; Codegen test for @llvm.x86.xop.vpmadcsswd with all-register operands.
    ; The mnemonic must be followed by an XMM register so that the symbol
    ; name (which contains the mnemonic) cannot satisfy the pattern.
    define <4 x i32> @test_int_x86_xop_vpmadcsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
      ; CHECK: test_int_x86_xop_vpmadcsswd:
      ; CHECK: vpmadcsswd %xmm
      %res = call <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2)
      ret <4 x i32> %res
    }
    declare <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
    744 
    ; Codegen tests for @llvm.x86.xop.vpmadcswd.
    ;
    ; Register form: the mnemonic must be followed by an XMM register so that
    ; the symbol name (which contains the mnemonic) cannot satisfy the pattern.
    define <4 x i32> @test_int_x86_xop_vpmadcswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
      ; CHECK: test_int_x86_xop_vpmadcswd:
      ; CHECK: vpmadcswd %xmm
      %res = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2)
      ret <4 x i32> %res
    }
    ; Memory form: the second vector operand is loaded through %a1. The label
    ; anchor makes the negative vmovaps check cover this function's body, and
    ; the instruction pattern requires a memory operand somewhere on the
    ; instruction line, i.e. a folded load.
    define <4 x i32> @test_int_x86_xop_vpmadcswd_mem(<8 x i16> %a0, <8 x i16>* %a1, <4 x i32> %a2) {
      ; CHECK: test_int_x86_xop_vpmadcswd_mem:
      ; CHECK-NOT: vmovaps
      ; CHECK: vpmadcswd {{.*}}(%{{[a-z0-9]+}})
      %vec = load <8 x i16>* %a1
      %res = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %a0, <8 x i16> %vec, <4 x i32> %a2)
      ret <4 x i32> %res
    }
    declare <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
    758 
    ; Codegen tests for @llvm.x86.xop.vpperm.
    ;
    ; Register form: the mnemonic must be followed by an XMM register so that
    ; the symbol name (which contains the mnemonic) cannot satisfy the pattern.
    define <16 x i8> @test_int_x86_xop_vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
      ; CHECK: test_int_x86_xop_vpperm:
      ; CHECK: vpperm %xmm
      %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2)
      ret <16 x i8> %res
    }
    ; "rm" form: the third vector operand is loaded through %a2. The label
    ; anchor makes the negative vmovaps check cover this function's body, and
    ; the instruction pattern requires a memory operand on the instruction
    ; line, i.e. a folded load.
    define <16 x i8> @test_int_x86_xop_vpperm_rm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %a2) {
      ; CHECK: test_int_x86_xop_vpperm_rm:
      ; CHECK-NOT: vmovaps
      ; CHECK: vpperm {{.*}}(%{{[a-z0-9]+}})
      %vec = load <16 x i8>* %a2
      %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %vec)
      ret <16 x i8> %res
    }
    ; "mr" form: the second vector operand is loaded through %a1; same
    ; expectations as the "rm" form.
    define <16 x i8> @test_int_x86_xop_vpperm_mr(<16 x i8> %a0, <16 x i8>* %a1, <16 x i8> %a2) {
      ; CHECK: test_int_x86_xop_vpperm_mr:
      ; CHECK-NOT: vmovaps
      ; CHECK: vpperm {{.*}}(%{{[a-z0-9]+}})
      %vec = load <16 x i8>* %a1
      %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %vec, <16 x i8> %a2)
      ret <16 x i8> %res
    }
    declare <16 x i8> @llvm.x86.xop.vpperm(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
    779 
    ; Codegen test for @llvm.x86.xop.vprotb with two register operands.
    ; The mnemonic must be followed by an XMM register so that the symbol
    ; name (which contains the mnemonic) cannot satisfy the pattern.
    define <16 x i8> @test_int_x86_xop_vprotb(<16 x i8> %a0, <16 x i8> %a1) {
      ; CHECK: test_int_x86_xop_vprotb:
      ; CHECK: vprotb %xmm
      %res = call <16 x i8> @llvm.x86.xop.vprotb(<16 x i8> %a0, <16 x i8> %a1)
      ret <16 x i8> %res
    }
    declare <16 x i8> @llvm.x86.xop.vprotb(<16 x i8>, <16 x i8>) nounwind readnone
    786 
    ; Codegen test for @llvm.x86.xop.vprotd with two register operands.
    ; The mnemonic must be followed by an XMM register so that the symbol
    ; name (which contains the mnemonic) cannot satisfy the pattern.
    define <4 x i32> @test_int_x86_xop_vprotd(<4 x i32> %a0, <4 x i32> %a1) {
      ; CHECK: test_int_x86_xop_vprotd:
      ; CHECK: vprotd %xmm
      %res = call <4 x i32> @llvm.x86.xop.vprotd(<4 x i32> %a0, <4 x i32> %a1)
      ret <4 x i32> %res
    }
    declare <4 x i32> @llvm.x86.xop.vprotd(<4 x i32>, <4 x i32>) nounwind readnone
    793 
    ; Codegen test for @llvm.x86.xop.vprotq with two register operands.
    ; The mnemonic must be followed by an XMM register so that the symbol
    ; name (which contains the mnemonic) cannot satisfy the pattern.
    define <2 x i64> @test_int_x86_xop_vprotq(<2 x i64> %a0, <2 x i64> %a1) {
      ; CHECK: test_int_x86_xop_vprotq:
      ; CHECK: vprotq %xmm
      %res = call <2 x i64> @llvm.x86.xop.vprotq(<2 x i64> %a0, <2 x i64> %a1)
      ret <2 x i64> %res
    }
    declare <2 x i64> @llvm.x86.xop.vprotq(<2 x i64>, <2 x i64>) nounwind readnone
    800 
    ; Codegen test for @llvm.x86.xop.vprotw with two register operands.
    ; The mnemonic must be followed by an XMM register so that the symbol
    ; name (which contains the mnemonic) cannot satisfy the pattern.
    define <8 x i16> @test_int_x86_xop_vprotw(<8 x i16> %a0, <8 x i16> %a1) {
      ; CHECK: test_int_x86_xop_vprotw:
      ; CHECK: vprotw %xmm
      %res = call <8 x i16> @llvm.x86.xop.vprotw(<8 x i16> %a0, <8 x i16> %a1)
      ret <8 x i16> %res
    }
    declare <8 x i16> @llvm.x86.xop.vprotw(<8 x i16>, <8 x i16>) nounwind readnone
    807 
    ; Codegen test for @llvm.x86.xop.vpshab with two register operands.
    ; The mnemonic must be followed by an XMM register so that the symbol
    ; name (which contains the mnemonic) cannot satisfy the pattern.
    define <16 x i8> @test_int_x86_xop_vpshab(<16 x i8> %a0, <16 x i8> %a1) {
      ; CHECK: test_int_x86_xop_vpshab:
      ; CHECK: vpshab %xmm
      %res = call <16 x i8> @llvm.x86.xop.vpshab(<16 x i8> %a0, <16 x i8> %a1)
      ret <16 x i8> %res
    }
    declare <16 x i8> @llvm.x86.xop.vpshab(<16 x i8>, <16 x i8>) nounwind readnone
    814 
    ; Codegen test for @llvm.x86.xop.vpshad with two register operands.
    ; The mnemonic must be followed by an XMM register so that the symbol
    ; name (which contains the mnemonic) cannot satisfy the pattern.
    define <4 x i32> @test_int_x86_xop_vpshad(<4 x i32> %a0, <4 x i32> %a1) {
      ; CHECK: test_int_x86_xop_vpshad:
      ; CHECK: vpshad %xmm
      %res = call <4 x i32> @llvm.x86.xop.vpshad(<4 x i32> %a0, <4 x i32> %a1)
      ret <4 x i32> %res
    }
    declare <4 x i32> @llvm.x86.xop.vpshad(<4 x i32>, <4 x i32>) nounwind readnone
    821 
    ; Codegen test for @llvm.x86.xop.vpshaq with two register operands.
    ; The mnemonic must be followed by an XMM register so that the symbol
    ; name (which contains the mnemonic) cannot satisfy the pattern.
    define <2 x i64> @test_int_x86_xop_vpshaq(<2 x i64> %a0, <2 x i64> %a1) {
      ; CHECK: test_int_x86_xop_vpshaq:
      ; CHECK: vpshaq %xmm
      %res = call <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64> %a0, <2 x i64> %a1)
      ret <2 x i64> %res
    }
    declare <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64>, <2 x i64>) nounwind readnone
    828 
    ; Codegen test for @llvm.x86.xop.vpshaw with two register operands.
    ; The mnemonic must be followed by an XMM register so that the symbol
    ; name (which contains the mnemonic) cannot satisfy the pattern.
    define <8 x i16> @test_int_x86_xop_vpshaw(<8 x i16> %a0, <8 x i16> %a1) {
      ; CHECK: test_int_x86_xop_vpshaw:
      ; CHECK: vpshaw %xmm
      %res = call <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16> %a0, <8 x i16> %a1)
      ret <8 x i16> %res
    }
    declare <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16>, <8 x i16>) nounwind readnone
    835 
    ; Codegen test for @llvm.x86.xop.vpshlb with two register operands.
    ; The mnemonic must be followed by an XMM register so that the symbol
    ; name (which contains the mnemonic) cannot satisfy the pattern.
    define <16 x i8> @test_int_x86_xop_vpshlb(<16 x i8> %a0, <16 x i8> %a1) {
      ; CHECK: test_int_x86_xop_vpshlb:
      ; CHECK: vpshlb %xmm
      %res = call <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8> %a0, <16 x i8> %a1)
      ret <16 x i8> %res
    }
    declare <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8>, <16 x i8>) nounwind readnone
    842 
    ; Codegen test for @llvm.x86.xop.vpshld with two register operands.
    ; The mnemonic must be followed by an XMM register so that the symbol
    ; name (which contains the mnemonic) cannot satisfy the pattern.
    define <4 x i32> @test_int_x86_xop_vpshld(<4 x i32> %a0, <4 x i32> %a1) {
      ; CHECK: test_int_x86_xop_vpshld:
      ; CHECK: vpshld %xmm
      %res = call <4 x i32> @llvm.x86.xop.vpshld(<4 x i32> %a0, <4 x i32> %a1)
      ret <4 x i32> %res
    }
    declare <4 x i32> @llvm.x86.xop.vpshld(<4 x i32>, <4 x i32>) nounwind readnone
    849 
    ; Codegen test for @llvm.x86.xop.vpshlq with two register operands.
    ; The mnemonic must be followed by an XMM register so that the symbol
    ; name (which contains the mnemonic) cannot satisfy the pattern.
    define <2 x i64> @test_int_x86_xop_vpshlq(<2 x i64> %a0, <2 x i64> %a1) {
      ; CHECK: test_int_x86_xop_vpshlq:
      ; CHECK: vpshlq %xmm
      %res = call <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64> %a0, <2 x i64> %a1)
      ret <2 x i64> %res
    }
    declare <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64>, <2 x i64>) nounwind readnone
    856 
    ; Codegen tests for @llvm.x86.xop.vpshlw.
    ;
    ; Register form: the mnemonic must be followed by an XMM register so that
    ; the symbol name (which contains the mnemonic) cannot satisfy the pattern.
    define <8 x i16> @test_int_x86_xop_vpshlw(<8 x i16> %a0, <8 x i16> %a1) {
      ; CHECK: test_int_x86_xop_vpshlw:
      ; CHECK: vpshlw %xmm
      %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %a0, <8 x i16> %a1)
      ret <8 x i16> %res
    }
    ; "rm" form: the second vector operand is loaded through %a1. The label
    ; anchor makes the negative vmovaps check cover this function's body, and
    ; the instruction pattern requires a memory operand on the instruction
    ; line, i.e. a folded load.
    define <8 x i16> @test_int_x86_xop_vpshlw_rm(<8 x i16> %a0, <8 x i16>* %a1) {
      ; CHECK: test_int_x86_xop_vpshlw_rm:
      ; CHECK-NOT: vmovaps
      ; CHECK: vpshlw {{.*}}(%{{[a-z0-9]+}})
      %vec = load <8 x i16>* %a1
      %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %a0, <8 x i16> %vec)
      ret <8 x i16> %res
    }
    ; "mr" form: the first vector operand is loaded through %a0; same
    ; expectations as the "rm" form.
    define <8 x i16> @test_int_x86_xop_vpshlw_mr(<8 x i16>* %a0, <8 x i16> %a1) {
      ; CHECK: test_int_x86_xop_vpshlw_mr:
      ; CHECK-NOT: vmovaps
      ; CHECK: vpshlw {{.*}}(%{{[a-z0-9]+}})
      %vec = load <8 x i16>* %a0
      %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %vec, <8 x i16> %a1)
      ret <8 x i16> %res
    }
    declare <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16>, <8 x i16>) nounwind readnone
    877 
    ; Codegen tests for the two-operand scalar intrinsic
    ; @llvm.x86.xop.vfrcz.ss.
    ;
    ; Register form: both vector arguments are passed straight through to the
    ; intrinsic. No instruction containing "mov" may appear before vfrczss.
    define <4 x float> @test_int_x86_xop_vfrcz_ss(<4 x float> %a0, <4 x float> %a1) {
      ; CHECK-NOT: mov
      ; CHECK: vfrczss
      %out = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0, <4 x float> %a1)
      ret <4 x float> %out
    }
    ; Memory form: a float is loaded through %a1 and placed in lane 0 of an
    ; otherwise undef vector before being handed to the intrinsic. As above,
    ; nothing containing "mov" may precede vfrczss.
    define <4 x float> @test_int_x86_xop_vfrcz_ss_mem(<4 x float> %a0, float* %a1) {
      ; CHECK-NOT: mov
      ; CHECK: vfrczss
      %scalar = load float* %a1
      %lane0 = insertelement <4 x float> undef, float %scalar, i32 0
      %out = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0, <4 x float> %lane0)
      ret <4 x float> %out
    }
    declare <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>, <4 x float>) nounwind readnone
    893 
    ; Codegen tests for the two-operand scalar intrinsic
    ; @llvm.x86.xop.vfrcz.sd.
    ;
    ; Register form: both vector arguments are passed straight through to the
    ; intrinsic. No instruction containing "mov" may appear before vfrczsd.
    define <2 x double> @test_int_x86_xop_vfrcz_sd(<2 x double> %a0, <2 x double> %a1) {
      ; CHECK-NOT: mov
      ; CHECK: vfrczsd
      %out = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0, <2 x double> %a1)
      ret <2 x double> %out
    }
    ; Memory form: a double is loaded through %a1 and placed in lane 0 of an
    ; otherwise undef vector before being handed to the intrinsic. As above,
    ; nothing containing "mov" may precede vfrczsd.
    define <2 x double> @test_int_x86_xop_vfrcz_sd_mem(<2 x double> %a0, double* %a1) {
      ; CHECK-NOT: mov
      ; CHECK: vfrczsd
      %scalar = load double* %a1
      %lane0 = insertelement <2 x double> undef, double %scalar, i32 0
      %out = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0, <2 x double> %lane0)
      ret <2 x double> %out
    }
    declare <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double>, <2 x double>) nounwind readnone
    909 
    ; Codegen tests for the 128-bit packed intrinsic @llvm.x86.xop.vfrcz.pd.
    ;
    ; Register form: only requires that vfrczpd is emitted.
    define <2 x double> @test_int_x86_xop_vfrcz_pd(<2 x double> %a0) {
      ; CHECK: vfrczpd
      %out = call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %a0)
      ret <2 x double> %out
    }
    ; Memory form: the operand is loaded through %a0, and no vmovaps may
    ; appear before vfrczpd.
    define <2 x double> @test_int_x86_xop_vfrcz_pd_mem(<2 x double>* %a0) {
      ; CHECK-NOT: vmovaps
      ; CHECK: vfrczpd
      %src = load <2 x double>* %a0
      %out = call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %src)
      ret <2 x double> %out
    }
    declare <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double>) nounwind readnone
    923 
    ; Codegen tests for the 256-bit packed intrinsic
    ; @llvm.x86.xop.vfrcz.pd.256.
    ;
    ; The YMM requirement is part of the instruction pattern itself, so it is
    ; checked on the vfrczpd line rather than being satisfiable by any later
    ; line that happens to mention a YMM register. Each test is anchored on
    ; its own function label.
    ;
    ; Register form.
    define <4 x double> @test_int_x86_xop_vfrcz_pd_256(<4 x double> %a0) {
      ; CHECK: test_int_x86_xop_vfrcz_pd_256:
      ; CHECK: vfrczpd {{.*}}%ymm
      %res = call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %a0)
      ret <4 x double> %res
    }
    ; Memory form: the operand is loaded through %a0; the instruction must
    ; take a memory source and write a YMM register, with no vmovaps between
    ; the function label and the instruction.
    define <4 x double> @test_int_x86_xop_vfrcz_pd_256_mem(<4 x double>* %a0) {
      ; CHECK: test_int_x86_xop_vfrcz_pd_256_mem:
      ; CHECK-NOT: vmovaps
      ; CHECK: vfrczpd (%{{[a-z0-9]+}}), %ymm
      %vec = load <4 x double>* %a0
      %res = call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %vec)
      ret <4 x double> %res
    }
    declare <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double>) nounwind readnone
    939 
    ; Codegen tests for the 128-bit packed intrinsic @llvm.x86.xop.vfrcz.ps.
    ;
    ; Register form: only requires that vfrczps is emitted.
    define <4 x float> @test_int_x86_xop_vfrcz_ps(<4 x float> %a0) {
      ; CHECK: vfrczps
      %out = call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %a0)
      ret <4 x float> %out
    }
    ; Memory form: the operand is loaded through %a0, and no vmovaps may
    ; appear before vfrczps.
    define <4 x float> @test_int_x86_xop_vfrcz_ps_mem(<4 x float>* %a0) {
      ; CHECK-NOT: vmovaps
      ; CHECK: vfrczps
      %src = load <4 x float>* %a0
      %out = call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %src)
      ret <4 x float> %out
    }
    declare <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float>) nounwind readnone
    953 
    ; Codegen tests for the 256-bit packed intrinsic
    ; @llvm.x86.xop.vfrcz.ps.256.
    ;
    ; The YMM requirement is part of the instruction pattern itself, so it is
    ; checked on the vfrczps line rather than being satisfiable by any later
    ; line that happens to mention a YMM register. Each test is anchored on
    ; its own function label.
    ;
    ; Register form.
    define <8 x float> @test_int_x86_xop_vfrcz_ps_256(<8 x float> %a0) {
      ; CHECK: test_int_x86_xop_vfrcz_ps_256:
      ; CHECK: vfrczps {{.*}}%ymm
      %res = call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %a0)
      ret <8 x float> %res
    }
    ; Memory form: the operand is loaded through %a0; the instruction must
    ; take a memory source and write a YMM register, with no vmovaps between
    ; the function label and the instruction.
    define <8 x float> @test_int_x86_xop_vfrcz_ps_256_mem(<8 x float>* %a0) {
      ; CHECK: test_int_x86_xop_vfrcz_ps_256_mem:
      ; CHECK-NOT: vmovaps
      ; CHECK: vfrczps (%{{[a-z0-9]+}}), %ymm
      %vec = load <8 x float>* %a0
      %res = call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %vec)
      ret <8 x float> %res
    }
    declare <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float>) nounwind readnone
    969 
    970