Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mattr=+avx,+fma4,+xop | FileCheck %s
      2 
      3 define <2 x double> @test_int_x86_xop_vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
          ; 128-bit vpermil2pd, all-register form. The label anchor below keeps every
          ; match inside this function's own output.
          ; CHECK-LABEL: test_int_x86_xop_vpermil2pd:
      4   ; CHECK: vpermil2pd
      5   %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 1)
      6   ret <2 x double> %res
      7 }
      8 define <2 x double> @test_int_x86_xop_vpermil2pd_mr(<2 x double> %a0, <2 x double>* %a1, <2 x double> %a2) {
          ; Second vector operand is loaded from %a1; no separate vmovaps may be
          ; emitted between the function label and the instruction.
          ; CHECK-LABEL: test_int_x86_xop_vpermil2pd_mr:
      9   ; CHECK-NOT: vmovaps
     10   ; CHECK: vpermil2pd
     11   %vec = load <2 x double>, <2 x double>* %a1
     12   %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %vec, <2 x double> %a2, i8 1)
     13   ret <2 x double> %res
     14 }
     15 define <2 x double> @test_int_x86_xop_vpermil2pd_rm(<2 x double> %a0, <2 x double> %a1, <2 x double>* %a2) {
          ; Third vector operand is loaded from %a2; same no-vmovaps requirement.
          ; CHECK-LABEL: test_int_x86_xop_vpermil2pd_rm:
     16   ; CHECK-NOT: vmovaps
     17   ; CHECK: vpermil2pd
     18   %vec = load <2 x double>, <2 x double>* %a2
     19   %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %vec, i8 1)
     20   ret <2 x double> %res
     21 }
     22 declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
     23 
     24 define <4 x double> @test_int_x86_xop_vpermil2pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
          ; 256-bit vpermil2pd, all-register form; a ymm register must appear at or
          ; after the instruction, within this function only.
          ; CHECK-LABEL: test_int_x86_xop_vpermil2pd_256:
     25   ; CHECK: vpermil2pd
     26   ; CHECK: ymm
     27   %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 2)
     28   ret <4 x double> %res
     29 }
     30 define <4 x double> @test_int_x86_xop_vpermil2pd_256_mr(<4 x double> %a0, <4 x double>* %a1, <4 x double> %a2) {
          ; Second vector operand is loaded from %a1; no separate vmovaps may be
          ; emitted between the function label and the instruction.
          ; CHECK-LABEL: test_int_x86_xop_vpermil2pd_256_mr:
     31   ; CHECK-NOT: vmovaps
     32   ; CHECK: vpermil2pd
     33   ; CHECK: ymm
     34   %vec = load <4 x double>, <4 x double>* %a1
     35   %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %vec, <4 x double> %a2, i8 2)
     36   ret <4 x double> %res
     37 }
     38 define <4 x double> @test_int_x86_xop_vpermil2pd_256_rm(<4 x double> %a0, <4 x double> %a1, <4 x double>* %a2) {
          ; Third vector operand is loaded from %a2; same no-vmovaps requirement.
          ; CHECK-LABEL: test_int_x86_xop_vpermil2pd_256_rm:
     39   ; CHECK-NOT: vmovaps
     40   ; CHECK: vpermil2pd
     41   ; CHECK: ymm
     42   %vec = load <4 x double>, <4 x double>* %a2
     43   %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %vec, i8 2)
     44   ret <4 x double> %res
     45 }
     46 declare <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
     47 
     48 define <4 x float> @test_int_x86_xop_vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
          ; 128-bit vpermil2ps, all-register form, anchored to this function.
          ; CHECK-LABEL: test_int_x86_xop_vpermil2ps:
     49   ; CHECK: vpermil2ps
     50   %res = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 3)
     51   ret <4 x float> %res
     52 }
     53 declare <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
     54 
     55 define <8 x float> @test_int_x86_xop_vpermil2ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
          ; 256-bit vpermil2ps; a ymm register must appear at or after the
          ; instruction, within this function only.
          ; CHECK-LABEL: test_int_x86_xop_vpermil2ps_256:
     56   ; CHECK: vpermil2ps
     57   ; CHECK: ymm
     58   %res = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 4)
     59   ret <8 x float> %res
     60 }
     61 declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
     62 
     63 define <2 x i64> @test_int_x86_xop_vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
          ; 128-bit vpcmov with the exact register operand order pinned.
          ; CHECK-LABEL: test_int_x86_xop_vpcmov:
     64   ; CHECK: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
     65   %res = call <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2)
     66   ret <2 x i64> %res
     67 }
     68 declare <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64>, <2 x i64>, <2 x i64>) nounwind readnone
     69 
     70 define <4 x i64> @test_int_x86_xop_vpcmov_256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
          ; 256-bit vpcmov with the exact register operand order pinned.
          ; CHECK-LABEL: test_int_x86_xop_vpcmov_256:
     71   ; CHECK: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
     72   %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2)
     73   ret <4 x i64> %res
     74 }
     75 define <4 x i64> @test_int_x86_xop_vpcmov_256_mr(<4 x i64> %a0, <4 x i64>* %a1, <4 x i64> %a2) {
          ; Second vector operand is loaded from %a1; no separate vmovaps may be
          ; emitted between the function label and the instruction.
          ; CHECK-LABEL: test_int_x86_xop_vpcmov_256_mr:
     76   ; CHECK-NOT: vmovaps
     77   ; CHECK: vpcmov
     78   ; CHECK: ymm
     79   %vec = load <4 x i64>, <4 x i64>* %a1
     80   %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %vec, <4 x i64> %a2)
     81   ret <4 x i64> %res
     82 }
     83 define <4 x i64> @test_int_x86_xop_vpcmov_256_rm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64>* %a2) {
          ; Third vector operand is loaded from %a2; same no-vmovaps requirement.
          ; CHECK-LABEL: test_int_x86_xop_vpcmov_256_rm:
     84   ; CHECK-NOT: vmovaps
     85   ; CHECK: vpcmov
     86   ; CHECK: ymm
     87   %vec = load <4 x i64>, <4 x i64>* %a2
     88   %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %vec)
     89   ret <4 x i64> %res
     90 }
     91 declare <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64>, <4 x i64>, <4 x i64>) nounwind readnone
     92 
     93 define <16 x i8> @test_int_x86_xop_vpcomeqb(<16 x i8> %a0, <16 x i8> %a1) {
          ; Byte equality compare, register form, anchored to this function.
          ; CHECK-LABEL: test_int_x86_xop_vpcomeqb:
     94   ; CHECK: vpcomeqb
     95   %res = call <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8> %a0, <16 x i8> %a1)
     96   ret <16 x i8> %res
     97 }
     98 define <16 x i8> @test_int_x86_xop_vpcomeqb_mem(<16 x i8> %a0, <16 x i8>* %a1) {
          ; Second operand is loaded from %a1; no separate vmovaps may be emitted
          ; between the function label and the compare.
          ; CHECK-LABEL: test_int_x86_xop_vpcomeqb_mem:
     99   ; CHECK-NOT: vmovaps
    100   ; CHECK: vpcomeqb
    101   %vec = load <16 x i8>, <16 x i8>* %a1
    102   %res = call <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8> %a0, <16 x i8> %vec)
    103   ret <16 x i8> %res
    104 }
    105 declare <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8>, <16 x i8>) nounwind readnone
    106 
    107 define <8 x i16> @test_int_x86_xop_vpcomeqw(<8 x i16> %a0, <8 x i16> %a1) {
          ; Signed equality compares for word, dword and qword elements; each match
          ; is anchored to its own function.
          ; CHECK-LABEL: test_int_x86_xop_vpcomeqw:
    108   ; CHECK: vpcomeqw
    109   %res = call <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16> %a0, <8 x i16> %a1)
    110   ret <8 x i16> %res
    111 }
    112 declare <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16>, <8 x i16>) nounwind readnone
    113 
    114 define <4 x i32> @test_int_x86_xop_vpcomeqd(<4 x i32> %a0, <4 x i32> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomeqd:
    115   ; CHECK: vpcomeqd
    116   %res = call <4 x i32> @llvm.x86.xop.vpcomeqd(<4 x i32> %a0, <4 x i32> %a1)
    117   ret <4 x i32> %res
    118 }
    119 declare <4 x i32> @llvm.x86.xop.vpcomeqd(<4 x i32>, <4 x i32>) nounwind readnone
    120 
    121 define <2 x i64> @test_int_x86_xop_vpcomeqq(<2 x i64> %a0, <2 x i64> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomeqq:
    122   ; CHECK: vpcomeqq
    123   %res = call <2 x i64> @llvm.x86.xop.vpcomeqq(<2 x i64> %a0, <2 x i64> %a1)
    124   ret <2 x i64> %res
    125 }
    126 declare <2 x i64> @llvm.x86.xop.vpcomeqq(<2 x i64>, <2 x i64>) nounwind readnone
    127 
    128 define <16 x i8> @test_int_x86_xop_vpcomequb(<16 x i8> %a0, <16 x i8> %a1) {
          ; Unsigned-suffixed equality compares (ub/ud/uq/uw); each match is anchored
          ; to its own function.
          ; CHECK-LABEL: test_int_x86_xop_vpcomequb:
    129   ; CHECK: vpcomequb
    130   %res = call <16 x i8> @llvm.x86.xop.vpcomequb(<16 x i8> %a0, <16 x i8> %a1)
    131   ret <16 x i8> %res
    132 }
    133 declare <16 x i8> @llvm.x86.xop.vpcomequb(<16 x i8>, <16 x i8>) nounwind readnone
    134 
    135 define <4 x i32> @test_int_x86_xop_vpcomequd(<4 x i32> %a0, <4 x i32> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomequd:
    136   ; CHECK: vpcomequd
    137   %res = call <4 x i32> @llvm.x86.xop.vpcomequd(<4 x i32> %a0, <4 x i32> %a1)
    138   ret <4 x i32> %res
    139 }
    140 declare <4 x i32> @llvm.x86.xop.vpcomequd(<4 x i32>, <4 x i32>) nounwind readnone
    141 
    142 define <2 x i64> @test_int_x86_xop_vpcomequq(<2 x i64> %a0, <2 x i64> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomequq:
    143   ; CHECK: vpcomequq
    144   %res = call <2 x i64> @llvm.x86.xop.vpcomequq(<2 x i64> %a0, <2 x i64> %a1)
    145   ret <2 x i64> %res
    146 }
    147 declare <2 x i64> @llvm.x86.xop.vpcomequq(<2 x i64>, <2 x i64>) nounwind readnone
    148 
    149 define <8 x i16> @test_int_x86_xop_vpcomequw(<8 x i16> %a0, <8 x i16> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomequw:
    150   ; CHECK: vpcomequw
    151   %res = call <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16> %a0, <8 x i16> %a1)
    152   ret <8 x i16> %res
    153 }
    154 declare <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16>, <8 x i16>) nounwind readnone
    155 
    156 define <16 x i8> @test_int_x86_xop_vpcomfalseb(<16 x i8> %a0, <16 x i8> %a1) {
          ; "false" predicate compares for every element width and signedness suffix;
          ; each match is anchored to its own function.
          ; CHECK-LABEL: test_int_x86_xop_vpcomfalseb:
    157   ; CHECK: vpcomfalseb
    158   %res = call <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8> %a0, <16 x i8> %a1)
    159   ret <16 x i8> %res
    160 }
    161 declare <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8>, <16 x i8>) nounwind readnone
    162 
    163 define <4 x i32> @test_int_x86_xop_vpcomfalsed(<4 x i32> %a0, <4 x i32> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomfalsed:
    164   ; CHECK: vpcomfalsed
    165   %res = call <4 x i32> @llvm.x86.xop.vpcomfalsed(<4 x i32> %a0, <4 x i32> %a1)
    166   ret <4 x i32> %res
    167 }
    168 declare <4 x i32> @llvm.x86.xop.vpcomfalsed(<4 x i32>, <4 x i32>) nounwind readnone
    169 
    170 define <2 x i64> @test_int_x86_xop_vpcomfalseq(<2 x i64> %a0, <2 x i64> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomfalseq:
    171   ; CHECK: vpcomfalseq
    172   %res = call <2 x i64> @llvm.x86.xop.vpcomfalseq(<2 x i64> %a0, <2 x i64> %a1)
    173   ret <2 x i64> %res
    174 }
    175 declare <2 x i64> @llvm.x86.xop.vpcomfalseq(<2 x i64>, <2 x i64>) nounwind readnone
    176 
    177 define <16 x i8> @test_int_x86_xop_vpcomfalseub(<16 x i8> %a0, <16 x i8> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomfalseub:
    178   ; CHECK: vpcomfalseub
    179   %res = call <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8> %a0, <16 x i8> %a1)
    180   ret <16 x i8> %res
    181 }
    182 declare <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8>, <16 x i8>) nounwind readnone
    183 
    184 define <4 x i32> @test_int_x86_xop_vpcomfalseud(<4 x i32> %a0, <4 x i32> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomfalseud:
    185   ; CHECK: vpcomfalseud
    186   %res = call <4 x i32> @llvm.x86.xop.vpcomfalseud(<4 x i32> %a0, <4 x i32> %a1)
    187   ret <4 x i32> %res
    188 }
    189 declare <4 x i32> @llvm.x86.xop.vpcomfalseud(<4 x i32>, <4 x i32>) nounwind readnone
    190 
    191 define <2 x i64> @test_int_x86_xop_vpcomfalseuq(<2 x i64> %a0, <2 x i64> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomfalseuq:
    192   ; CHECK: vpcomfalseuq
    193   %res = call <2 x i64> @llvm.x86.xop.vpcomfalseuq(<2 x i64> %a0, <2 x i64> %a1)
    194   ret <2 x i64> %res
    195 }
    196 declare <2 x i64> @llvm.x86.xop.vpcomfalseuq(<2 x i64>, <2 x i64>) nounwind readnone
    197 
    198 define <8 x i16> @test_int_x86_xop_vpcomfalseuw(<8 x i16> %a0, <8 x i16> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomfalseuw:
    199   ; CHECK: vpcomfalseuw
    200   %res = call <8 x i16> @llvm.x86.xop.vpcomfalseuw(<8 x i16> %a0, <8 x i16> %a1)
    201   ret <8 x i16> %res
    202 }
    203 declare <8 x i16> @llvm.x86.xop.vpcomfalseuw(<8 x i16>, <8 x i16>) nounwind readnone
    204 
    205 define <8 x i16> @test_int_x86_xop_vpcomfalsew(<8 x i16> %a0, <8 x i16> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomfalsew:
    206   ; CHECK: vpcomfalsew
    207   %res = call <8 x i16> @llvm.x86.xop.vpcomfalsew(<8 x i16> %a0, <8 x i16> %a1)
    208   ret <8 x i16> %res
    209 }
    210 declare <8 x i16> @llvm.x86.xop.vpcomfalsew(<8 x i16>, <8 x i16>) nounwind readnone
    211 
    212 define <16 x i8> @test_int_x86_xop_vpcomgeb(<16 x i8> %a0, <16 x i8> %a1) {
          ; "ge" predicate compares for every element width and signedness suffix;
          ; each match is anchored to its own function.
          ; CHECK-LABEL: test_int_x86_xop_vpcomgeb:
    213   ; CHECK: vpcomgeb
    214   %res = call <16 x i8> @llvm.x86.xop.vpcomgeb(<16 x i8> %a0, <16 x i8> %a1)
    215   ret <16 x i8> %res
    216 }
    217 declare <16 x i8> @llvm.x86.xop.vpcomgeb(<16 x i8>, <16 x i8>) nounwind readnone
    218 
    219 define <4 x i32> @test_int_x86_xop_vpcomged(<4 x i32> %a0, <4 x i32> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomged:
    220   ; CHECK: vpcomged
    221   %res = call <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32> %a0, <4 x i32> %a1)
    222   ret <4 x i32> %res
    223 }
    224 declare <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32>, <4 x i32>) nounwind readnone
    225 
    226 define <2 x i64> @test_int_x86_xop_vpcomgeq(<2 x i64> %a0, <2 x i64> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomgeq:
    227   ; CHECK: vpcomgeq
    228   %res = call <2 x i64> @llvm.x86.xop.vpcomgeq(<2 x i64> %a0, <2 x i64> %a1)
    229   ret <2 x i64> %res
    230 }
    231 declare <2 x i64> @llvm.x86.xop.vpcomgeq(<2 x i64>, <2 x i64>) nounwind readnone
    232 
    233 define <16 x i8> @test_int_x86_xop_vpcomgeub(<16 x i8> %a0, <16 x i8> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomgeub:
    234   ; CHECK: vpcomgeub
    235   %res = call <16 x i8> @llvm.x86.xop.vpcomgeub(<16 x i8> %a0, <16 x i8> %a1)
    236   ret <16 x i8> %res
    237 }
    238 declare <16 x i8> @llvm.x86.xop.vpcomgeub(<16 x i8>, <16 x i8>) nounwind readnone
    239 
    240 define <4 x i32> @test_int_x86_xop_vpcomgeud(<4 x i32> %a0, <4 x i32> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomgeud:
    241   ; CHECK: vpcomgeud
    242   %res = call <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32> %a0, <4 x i32> %a1)
    243   ret <4 x i32> %res
    244 }
    245 declare <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32>, <4 x i32>) nounwind readnone
    246 
    247 define <2 x i64> @test_int_x86_xop_vpcomgeuq(<2 x i64> %a0, <2 x i64> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomgeuq:
    248   ; CHECK: vpcomgeuq
    249   %res = call <2 x i64> @llvm.x86.xop.vpcomgeuq(<2 x i64> %a0, <2 x i64> %a1)
    250   ret <2 x i64> %res
    251 }
    252 declare <2 x i64> @llvm.x86.xop.vpcomgeuq(<2 x i64>, <2 x i64>) nounwind readnone
    253 
    254 define <8 x i16> @test_int_x86_xop_vpcomgeuw(<8 x i16> %a0, <8 x i16> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomgeuw:
    255   ; CHECK: vpcomgeuw
    256   %res = call <8 x i16> @llvm.x86.xop.vpcomgeuw(<8 x i16> %a0, <8 x i16> %a1)
    257   ret <8 x i16> %res
    258 }
    259 declare <8 x i16> @llvm.x86.xop.vpcomgeuw(<8 x i16>, <8 x i16>) nounwind readnone
    260 
    261 define <8 x i16> @test_int_x86_xop_vpcomgew(<8 x i16> %a0, <8 x i16> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomgew:
    262   ; CHECK: vpcomgew
    263   %res = call <8 x i16> @llvm.x86.xop.vpcomgew(<8 x i16> %a0, <8 x i16> %a1)
    264   ret <8 x i16> %res
    265 }
    266 declare <8 x i16> @llvm.x86.xop.vpcomgew(<8 x i16>, <8 x i16>) nounwind readnone
    267 
    268 define <16 x i8> @test_int_x86_xop_vpcomgtb(<16 x i8> %a0, <16 x i8> %a1) {
          ; "gt" predicate compares for every element width and signedness suffix;
          ; each match is anchored to its own function.
          ; CHECK-LABEL: test_int_x86_xop_vpcomgtb:
    269   ; CHECK: vpcomgtb
    270   %res = call <16 x i8> @llvm.x86.xop.vpcomgtb(<16 x i8> %a0, <16 x i8> %a1)
    271   ret <16 x i8> %res
    272 }
    273 declare <16 x i8> @llvm.x86.xop.vpcomgtb(<16 x i8>, <16 x i8>) nounwind readnone
    274 
    275 define <4 x i32> @test_int_x86_xop_vpcomgtd(<4 x i32> %a0, <4 x i32> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomgtd:
    276   ; CHECK: vpcomgtd
    277   %res = call <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32> %a0, <4 x i32> %a1)
    278   ret <4 x i32> %res
    279 }
    280 declare <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32>, <4 x i32>) nounwind readnone
    281 
    282 define <2 x i64> @test_int_x86_xop_vpcomgtq(<2 x i64> %a0, <2 x i64> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomgtq:
    283   ; CHECK: vpcomgtq
    284   %res = call <2 x i64> @llvm.x86.xop.vpcomgtq(<2 x i64> %a0, <2 x i64> %a1)
    285   ret <2 x i64> %res
    286 }
    287 declare <2 x i64> @llvm.x86.xop.vpcomgtq(<2 x i64>, <2 x i64>) nounwind readnone
    288 
    289 define <16 x i8> @test_int_x86_xop_vpcomgtub(<16 x i8> %a0, <16 x i8> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomgtub:
    290   ; CHECK: vpcomgtub
    291   %res = call <16 x i8> @llvm.x86.xop.vpcomgtub(<16 x i8> %a0, <16 x i8> %a1)
    292   ret <16 x i8> %res
    293 }
    294 declare <16 x i8> @llvm.x86.xop.vpcomgtub(<16 x i8>, <16 x i8>) nounwind readnone
    295 
    296 define <4 x i32> @test_int_x86_xop_vpcomgtud(<4 x i32> %a0, <4 x i32> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomgtud:
    297   ; CHECK: vpcomgtud
    298   %res = call <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32> %a0, <4 x i32> %a1)
    299   ret <4 x i32> %res
    300 }
    301 declare <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32>, <4 x i32>) nounwind readnone
    302 
    303 define <2 x i64> @test_int_x86_xop_vpcomgtuq(<2 x i64> %a0, <2 x i64> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomgtuq:
    304   ; CHECK: vpcomgtuq
    305   %res = call <2 x i64> @llvm.x86.xop.vpcomgtuq(<2 x i64> %a0, <2 x i64> %a1)
    306   ret <2 x i64> %res
    307 }
    308 declare <2 x i64> @llvm.x86.xop.vpcomgtuq(<2 x i64>, <2 x i64>) nounwind readnone
    309 
    310 define <8 x i16> @test_int_x86_xop_vpcomgtuw(<8 x i16> %a0, <8 x i16> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomgtuw:
    311   ; CHECK: vpcomgtuw
    312   %res = call <8 x i16> @llvm.x86.xop.vpcomgtuw(<8 x i16> %a0, <8 x i16> %a1)
    313   ret <8 x i16> %res
    314 }
    315 declare <8 x i16> @llvm.x86.xop.vpcomgtuw(<8 x i16>, <8 x i16>) nounwind readnone
    316 
    317 define <8 x i16> @test_int_x86_xop_vpcomgtw(<8 x i16> %a0, <8 x i16> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomgtw:
    318   ; CHECK: vpcomgtw
    319   %res = call <8 x i16> @llvm.x86.xop.vpcomgtw(<8 x i16> %a0, <8 x i16> %a1)
    320   ret <8 x i16> %res
    321 }
    322 declare <8 x i16> @llvm.x86.xop.vpcomgtw(<8 x i16>, <8 x i16>) nounwind readnone
    323 
    324 define <16 x i8> @test_int_x86_xop_vpcomleb(<16 x i8> %a0, <16 x i8> %a1) {
          ; "le" predicate compares for every element width and signedness suffix;
          ; each match is anchored to its own function.
          ; CHECK-LABEL: test_int_x86_xop_vpcomleb:
    325   ; CHECK: vpcomleb
    326   %res = call <16 x i8> @llvm.x86.xop.vpcomleb(<16 x i8> %a0, <16 x i8> %a1)
    327   ret <16 x i8> %res
    328 }
    329 declare <16 x i8> @llvm.x86.xop.vpcomleb(<16 x i8>, <16 x i8>) nounwind readnone
    330 
    331 define <4 x i32> @test_int_x86_xop_vpcomled(<4 x i32> %a0, <4 x i32> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomled:
    332   ; CHECK: vpcomled
    333   %res = call <4 x i32> @llvm.x86.xop.vpcomled(<4 x i32> %a0, <4 x i32> %a1)
    334   ret <4 x i32> %res
    335 }
    336 declare <4 x i32> @llvm.x86.xop.vpcomled(<4 x i32>, <4 x i32>) nounwind readnone
    337 
    338 define <2 x i64> @test_int_x86_xop_vpcomleq(<2 x i64> %a0, <2 x i64> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomleq:
    339   ; CHECK: vpcomleq
    340   %res = call <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64> %a0, <2 x i64> %a1)
    341   ret <2 x i64> %res
    342 }
    343 declare <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64>, <2 x i64>) nounwind readnone
    344 
    345 define <16 x i8> @test_int_x86_xop_vpcomleub(<16 x i8> %a0, <16 x i8> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomleub:
    346   ; CHECK: vpcomleub
    347   %res = call <16 x i8> @llvm.x86.xop.vpcomleub(<16 x i8> %a0, <16 x i8> %a1)
    348   ret <16 x i8> %res
    349 }
    350 declare <16 x i8> @llvm.x86.xop.vpcomleub(<16 x i8>, <16 x i8>) nounwind readnone
    351 
    352 define <4 x i32> @test_int_x86_xop_vpcomleud(<4 x i32> %a0, <4 x i32> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomleud:
    353   ; CHECK: vpcomleud
    354   %res = call <4 x i32> @llvm.x86.xop.vpcomleud(<4 x i32> %a0, <4 x i32> %a1)
    355   ret <4 x i32> %res
    356 }
    357 declare <4 x i32> @llvm.x86.xop.vpcomleud(<4 x i32>, <4 x i32>) nounwind readnone
    358 
    359 define <2 x i64> @test_int_x86_xop_vpcomleuq(<2 x i64> %a0, <2 x i64> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomleuq:
    360   ; CHECK: vpcomleuq
    361   %res = call <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64> %a0, <2 x i64> %a1)
    362   ret <2 x i64> %res
    363 }
    364 declare <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64>, <2 x i64>) nounwind readnone
    365 
    366 define <8 x i16> @test_int_x86_xop_vpcomleuw(<8 x i16> %a0, <8 x i16> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomleuw:
    367   ; CHECK: vpcomleuw
    368   %res = call <8 x i16> @llvm.x86.xop.vpcomleuw(<8 x i16> %a0, <8 x i16> %a1)
    369   ret <8 x i16> %res
    370 }
    371 declare <8 x i16> @llvm.x86.xop.vpcomleuw(<8 x i16>, <8 x i16>) nounwind readnone
    372 
    373 define <8 x i16> @test_int_x86_xop_vpcomlew(<8 x i16> %a0, <8 x i16> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomlew:
    374   ; CHECK: vpcomlew
    375   %res = call <8 x i16> @llvm.x86.xop.vpcomlew(<8 x i16> %a0, <8 x i16> %a1)
    376   ret <8 x i16> %res
    377 }
    378 declare <8 x i16> @llvm.x86.xop.vpcomlew(<8 x i16>, <8 x i16>) nounwind readnone
    379 
    380 define <16 x i8> @test_int_x86_xop_vpcomltb(<16 x i8> %a0, <16 x i8> %a1) {
          ; "lt" predicate compares for every element width and signedness suffix;
          ; each match is anchored to its own function.
          ; CHECK-LABEL: test_int_x86_xop_vpcomltb:
    381   ; CHECK: vpcomltb
    382   %res = call <16 x i8> @llvm.x86.xop.vpcomltb(<16 x i8> %a0, <16 x i8> %a1)
    383   ret <16 x i8> %res
    384 }
    385 declare <16 x i8> @llvm.x86.xop.vpcomltb(<16 x i8>, <16 x i8>) nounwind readnone
    386 
    387 define <4 x i32> @test_int_x86_xop_vpcomltd(<4 x i32> %a0, <4 x i32> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomltd:
    388   ; CHECK: vpcomltd
    389   %res = call <4 x i32> @llvm.x86.xop.vpcomltd(<4 x i32> %a0, <4 x i32> %a1)
    390   ret <4 x i32> %res
    391 }
    392 declare <4 x i32> @llvm.x86.xop.vpcomltd(<4 x i32>, <4 x i32>) nounwind readnone
    393 
    394 define <2 x i64> @test_int_x86_xop_vpcomltq(<2 x i64> %a0, <2 x i64> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomltq:
    395   ; CHECK: vpcomltq
    396   %res = call <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64> %a0, <2 x i64> %a1)
    397   ret <2 x i64> %res
    398 }
    399 declare <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64>, <2 x i64>) nounwind readnone
    400 
    401 define <16 x i8> @test_int_x86_xop_vpcomltub(<16 x i8> %a0, <16 x i8> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomltub:
    402   ; CHECK: vpcomltub
    403   %res = call <16 x i8> @llvm.x86.xop.vpcomltub(<16 x i8> %a0, <16 x i8> %a1)
    404   ret <16 x i8> %res
    405 }
    406 declare <16 x i8> @llvm.x86.xop.vpcomltub(<16 x i8>, <16 x i8>) nounwind readnone
    407 
    408 define <4 x i32> @test_int_x86_xop_vpcomltud(<4 x i32> %a0, <4 x i32> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomltud:
    409   ; CHECK: vpcomltud
    410   %res = call <4 x i32> @llvm.x86.xop.vpcomltud(<4 x i32> %a0, <4 x i32> %a1)
    411   ret <4 x i32> %res
    412 }
    413 declare <4 x i32> @llvm.x86.xop.vpcomltud(<4 x i32>, <4 x i32>) nounwind readnone
    414 
    415 define <2 x i64> @test_int_x86_xop_vpcomltuq(<2 x i64> %a0, <2 x i64> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomltuq:
    416   ; CHECK: vpcomltuq
    417   %res = call <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64> %a0, <2 x i64> %a1)
    418   ret <2 x i64> %res
    419 }
    420 declare <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64>, <2 x i64>) nounwind readnone
    421 
    422 define <8 x i16> @test_int_x86_xop_vpcomltuw(<8 x i16> %a0, <8 x i16> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomltuw:
    423   ; CHECK: vpcomltuw
    424   %res = call <8 x i16> @llvm.x86.xop.vpcomltuw(<8 x i16> %a0, <8 x i16> %a1)
    425   ret <8 x i16> %res
    426 }
    427 declare <8 x i16> @llvm.x86.xop.vpcomltuw(<8 x i16>, <8 x i16>) nounwind readnone
    428 
    429 define <8 x i16> @test_int_x86_xop_vpcomltw(<8 x i16> %a0, <8 x i16> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomltw:
    430   ; CHECK: vpcomltw
    431   %res = call <8 x i16> @llvm.x86.xop.vpcomltw(<8 x i16> %a0, <8 x i16> %a1)
    432   ret <8 x i16> %res
    433 }
    434 declare <8 x i16> @llvm.x86.xop.vpcomltw(<8 x i16>, <8 x i16>) nounwind readnone
    435 
    436 define <16 x i8> @test_int_x86_xop_vpcomneb(<16 x i8> %a0, <16 x i8> %a1) {
          ; Not-equal compares. The intrinsic names spell the predicate "ne" while the
          ; expected mnemonics spell it "neq" (vpcomneb -> vpcomneqb, and so on).
          ; Each match is anchored to its own function.
          ; CHECK-LABEL: test_int_x86_xop_vpcomneb:
    437   ; CHECK: vpcomneqb
    438   %res = call <16 x i8> @llvm.x86.xop.vpcomneb(<16 x i8> %a0, <16 x i8> %a1)
    439   ret <16 x i8> %res
    440 }
    441 declare <16 x i8> @llvm.x86.xop.vpcomneb(<16 x i8>, <16 x i8>) nounwind readnone
    442 
    443 define <4 x i32> @test_int_x86_xop_vpcomned(<4 x i32> %a0, <4 x i32> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomned:
    444   ; CHECK: vpcomneqd
    445   %res = call <4 x i32> @llvm.x86.xop.vpcomned(<4 x i32> %a0, <4 x i32> %a1)
    446   ret <4 x i32> %res
    447 }
    448 declare <4 x i32> @llvm.x86.xop.vpcomned(<4 x i32>, <4 x i32>) nounwind readnone
    449 
    450 define <2 x i64> @test_int_x86_xop_vpcomneq(<2 x i64> %a0, <2 x i64> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomneq:
    451   ; CHECK: vpcomneqq
    452   %res = call <2 x i64> @llvm.x86.xop.vpcomneq(<2 x i64> %a0, <2 x i64> %a1)
    453   ret <2 x i64> %res
    454 }
    455 declare <2 x i64> @llvm.x86.xop.vpcomneq(<2 x i64>, <2 x i64>) nounwind readnone
    456 
    457 define <16 x i8> @test_int_x86_xop_vpcomneub(<16 x i8> %a0, <16 x i8> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomneub:
    458   ; CHECK: vpcomnequb
    459   %res = call <16 x i8> @llvm.x86.xop.vpcomneub(<16 x i8> %a0, <16 x i8> %a1)
    460   ret <16 x i8> %res
    461 }
    462 declare <16 x i8> @llvm.x86.xop.vpcomneub(<16 x i8>, <16 x i8>) nounwind readnone
    463 
    464 define <4 x i32> @test_int_x86_xop_vpcomneud(<4 x i32> %a0, <4 x i32> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomneud:
    465   ; CHECK: vpcomnequd
    466   %res = call <4 x i32> @llvm.x86.xop.vpcomneud(<4 x i32> %a0, <4 x i32> %a1)
    467   ret <4 x i32> %res
    468 }
    469 declare <4 x i32> @llvm.x86.xop.vpcomneud(<4 x i32>, <4 x i32>) nounwind readnone
    470 
    471 define <2 x i64> @test_int_x86_xop_vpcomneuq(<2 x i64> %a0, <2 x i64> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomneuq:
    472   ; CHECK: vpcomnequq
    473   %res = call <2 x i64> @llvm.x86.xop.vpcomneuq(<2 x i64> %a0, <2 x i64> %a1)
    474   ret <2 x i64> %res
    475 }
    476 declare <2 x i64> @llvm.x86.xop.vpcomneuq(<2 x i64>, <2 x i64>) nounwind readnone
    477 
    478 define <8 x i16> @test_int_x86_xop_vpcomneuw(<8 x i16> %a0, <8 x i16> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomneuw:
    479   ; CHECK: vpcomnequw
    480   %res = call <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16> %a0, <8 x i16> %a1)
    481   ret <8 x i16> %res
    482 }
    483 declare <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16>, <8 x i16>) nounwind readnone
    484 
    485 define <8 x i16> @test_int_x86_xop_vpcomnew(<8 x i16> %a0, <8 x i16> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomnew:
    486   ; CHECK: vpcomneqw
    487   %res = call <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16> %a0, <8 x i16> %a1)
    488   ret <8 x i16> %res
    489 }
    490 declare <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16>, <8 x i16>) nounwind readnone
    491 
    492 define <16 x i8> @test_int_x86_xop_vpcomtrueb(<16 x i8> %a0, <16 x i8> %a1) {
          ; "true" predicate compares for every element width and signedness suffix;
          ; each match is anchored to its own function.
          ; CHECK-LABEL: test_int_x86_xop_vpcomtrueb:
    493   ; CHECK: vpcomtrueb
    494   %res = call <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8> %a0, <16 x i8> %a1)
    495   ret <16 x i8> %res
    496 }
    497 declare <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8>, <16 x i8>) nounwind readnone
    498 
    499 define <4 x i32> @test_int_x86_xop_vpcomtrued(<4 x i32> %a0, <4 x i32> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomtrued:
    500   ; CHECK: vpcomtrued
    501   %res = call <4 x i32> @llvm.x86.xop.vpcomtrued(<4 x i32> %a0, <4 x i32> %a1)
    502   ret <4 x i32> %res
    503 }
    504 declare <4 x i32> @llvm.x86.xop.vpcomtrued(<4 x i32>, <4 x i32>) nounwind readnone
    505 
    506 define <2 x i64> @test_int_x86_xop_vpcomtrueq(<2 x i64> %a0, <2 x i64> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomtrueq:
    507   ; CHECK: vpcomtrueq
    508   %res = call <2 x i64> @llvm.x86.xop.vpcomtrueq(<2 x i64> %a0, <2 x i64> %a1)
    509   ret <2 x i64> %res
    510 }
    511 declare <2 x i64> @llvm.x86.xop.vpcomtrueq(<2 x i64>, <2 x i64>) nounwind readnone
    512 
    513 define <16 x i8> @test_int_x86_xop_vpcomtrueub(<16 x i8> %a0, <16 x i8> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomtrueub:
    514   ; CHECK: vpcomtrueub
    515   %res = call <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8> %a0, <16 x i8> %a1)
    516   ret <16 x i8> %res
    517 }
    518 declare <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8>, <16 x i8>) nounwind readnone
    519 
    520 define <4 x i32> @test_int_x86_xop_vpcomtrueud(<4 x i32> %a0, <4 x i32> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomtrueud:
    521   ; CHECK: vpcomtrueud
    522   %res = call <4 x i32> @llvm.x86.xop.vpcomtrueud(<4 x i32> %a0, <4 x i32> %a1)
    523   ret <4 x i32> %res
    524 }
    525 declare <4 x i32> @llvm.x86.xop.vpcomtrueud(<4 x i32>, <4 x i32>) nounwind readnone
    526 
    527 define <2 x i64> @test_int_x86_xop_vpcomtrueuq(<2 x i64> %a0, <2 x i64> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomtrueuq:
    528   ; CHECK: vpcomtrueuq
    529   %res = call <2 x i64> @llvm.x86.xop.vpcomtrueuq(<2 x i64> %a0, <2 x i64> %a1)
    530   ret <2 x i64> %res
    531 }
    532 declare <2 x i64> @llvm.x86.xop.vpcomtrueuq(<2 x i64>, <2 x i64>) nounwind readnone
    533 
    534 define <8 x i16> @test_int_x86_xop_vpcomtrueuw(<8 x i16> %a0, <8 x i16> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomtrueuw:
    535   ; CHECK: vpcomtrueuw
    536   %res = call <8 x i16> @llvm.x86.xop.vpcomtrueuw(<8 x i16> %a0, <8 x i16> %a1)
    537   ret <8 x i16> %res
    538 }
    539 declare <8 x i16> @llvm.x86.xop.vpcomtrueuw(<8 x i16>, <8 x i16>) nounwind readnone
    540 
    541 define <8 x i16> @test_int_x86_xop_vpcomtruew(<8 x i16> %a0, <8 x i16> %a1) {
          ; CHECK-LABEL: test_int_x86_xop_vpcomtruew:
    542   ; CHECK: vpcomtruew
    543   %res = call <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16> %a0, <8 x i16> %a1)
    544   ret <8 x i16> %res
    545 }
    546 declare <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16>, <8 x i16>) nounwind readnone
    547 
    548 define <4 x i32> @test_int_x86_xop_vphaddbd(<16 x i8> %a0) {
          ; Horizontal-add intrinsics: one vector source, result vector with wider
          ; elements. Each match is anchored to its own function.
          ; CHECK-LABEL: test_int_x86_xop_vphaddbd:
    549   ; CHECK: vphaddbd
    550   %res = call <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8> %a0)
    551   ret <4 x i32> %res
    552 }
    553 declare <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8>) nounwind readnone
    554 
    555 define <2 x i64> @test_int_x86_xop_vphaddbq(<16 x i8> %a0) {
          ; CHECK-LABEL: test_int_x86_xop_vphaddbq:
    556   ; CHECK: vphaddbq
    557   %res = call <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8> %a0)
    558   ret <2 x i64> %res
    559 }
    560 declare <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8>) nounwind readnone
    561 
    562 define <8 x i16> @test_int_x86_xop_vphaddbw(<16 x i8> %a0) {
          ; CHECK-LABEL: test_int_x86_xop_vphaddbw:
    563   ; CHECK: vphaddbw
    564   %res = call <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8> %a0)
    565   ret <8 x i16> %res
    566 }
    567 declare <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8>) nounwind readnone
    568 
    569 define <2 x i64> @test_int_x86_xop_vphadddq(<4 x i32> %a0) {
          ; CHECK-LABEL: test_int_x86_xop_vphadddq:
    570   ; CHECK: vphadddq
    571   %res = call <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32> %a0)
    572   ret <2 x i64> %res
    573 }
    574 declare <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32>) nounwind readnone
    575 
    576 define <4 x i32> @test_int_x86_xop_vphaddubd(<16 x i8> %a0) {
          ; CHECK-LABEL: test_int_x86_xop_vphaddubd:
    577   ; CHECK: vphaddubd
    578   %res = call <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8> %a0)
    579   ret <4 x i32> %res
    580 }
    581 declare <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8>) nounwind readnone
    582 
    583 define <2 x i64> @test_int_x86_xop_vphaddubq(<16 x i8> %a0) {
          ; CHECK-LABEL: test_int_x86_xop_vphaddubq:
    584   ; CHECK: vphaddubq
    585   %res = call <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8> %a0)
    586   ret <2 x i64> %res
    587 }
    588 declare <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8>) nounwind readnone
    589 
    590 define <8 x i16> @test_int_x86_xop_vphaddubw(<16 x i8> %a0) {
          ; CHECK-LABEL: test_int_x86_xop_vphaddubw:
    591   ; CHECK: vphaddubw
    592   %res = call <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8> %a0)
    593   ret <8 x i16> %res
    594 }
    595 declare <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8>) nounwind readnone
    596 
    597 define <2 x i64> @test_int_x86_xop_vphaddudq(<4 x i32> %a0) {
          ; CHECK-LABEL: test_int_x86_xop_vphaddudq:
    598   ; CHECK: vphaddudq
    599   %res = call <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32> %a0)
    600   ret <2 x i64> %res
    601 }
    602 declare <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32>) nounwind readnone
    603 
    604 define <4 x i32> @test_int_x86_xop_vphadduwd(<8 x i16> %a0) {
          ; CHECK-LABEL: test_int_x86_xop_vphadduwd:
    605   ; CHECK: vphadduwd
    606   %res = call <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16> %a0)
    607   ret <4 x i32> %res
    608 }
    609 declare <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16>) nounwind readnone
    610 
    611 define <2 x i64> @test_int_x86_xop_vphadduwq(<8 x i16> %a0) {
          ; CHECK-LABEL: test_int_x86_xop_vphadduwq:
    612   ; CHECK: vphadduwq
    613   %res = call <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16> %a0)
    614   ret <2 x i64> %res
    615 }
    616 declare <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16>) nounwind readnone
    617 
    618 define <4 x i32> @test_int_x86_xop_vphaddwd(<8 x i16> %a0) {
          ; CHECK-LABEL: test_int_x86_xop_vphaddwd:
    619   ; CHECK: vphaddwd
    620   %res = call <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16> %a0)
    621   ret <4 x i32> %res
    622 }
    623 declare <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16>) nounwind readnone
    624 
    625 define <2 x i64> @test_int_x86_xop_vphaddwq(<8 x i16> %a0) {
          ; CHECK-LABEL: test_int_x86_xop_vphaddwq:
    626   ; CHECK: vphaddwq
    627   %res = call <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16> %a0)
    628   ret <2 x i64> %res
    629 }
    630 declare <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16>) nounwind readnone
    631 
    632 define <8 x i16> @test_int_x86_xop_vphsubbw(<16 x i8> %a0) {
          ; Horizontal-subtract intrinsics: register forms plus memory-source forms
          ; for the dq and wd variants. Each match is anchored to its own function.
          ; CHECK-LABEL: test_int_x86_xop_vphsubbw:
    633   ; CHECK: vphsubbw
    634   %res = call <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8> %a0)
    635   ret <8 x i16> %res
    636 }
    637 declare <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8>) nounwind readnone
    638 
    639 define <2 x i64> @test_int_x86_xop_vphsubdq(<4 x i32> %a0) {
          ; CHECK-LABEL: test_int_x86_xop_vphsubdq:
    640   ; CHECK: vphsubdq
    641   %res = call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %a0)
    642   ret <2 x i64> %res
    643 }
    644 define <2 x i64> @test_int_x86_xop_vphsubdq_mem(<4 x i32>* %a0) {
          ; The source vector is loaded from %a0; no separate vmovaps may be emitted
          ; between the function label and the instruction.
          ; CHECK-LABEL: test_int_x86_xop_vphsubdq_mem:
    645   ; CHECK-NOT: vmovaps
    646   ; CHECK: vphsubdq
    647   %vec = load <4 x i32>, <4 x i32>* %a0
    648   %res = call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %vec)
    649   ret <2 x i64> %res
    650 }
    651 declare <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32>) nounwind readnone
    652 
    653 define <4 x i32> @test_int_x86_xop_vphsubwd(<8 x i16> %a0) {
          ; CHECK-LABEL: test_int_x86_xop_vphsubwd:
    654   ; CHECK: vphsubwd
    655   %res = call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %a0)
    656   ret <4 x i32> %res
    657 }
    658 define <4 x i32> @test_int_x86_xop_vphsubwd_mem(<8 x i16>* %a0) {
          ; The source vector is loaded from %a0; same no-vmovaps requirement.
          ; CHECK-LABEL: test_int_x86_xop_vphsubwd_mem:
    659   ; CHECK-NOT: vmovaps
    660   ; CHECK: vphsubwd
    661   %vec = load <8 x i16>, <8 x i16>* %a0
    662   %res = call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %vec)
    663   ret <4 x i32> %res
    664 }
    665 declare <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16>) nounwind readnone
    666 
    667 define <4 x i32> @test_int_x86_xop_vpmacsdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
          ; llc must select vpmacsdd for this call. An operand is required after
          ; the mnemonic because the bare name also occurs in this function's symbol.
          ; CHECK-LABEL: test_int_x86_xop_vpmacsdd:
    668   ; CHECK: vpmacsdd {{.*}}%xmm
    669   %res = call <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) ;
    670   ret <4 x i32> %res
    671 }
    672 declare <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
    673 
    674 define <2 x i64> @test_int_x86_xop_vpmacsdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
          ; llc must select vpmacsdqh for this call. An operand is required after
          ; the mnemonic because the bare name also occurs in this function's symbol.
          ; CHECK-LABEL: test_int_x86_xop_vpmacsdqh:
    675   ; CHECK: vpmacsdqh {{.*}}%xmm
    676   %res = call <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) ;
    677   ret <2 x i64> %res
    678 }
    679 declare <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
    680 
    681 define <2 x i64> @test_int_x86_xop_vpmacsdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
          ; llc must select vpmacsdql for this call. An operand is required after
          ; the mnemonic because the bare name also occurs in this function's symbol.
          ; CHECK-LABEL: test_int_x86_xop_vpmacsdql:
    682   ; CHECK: vpmacsdql {{.*}}%xmm
    683   %res = call <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) ;
    684   ret <2 x i64> %res
    685 }
    686 declare <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
    687 
    688 define <4 x i32> @test_int_x86_xop_vpmacssdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
          ; llc must select vpmacssdd for this call. An operand is required after
          ; the mnemonic because the bare name also occurs in this function's symbol.
          ; CHECK-LABEL: test_int_x86_xop_vpmacssdd:
    689   ; CHECK: vpmacssdd {{.*}}%xmm
    690   %res = call <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) ;
    691   ret <4 x i32> %res
    692 }
    693 declare <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
    694 
    695 define <2 x i64> @test_int_x86_xop_vpmacssdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
          ; llc must select vpmacssdqh for this call. An operand is required after
          ; the mnemonic because the bare name also occurs in this function's symbol.
          ; CHECK-LABEL: test_int_x86_xop_vpmacssdqh:
    696   ; CHECK: vpmacssdqh {{.*}}%xmm
    697   %res = call <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) ;
    698   ret <2 x i64> %res
    699 }
    700 declare <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
    701 
    702 define <2 x i64> @test_int_x86_xop_vpmacssdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
          ; llc must select vpmacssdql for this call. An operand is required after
          ; the mnemonic because the bare name also occurs in this function's symbol.
          ; CHECK-LABEL: test_int_x86_xop_vpmacssdql:
    703   ; CHECK: vpmacssdql {{.*}}%xmm
    704   %res = call <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) ;
    705   ret <2 x i64> %res
    706 }
    707 declare <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
    708 
    709 define <4 x i32> @test_int_x86_xop_vpmacsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
          ; llc must select vpmacsswd for this call. An operand is required after
          ; the mnemonic because the bare name also occurs in this function's symbol.
          ; CHECK-LABEL: test_int_x86_xop_vpmacsswd:
    710   ; CHECK: vpmacsswd {{.*}}%xmm
    711   %res = call <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) ;
    712   ret <4 x i32> %res
    713 }
    714 declare <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
    715 
    716 define <8 x i16> @test_int_x86_xop_vpmacssww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) {
          ; llc must select vpmacssww for this call. An operand is required after
          ; the mnemonic because the bare name also occurs in this function's symbol.
          ; CHECK-LABEL: test_int_x86_xop_vpmacssww:
    717   ; CHECK: vpmacssww {{.*}}%xmm
    718   %res = call <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) ;
    719   ret <8 x i16> %res
    720 }
    721 declare <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
    722 
    723 define <4 x i32> @test_int_x86_xop_vpmacswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
          ; llc must select vpmacswd for this call. An operand is required after
          ; the mnemonic because the bare name also occurs in this function's symbol.
          ; CHECK-LABEL: test_int_x86_xop_vpmacswd:
    724   ; CHECK: vpmacswd {{.*}}%xmm
    725   %res = call <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) ;
    726   ret <4 x i32> %res
    727 }
    728 declare <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
    729 
    730 define <8 x i16> @test_int_x86_xop_vpmacsww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) {
          ; llc must select vpmacsww for this call. An operand is required after
          ; the mnemonic because the bare name also occurs in this function's symbol.
          ; CHECK-LABEL: test_int_x86_xop_vpmacsww:
    731   ; CHECK: vpmacsww {{.*}}%xmm
    732   %res = call <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) ;
    733   ret <8 x i16> %res
    734 }
    735 declare <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
    736 
    737 define <4 x i32> @test_int_x86_xop_vpmadcsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
          ; llc must select vpmadcsswd for this call. An operand is required after
          ; the mnemonic because the bare name also occurs in this function's symbol.
          ; CHECK-LABEL: test_int_x86_xop_vpmadcsswd:
    738   ; CHECK: vpmadcsswd {{.*}}%xmm
    739   %res = call <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) ;
    740   ret <4 x i32> %res
    741 }
    742 declare <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
    743 
    744 define <4 x i32> @test_int_x86_xop_vpmadcswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
          ; llc must select vpmadcswd for this call. An operand is required after
          ; the mnemonic because the bare name also occurs in this function's symbol.
          ; CHECK-LABEL: test_int_x86_xop_vpmadcswd:
    745   ; CHECK: vpmadcswd {{.*}}%xmm
    746   %res = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) ;
    747   ret <4 x i32> %res
    748 }
    749 define <4 x i32> @test_int_x86_xop_vpmadcswd_mem(<8 x i16> %a0, <8 x i16>* %a1, <4 x i32> %a2) {
          ; Second operand loaded from memory: vpmadcswd must appear with no
          ; vmovaps before it inside this function.
          ; CHECK-LABEL: test_int_x86_xop_vpmadcswd_mem:
    750   ; CHECK-NOT: vmovaps
    751   ; CHECK: vpmadcswd {{.*}}%xmm
    752   %vec = load <8 x i16>, <8 x i16>* %a1
    753   %res = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %a0, <8 x i16> %vec, <4 x i32> %a2) ;
    754   ret <4 x i32> %res
    755 }
    756 declare <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
    757 
    758 define <16 x i8> @test_int_x86_xop_vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
          ; llc must select vpperm for this call. An operand is required after
          ; the mnemonic because the bare name also occurs in this function's symbol.
          ; CHECK-LABEL: test_int_x86_xop_vpperm:
    759   ; CHECK: vpperm {{.*}}%xmm
    760   %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ;
    761   ret <16 x i8> %res
    762 }
    763 define <16 x i8> @test_int_x86_xop_vpperm_rm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %a2) {
          ; Third operand loaded from memory: vpperm must appear with no vmovaps
          ; before it inside this function.
          ; CHECK-LABEL: test_int_x86_xop_vpperm_rm:
    764   ; CHECK-NOT: vmovaps
    765   ; CHECK: vpperm {{.*}}%xmm
    766   %vec = load <16 x i8>, <16 x i8>* %a2
    767   %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %vec) ;
    768   ret <16 x i8> %res
    769 }
    770 define <16 x i8> @test_int_x86_xop_vpperm_mr(<16 x i8> %a0, <16 x i8>* %a1, <16 x i8> %a2) {
          ; Second operand loaded from memory: vpperm must appear with no vmovaps
          ; before it inside this function.
          ; CHECK-LABEL: test_int_x86_xop_vpperm_mr:
    771   ; CHECK-NOT: vmovaps
    772   ; CHECK: vpperm {{.*}}%xmm
    773   %vec = load <16 x i8>, <16 x i8>* %a1
    774   %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %vec, <16 x i8> %a2) ;
    775   ret <16 x i8> %res
    776 }
    777 declare <16 x i8> @llvm.x86.xop.vpperm(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
    778 
    779 define <16 x i8> @test_int_x86_xop_vprotb(<16 x i8> %a0, <16 x i8> %a1) {
          ; llc must select vprotb for this call. An operand is required after
          ; the mnemonic because the bare name also occurs in this function's symbol.
          ; CHECK-LABEL: test_int_x86_xop_vprotb:
    780   ; CHECK: vprotb {{.*}}%xmm
    781   %res = call <16 x i8> @llvm.x86.xop.vprotb(<16 x i8> %a0, <16 x i8> %a1) ;
    782   ret <16 x i8> %res
    783 }
    784 declare <16 x i8> @llvm.x86.xop.vprotb(<16 x i8>, <16 x i8>) nounwind readnone
    785 
    786 define <4 x i32> @test_int_x86_xop_vprotd(<4 x i32> %a0, <4 x i32> %a1) {
          ; llc must select vprotd for this call. An operand is required after
          ; the mnemonic because the bare name also occurs in this function's symbol.
          ; CHECK-LABEL: test_int_x86_xop_vprotd:
    787   ; CHECK: vprotd {{.*}}%xmm
    788   %res = call <4 x i32> @llvm.x86.xop.vprotd(<4 x i32> %a0, <4 x i32> %a1) ;
    789   ret <4 x i32> %res
    790 }
    791 declare <4 x i32> @llvm.x86.xop.vprotd(<4 x i32>, <4 x i32>) nounwind readnone
    792 
    793 define <2 x i64> @test_int_x86_xop_vprotq(<2 x i64> %a0, <2 x i64> %a1) {
          ; llc must select vprotq for this call. An operand is required after
          ; the mnemonic because the bare name also occurs in this function's symbol.
          ; CHECK-LABEL: test_int_x86_xop_vprotq:
    794   ; CHECK: vprotq {{.*}}%xmm
    795   %res = call <2 x i64> @llvm.x86.xop.vprotq(<2 x i64> %a0, <2 x i64> %a1) ;
    796   ret <2 x i64> %res
    797 }
    798 declare <2 x i64> @llvm.x86.xop.vprotq(<2 x i64>, <2 x i64>) nounwind readnone
    799 
    800 define <8 x i16> @test_int_x86_xop_vprotw(<8 x i16> %a0, <8 x i16> %a1) {
          ; llc must select vprotw for this call. An operand is required after
          ; the mnemonic because the bare name also occurs in this function's symbol.
          ; CHECK-LABEL: test_int_x86_xop_vprotw:
    801   ; CHECK: vprotw {{.*}}%xmm
    802   %res = call <8 x i16> @llvm.x86.xop.vprotw(<8 x i16> %a0, <8 x i16> %a1) ;
    803   ret <8 x i16> %res
    804 }
    805 declare <8 x i16> @llvm.x86.xop.vprotw(<8 x i16>, <8 x i16>) nounwind readnone
    806 
    807 define <16 x i8> @test_int_x86_xop_vprotbi(<16 x i8> %a0) {
          ; Immediate-count form (i8 1): llc must select vprotb. An operand is
          ; required after the mnemonic because "vprotb" is also a substring of
          ; this function's symbol name.
          ; CHECK-LABEL: test_int_x86_xop_vprotbi:
    808   ; CHECK: vprotb {{.*}}%xmm
    809   %res = call <16 x i8> @llvm.x86.xop.vprotbi(<16 x i8> %a0, i8 1) ;
    810   ret <16 x i8> %res
    811 }
    812 declare <16 x i8> @llvm.x86.xop.vprotbi(<16 x i8>, i8) nounwind readnone
    813 
    814 define <4 x i32> @test_int_x86_xop_vprotdi(<4 x i32> %a0) {
          ; Immediate-count form (i8 -2): llc must select vprotd. An operand is
          ; required after the mnemonic because "vprotd" is also a substring of
          ; this function's symbol name.
          ; CHECK-LABEL: test_int_x86_xop_vprotdi:
    815   ; CHECK: vprotd {{.*}}%xmm
    816   %res = call <4 x i32> @llvm.x86.xop.vprotdi(<4 x i32> %a0, i8 -2) ;
    817   ret <4 x i32> %res
    818 }
    819 declare <4 x i32> @llvm.x86.xop.vprotdi(<4 x i32>, i8) nounwind readnone
    820 
    821 define <2 x i64> @test_int_x86_xop_vprotqi(<2 x i64> %a0) {
          ; Immediate-count form (i8 3): llc must select vprotq. An operand is
          ; required after the mnemonic because "vprotq" is also a substring of
          ; this function's symbol name.
          ; CHECK-LABEL: test_int_x86_xop_vprotqi:
    822   ; CHECK: vprotq {{.*}}%xmm
    823   %res = call <2 x i64> @llvm.x86.xop.vprotqi(<2 x i64> %a0, i8 3) ;
    824   ret <2 x i64> %res
    825 }
    826 declare <2 x i64> @llvm.x86.xop.vprotqi(<2 x i64>, i8) nounwind readnone
    827 
    828 define <8 x i16> @test_int_x86_xop_vprotwi(<8 x i16> %a0) {
          ; Immediate-count form (i8 -4): llc must select vprotw. An operand is
          ; required after the mnemonic because "vprotw" is also a substring of
          ; this function's symbol name.
          ; CHECK-LABEL: test_int_x86_xop_vprotwi:
    829   ; CHECK: vprotw {{.*}}%xmm
    830   %res = call <8 x i16> @llvm.x86.xop.vprotwi(<8 x i16> %a0, i8 -4) ;
    831   ret <8 x i16> %res
    832 }
    833 declare <8 x i16> @llvm.x86.xop.vprotwi(<8 x i16>, i8) nounwind readnone
    834 
    835 define <16 x i8> @test_int_x86_xop_vpshab(<16 x i8> %a0, <16 x i8> %a1) {
          ; llc must select vpshab for this call. An operand is required after
          ; the mnemonic because the bare name also occurs in this function's symbol.
          ; CHECK-LABEL: test_int_x86_xop_vpshab:
    836   ; CHECK: vpshab {{.*}}%xmm
    837   %res = call <16 x i8> @llvm.x86.xop.vpshab(<16 x i8> %a0, <16 x i8> %a1) ;
    838   ret <16 x i8> %res
    839 }
    840 declare <16 x i8> @llvm.x86.xop.vpshab(<16 x i8>, <16 x i8>) nounwind readnone
    841 
    842 define <4 x i32> @test_int_x86_xop_vpshad(<4 x i32> %a0, <4 x i32> %a1) {
          ; llc must select vpshad for this call. An operand is required after
          ; the mnemonic because the bare name also occurs in this function's symbol.
          ; CHECK-LABEL: test_int_x86_xop_vpshad:
    843   ; CHECK: vpshad {{.*}}%xmm
    844   %res = call <4 x i32> @llvm.x86.xop.vpshad(<4 x i32> %a0, <4 x i32> %a1) ;
    845   ret <4 x i32> %res
    846 }
    847 declare <4 x i32> @llvm.x86.xop.vpshad(<4 x i32>, <4 x i32>) nounwind readnone
    848 
    849 define <2 x i64> @test_int_x86_xop_vpshaq(<2 x i64> %a0, <2 x i64> %a1) {
          ; llc must select vpshaq for this call. An operand is required after
          ; the mnemonic because the bare name also occurs in this function's symbol.
          ; CHECK-LABEL: test_int_x86_xop_vpshaq:
    850   ; CHECK: vpshaq {{.*}}%xmm
    851   %res = call <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64> %a0, <2 x i64> %a1) ;
    852   ret <2 x i64> %res
    853 }
    854 declare <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64>, <2 x i64>) nounwind readnone
    855 
    856 define <8 x i16> @test_int_x86_xop_vpshaw(<8 x i16> %a0, <8 x i16> %a1) {
          ; llc must select vpshaw for this call. An operand is required after
          ; the mnemonic because the bare name also occurs in this function's symbol.
          ; CHECK-LABEL: test_int_x86_xop_vpshaw:
    857   ; CHECK: vpshaw {{.*}}%xmm
    858   %res = call <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16> %a0, <8 x i16> %a1) ;
    859   ret <8 x i16> %res
    860 }
    861 declare <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16>, <8 x i16>) nounwind readnone
    862 
    863 define <16 x i8> @test_int_x86_xop_vpshlb(<16 x i8> %a0, <16 x i8> %a1) {
          ; llc must select vpshlb for this call. An operand is required after
          ; the mnemonic because the bare name also occurs in this function's symbol.
          ; CHECK-LABEL: test_int_x86_xop_vpshlb:
    864   ; CHECK: vpshlb {{.*}}%xmm
    865   %res = call <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8> %a0, <16 x i8> %a1) ;
    866   ret <16 x i8> %res
    867 }
    868 declare <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8>, <16 x i8>) nounwind readnone
    869 
    870 define <4 x i32> @test_int_x86_xop_vpshld(<4 x i32> %a0, <4 x i32> %a1) {
          ; llc must select vpshld for this call. An operand is required after
          ; the mnemonic because the bare name also occurs in this function's symbol.
          ; CHECK-LABEL: test_int_x86_xop_vpshld:
    871   ; CHECK: vpshld {{.*}}%xmm
    872   %res = call <4 x i32> @llvm.x86.xop.vpshld(<4 x i32> %a0, <4 x i32> %a1) ;
    873   ret <4 x i32> %res
    874 }
    875 declare <4 x i32> @llvm.x86.xop.vpshld(<4 x i32>, <4 x i32>) nounwind readnone
    876 
    877 define <2 x i64> @test_int_x86_xop_vpshlq(<2 x i64> %a0, <2 x i64> %a1) {
          ; llc must select vpshlq for this call. An operand is required after
          ; the mnemonic because the bare name also occurs in this function's symbol.
          ; CHECK-LABEL: test_int_x86_xop_vpshlq:
    878   ; CHECK: vpshlq {{.*}}%xmm
    879   %res = call <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64> %a0, <2 x i64> %a1) ;
    880   ret <2 x i64> %res
    881 }
    882 declare <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64>, <2 x i64>) nounwind readnone
    883 
    884 define <8 x i16> @test_int_x86_xop_vpshlw(<8 x i16> %a0, <8 x i16> %a1) {
          ; llc must select vpshlw for this call. An operand is required after
          ; the mnemonic because the bare name also occurs in this function's symbol.
          ; CHECK-LABEL: test_int_x86_xop_vpshlw:
    885   ; CHECK: vpshlw {{.*}}%xmm
    886   %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %a0, <8 x i16> %a1) ;
    887   ret <8 x i16> %res
    888 }
    889 define <8 x i16> @test_int_x86_xop_vpshlw_rm(<8 x i16> %a0, <8 x i16>* %a1) {
          ; Second operand loaded from memory: vpshlw must appear with no vmovaps
          ; before it inside this function.
          ; CHECK-LABEL: test_int_x86_xop_vpshlw_rm:
    890   ; CHECK-NOT: vmovaps
    891   ; CHECK: vpshlw {{.*}}%xmm
    892   %vec = load <8 x i16>, <8 x i16>* %a1
    893   %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %a0, <8 x i16> %vec) ;
    894   ret <8 x i16> %res
    895 }
    896 define <8 x i16> @test_int_x86_xop_vpshlw_mr(<8 x i16>* %a0, <8 x i16> %a1) {
          ; First operand loaded from memory: vpshlw must appear with no vmovaps
          ; before it inside this function.
          ; CHECK-LABEL: test_int_x86_xop_vpshlw_mr:
    897   ; CHECK-NOT: vmovaps
    898   ; CHECK: vpshlw {{.*}}%xmm
    899   %vec = load <8 x i16>, <8 x i16>* %a0
    900   %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %vec, <8 x i16> %a1) ;
    901   ret <8 x i16> %res
    902 }
    903 declare <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16>, <8 x i16>) nounwind readnone
    904 
    905 define <4 x float> @test_int_x86_xop_vfrcz_ss(<4 x float> %a0) {
          ; Register input: vfrczss must appear with no "mov" text before it.
          ; The label bounds that negative match to this function's output.
          ; CHECK-LABEL: test_int_x86_xop_vfrcz_ss:
    906   ; CHECK-NOT: mov
    907   ; CHECK: vfrczss
    908   %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0) ;
    909   ret <4 x float> %res
    910 }
    911 define <4 x float> @test_int_x86_xop_vfrcz_ss_mem(float* %a0) {
          ; Input is a float loaded from memory and inserted into lane 0 of an
          ; undef vector; vfrczss must still appear with no "mov" text before it.
          ; CHECK-LABEL: test_int_x86_xop_vfrcz_ss_mem:
    912   ; CHECK-NOT: mov
    913   ; CHECK: vfrczss
    914   %elem = load float, float* %a0
    915   %vec = insertelement <4 x float> undef, float %elem, i32 0
    916   %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %vec) ;
    917   ret <4 x float> %res
    918 }
    919 declare <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>) nounwind readnone
    920 
    921 define <2 x double> @test_int_x86_xop_vfrcz_sd(<2 x double> %a0) {
          ; Register input: vfrczsd must appear with no "mov" text before it.
          ; The label bounds that negative match to this function's output.
          ; CHECK-LABEL: test_int_x86_xop_vfrcz_sd:
    922   ; CHECK-NOT: mov
    923   ; CHECK: vfrczsd
    924   %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0) ;
    925   ret <2 x double> %res
    926 }
    927 define <2 x double> @test_int_x86_xop_vfrcz_sd_mem(double* %a0) {
          ; Input is a double loaded from memory and inserted into lane 0 of an
          ; undef vector; vfrczsd must still appear with no "mov" text before it.
          ; CHECK-LABEL: test_int_x86_xop_vfrcz_sd_mem:
    928   ; CHECK-NOT: mov
    929   ; CHECK: vfrczsd
    930   %elem = load double, double* %a0
    931   %vec = insertelement <2 x double> undef, double %elem, i32 0
    932   %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %vec) ;
    933   ret <2 x double> %res
    934 }
    935 declare <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double>) nounwind readnone
    936 
    937 define <2 x double> @test_int_x86_xop_vfrcz_pd(<2 x double> %a0) {
          ; Register input: llc must select vfrczpd. The label ties the match to
          ; this function's output.
          ; CHECK-LABEL: test_int_x86_xop_vfrcz_pd:
    938   ; CHECK: vfrczpd
    939   %res = call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %a0) ;
    940   ret <2 x double> %res
    941 }
    942 define <2 x double> @test_int_x86_xop_vfrcz_pd_mem(<2 x double>* %a0) {
          ; Memory-operand variant: vfrczpd must appear with no vmovaps before it
          ; inside this function.
          ; CHECK-LABEL: test_int_x86_xop_vfrcz_pd_mem:
    943   ; CHECK-NOT: vmovaps
    944   ; CHECK: vfrczpd
    945   %vec = load <2 x double>, <2 x double>* %a0
    946   %res = call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %vec) ;
    947   ret <2 x double> %res
    948 }
    949 declare <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double>) nounwind readnone
    950 
    951 define <4 x double> @test_int_x86_xop_vfrcz_pd_256(<4 x double> %a0) {
          ; 256-bit register input: llc must select vfrczpd followed by a ymm
          ; operand. The label ties both matches to this function's output.
          ; CHECK-LABEL: test_int_x86_xop_vfrcz_pd_256:
    952   ; CHECK: vfrczpd
    953   ; CHECK: ymm
    954   %res = call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %a0) ;
    955   ret <4 x double> %res
    956 }
    957 define <4 x double> @test_int_x86_xop_vfrcz_pd_256_mem(<4 x double>* %a0) {
          ; 256-bit memory-operand variant: vfrczpd (then ymm) must appear with no
          ; vmovaps before it inside this function.
          ; CHECK-LABEL: test_int_x86_xop_vfrcz_pd_256_mem:
    958   ; CHECK-NOT: vmovaps
    959   ; CHECK: vfrczpd
    960   ; CHECK: ymm
    961   %vec = load <4 x double>, <4 x double>* %a0
    962   %res = call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %vec) ;
    963   ret <4 x double> %res
    964 }
    965 declare <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double>) nounwind readnone
    966 
    967 define <4 x float> @test_int_x86_xop_vfrcz_ps(<4 x float> %a0) {
          ; Register input: llc must select vfrczps. The label ties the match to
          ; this function's output.
          ; CHECK-LABEL: test_int_x86_xop_vfrcz_ps:
    968   ; CHECK: vfrczps
    969   %res = call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %a0) ;
    970   ret <4 x float> %res
    971 }
    972 define <4 x float> @test_int_x86_xop_vfrcz_ps_mem(<4 x float>* %a0) {
          ; Memory-operand variant: vfrczps must appear with no vmovaps before it
          ; inside this function.
          ; CHECK-LABEL: test_int_x86_xop_vfrcz_ps_mem:
    973   ; CHECK-NOT: vmovaps
    974   ; CHECK: vfrczps
    975   %vec = load <4 x float>, <4 x float>* %a0
    976   %res = call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %vec) ;
    977   ret <4 x float> %res
    978 }
    979 declare <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float>) nounwind readnone
    980 
    981 define <8 x float> @test_int_x86_xop_vfrcz_ps_256(<8 x float> %a0) {
          ; 256-bit register input: llc must select vfrczps followed by a ymm
          ; operand. The label ties both matches to this function's output.
          ; CHECK-LABEL: test_int_x86_xop_vfrcz_ps_256:
    982   ; CHECK: vfrczps
    983   ; CHECK: ymm
    984   %res = call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %a0) ;
    985   ret <8 x float> %res
    986 }
    987 define <8 x float> @test_int_x86_xop_vfrcz_ps_256_mem(<8 x float>* %a0) {
          ; 256-bit memory-operand variant: vfrczps (then ymm) must appear with no
          ; vmovaps before it inside this function.
          ; CHECK-LABEL: test_int_x86_xop_vfrcz_ps_256_mem:
    988   ; CHECK-NOT: vmovaps
    989   ; CHECK: vfrczps
    990   ; CHECK: ymm
    991   %vec = load <8 x float>, <8 x float>* %a0
    992   %res = call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %vec) ;
    993   ret <8 x float> %res
    994 }
    995 declare <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float>) nounwind readnone
    996 
    997 define <16 x i8> @test_int_x86_xop_vpcomb(<16 x i8> %a0, <16 x i8> %a1) {
          ; Byte compare with immediate 0. An operand is required after the
          ; mnemonic because "vpcomb" is also a substring of this function's symbol.
          ; NOTE(review): llc may print this as "vpcomb $0, ..." or with a
          ; predicate alias (presumably vpcomltb); the pattern accepts both - confirm.
          ; CHECK-LABEL: test_int_x86_xop_vpcomb:
    998   ; CHECK: vpcom{{(lt)?}}b {{.*}}%xmm
    999   %res = call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %a0, <16 x i8> %a1, i8 0) ;
   1000   ret <16 x i8> %res
   1001 }
   1002 declare <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8>, <16 x i8>, i8) nounwind readnone
   1003 
   1004 define <8 x i16> @test_int_x86_xop_vpcomw(<8 x i16> %a0, <8 x i16> %a1) {
          ; Word compare with immediate 0. An operand is required after the
          ; mnemonic because "vpcomw" is also a substring of this function's symbol.
          ; NOTE(review): llc may print this as "vpcomw $0, ..." or with a
          ; predicate alias (presumably vpcomltw); the pattern accepts both - confirm.
          ; CHECK-LABEL: test_int_x86_xop_vpcomw:
   1005   ; CHECK: vpcom{{(lt)?}}w {{.*}}%xmm
   1006   %res = call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %a0, <8 x i16> %a1, i8 0) ;
   1007   ret <8 x i16> %res
   1008 }
   1009 declare <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16>, <8 x i16>, i8) nounwind readnone
   1010 
   1011 define <4 x i32> @test_int_x86_xop_vpcomd(<4 x i32> %a0, <4 x i32> %a1) {
          ; Doubleword compare with immediate 0. An operand is required after the
          ; mnemonic because "vpcomd" is also a substring of this function's symbol.
          ; NOTE(review): llc may print this as "vpcomd $0, ..." or with a
          ; predicate alias (presumably vpcomltd); the pattern accepts both - confirm.
          ; CHECK-LABEL: test_int_x86_xop_vpcomd:
   1012   ; CHECK: vpcom{{(lt)?}}d {{.*}}%xmm
   1013   %res = call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %a0, <4 x i32> %a1, i8 0) ;
   1014   ret <4 x i32> %res
   1015 }
   1016 declare <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32>, <4 x i32>, i8) nounwind readnone
   1017 
   1018 define <2 x i64> @test_int_x86_xop_vpcomq(<2 x i64> %a0, <2 x i64> %a1) {
          ; Quadword compare with immediate 0. An operand is required after the
          ; mnemonic because "vpcomq" is also a substring of this function's symbol.
          ; NOTE(review): llc may print this as "vpcomq $0, ..." or with a
          ; predicate alias (presumably vpcomltq); the pattern accepts both - confirm.
          ; CHECK-LABEL: test_int_x86_xop_vpcomq:
   1019   ; CHECK: vpcom{{(lt)?}}q {{.*}}%xmm
   1020   %res = call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ;
   1021   ret <2 x i64> %res
   1022 }
   1023 declare <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64>, <2 x i64>, i8) nounwind readnone
   1024 
   1025 define <16 x i8> @test_int_x86_xop_vpcomub(<16 x i8> %a0, <16 x i8> %a1) {
          ; Unsigned byte compare with immediate 0. An operand is required after the
          ; mnemonic because "vpcomub" is also a substring of this function's symbol.
          ; NOTE(review): llc may print this as "vpcomub $0, ..." or with a
          ; predicate alias (presumably vpcomltub); the pattern accepts both - confirm.
          ; CHECK-LABEL: test_int_x86_xop_vpcomub:
   1026   ; CHECK: vpcom{{(lt)?}}ub {{.*}}%xmm
   1027   %res = call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %a0, <16 x i8> %a1, i8 0) ;
   1028   ret <16 x i8> %res
   1029 }
   1030 declare <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8>, <16 x i8>, i8) nounwind readnone
   1031 
   1032 define <8 x i16> @test_int_x86_xop_vpcomuw(<8 x i16> %a0, <8 x i16> %a1) {
          ; Unsigned word compare with immediate 0. An operand is required after the
          ; mnemonic because "vpcomuw" is also a substring of this function's symbol.
          ; NOTE(review): llc may print this as "vpcomuw $0, ..." or with a
          ; predicate alias (presumably vpcomltuw); the pattern accepts both - confirm.
          ; CHECK-LABEL: test_int_x86_xop_vpcomuw:
   1033   ; CHECK: vpcom{{(lt)?}}uw {{.*}}%xmm
   1034   %res = call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %a0, <8 x i16> %a1, i8 0) ;
   1035   ret <8 x i16> %res
   1036 }
   1037 declare <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16>, <8 x i16>, i8) nounwind readnone
   1038 
   1039 define <4 x i32> @test_int_x86_xop_vpcomud(<4 x i32> %a0, <4 x i32> %a1) {
          ; Unsigned doubleword compare with immediate 0. An operand is required after
          ; the mnemonic because "vpcomud" is also a substring of this function's symbol.
          ; NOTE(review): llc may print this as "vpcomud $0, ..." or with a
          ; predicate alias (presumably vpcomltud); the pattern accepts both - confirm.
          ; CHECK-LABEL: test_int_x86_xop_vpcomud:
   1040   ; CHECK: vpcom{{(lt)?}}ud {{.*}}%xmm
   1041   %res = call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %a0, <4 x i32> %a1, i8 0) ;
   1042   ret <4 x i32> %res
   1043 }
   1044 declare <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32>, <4 x i32>, i8) nounwind readnone
   1045 
   1046 define <2 x i64> @test_int_x86_xop_vpcomuq(<2 x i64> %a0, <2 x i64> %a1) {
          ; Unsigned quadword compare with immediate 0. An operand is required after
          ; the mnemonic because "vpcomuq" is also a substring of this function's symbol.
          ; NOTE(review): llc may print this as "vpcomuq $0, ..." or with a
          ; predicate alias (presumably vpcomltuq); the pattern accepts both - confirm.
          ; CHECK-LABEL: test_int_x86_xop_vpcomuq:
   1047   ; CHECK: vpcom{{(lt)?}}uq {{.*}}%xmm
   1048   %res = call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ;
   1049   ret <2 x i64> %res
   1050 }
   1051 declare <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64>, <2 x i64>, i8) nounwind readnone
   1052 
   1053