; XOP shuffle tests: VPPERM and VPERMIL2 asm-comment decoding (X86).
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx,+xop | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+xop | FileCheck %s --check-prefix=X64

;
; VPPERM
;

      9 define <16 x i8> @vpperm_shuffle_unary(<16 x i8> %a0) {
     10 ; X32-LABEL: vpperm_shuffle_unary:
     11 ; X32:       # BB#0:
     12 ; X32-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
     13 ; X32-NEXT:    retl
     14 ;
     15 ; X64-LABEL: vpperm_shuffle_unary:
     16 ; X64:       # BB#0:
     17 ; X64-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
     18 ; X64-NEXT:    retq
     19   %1 = tail call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a0, <16 x i8> <i8 31, i8 14, i8 29, i8 12, i8 27, i8 10, i8 25, i8 8, i8 23, i8 6, i8 21, i8 4, i8 19, i8 2, i8 17, i8 0>)
     20   ret <16 x i8> %1
     21 }
     22 
     23 define <16 x i8> @vpperm_shuffle_unary_undef(<16 x i8> %a0) {
     24 ; X32-LABEL: vpperm_shuffle_unary_undef:
     25 ; X32:       # BB#0:
     26 ; X32-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
     27 ; X32-NEXT:    retl
     28 ;
     29 ; X64-LABEL: vpperm_shuffle_unary_undef:
     30 ; X64:       # BB#0:
     31 ; X64-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
     32 ; X64-NEXT:    retq
     33   %1 = tail call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> undef, <16 x i8> <i8 31, i8 14, i8 29, i8 12, i8 27, i8 10, i8 25, i8 8, i8 23, i8 6, i8 21, i8 4, i8 19, i8 2, i8 17, i8 0>)
     34   ret <16 x i8> %1
     35 }
     36 
     37 define <16 x i8> @vpperm_shuffle_unary_zero(<16 x i8> %a0) {
     38 ; X32-LABEL: vpperm_shuffle_unary_zero:
     39 ; X32:       # BB#0:
     40 ; X32-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3],zero,xmm0[1],zero
     41 ; X32-NEXT:    retl
     42 ;
     43 ; X64-LABEL: vpperm_shuffle_unary_zero:
     44 ; X64:       # BB#0:
     45 ; X64-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3],zero,xmm0[1],zero
     46 ; X64-NEXT:    retq
     47   %1 = tail call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a0, <16 x i8> <i8 31, i8 14, i8 29, i8 12, i8 27, i8 10, i8 25, i8 8, i8 23, i8 6, i8 21, i8 4, i8 19, i8 130, i8 17, i8 128>)
     48   ret <16 x i8> %1
     49 }
     50 
     51 define <16 x i8> @vpperm_shuffle_binary(<16 x i8> %a0, <16 x i8> %a1) {
     52 ; X32-LABEL: vpperm_shuffle_binary:
     53 ; X32:       # BB#0:
     54 ; X32-NEXT:    vpperm {{.*#+}} xmm0 = xmm1[15],xmm0[14],xmm1[13],xmm0[12],xmm1[11],xmm0[10],xmm1[9],xmm0[8],xmm1[7],xmm0[6],xmm1[5],xmm0[4],xmm1[3],xmm0[2],xmm1[1],xmm0[0]
     55 ; X32-NEXT:    retl
     56 ;
     57 ; X64-LABEL: vpperm_shuffle_binary:
     58 ; X64:       # BB#0:
     59 ; X64-NEXT:    vpperm {{.*#+}} xmm0 = xmm1[15],xmm0[14],xmm1[13],xmm0[12],xmm1[11],xmm0[10],xmm1[9],xmm0[8],xmm1[7],xmm0[6],xmm1[5],xmm0[4],xmm1[3],xmm0[2],xmm1[1],xmm0[0]
     60 ; X64-NEXT:    retq
     61   %1 = tail call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> <i8 31, i8 14, i8 29, i8 12, i8 27, i8 10, i8 25, i8 8, i8 23, i8 6, i8 21, i8 4, i8 19, i8 2, i8 17, i8 0>)
     62   ret <16 x i8> %1
     63 }
     64 
     65 define <16 x i8> @vpperm_shuffle_binary_zero(<16 x i8> %a0, <16 x i8> %a1) {
     66 ; X32-LABEL: vpperm_shuffle_binary_zero:
     67 ; X32:       # BB#0:
     68 ; X32-NEXT:    vpperm {{.*#+}} xmm0 = xmm1[15],xmm0[14],xmm1[13],xmm0[12],xmm1[11],xmm0[10],xmm1[9],xmm0[8],xmm1[7],xmm0[6],xmm1[5],xmm0[4],zero,zero,zero,zero
     69 ; X32-NEXT:    retl
     70 ;
     71 ; X64-LABEL: vpperm_shuffle_binary_zero:
     72 ; X64:       # BB#0:
     73 ; X64-NEXT:    vpperm {{.*#+}} xmm0 = xmm1[15],xmm0[14],xmm1[13],xmm0[12],xmm1[11],xmm0[10],xmm1[9],xmm0[8],xmm1[7],xmm0[6],xmm1[5],xmm0[4],zero,zero,zero,zero
     74 ; X64-NEXT:    retq
     75   %1 = tail call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> <i8 31, i8 14, i8 29, i8 12, i8 27, i8 10, i8 25, i8 8, i8 23, i8 6, i8 21, i8 4, i8 147, i8 130, i8 145, i8 128>)
     76   ret <16 x i8> %1
     77 }
     78 
     79 ; we can't decode vpperm's other permute ops
     80 define <16 x i8> @vpperm_shuffle_general(<16 x i8> %a0, <16 x i8> %a1) {
     81 ; X32-LABEL: vpperm_shuffle_general:
     82 ; X32:       # BB#0:
     83 ; X32-NEXT:    vpperm {{\.LCPI.*}}, %xmm0, %xmm0, %xmm0
     84 ; X32-NEXT:    retl
     85 ;
     86 ; X64-LABEL: vpperm_shuffle_general:
     87 ; X64:       # BB#0:
     88 ; X64-NEXT:    vpperm {{.*}}(%rip), %xmm0, %xmm0, %xmm0
     89 ; X64-NEXT:    retq
     90   %1 = tail call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a0, <16 x i8> <i8 31, i8 14, i8 29, i8 12, i8 27, i8 10, i8 25, i8 8, i8 23, i8 6, i8 21, i8 4, i8 179, i8 162, i8 177, i8 160>)
     91   ret <16 x i8> %1
     92 }
     93 
;
; VPERMIL2
;

     98 define <2 x double> @vpermil2pd_21(<2 x double> %a0, <2 x double> %a1) {
     99 ; X32-LABEL: vpermil2pd_21:
    100 ; X32:       # BB#0:
    101 ; X32-NEXT:    vpermil2pd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    102 ; X32-NEXT:    retl
    103 ;
    104 ; X64-LABEL: vpermil2pd_21:
    105 ; X64:       # BB#0:
    106 ; X64-NEXT:    vpermil2pd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    107 ; X64-NEXT:    retq
    108   %1 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> <i64 4, i64 2>, i8 0)
    109   ret <2 x double> %1
    110 }
    111 
    112 define <4 x double> @vpermil2pd256_0062(<4 x double> %a0, <4 x double> %a1) {
    113 ; X32-LABEL: vpermil2pd256_0062:
    114 ; X32:       # BB#0:
    115 ; X32-NEXT:    vpermil2pd {{.*#+}} ymm0 = ymm0[0,0],ymm1[2],ymm0[2]
    116 ; X32-NEXT:    retl
    117 ;
    118 ; X64-LABEL: vpermil2pd256_0062:
    119 ; X64:       # BB#0:
    120 ; X64-NEXT:    vpermil2pd {{.*#+}} ymm0 = ymm0[0,0],ymm1[2],ymm0[2]
    121 ; X64-NEXT:    retq
    122   %1 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x i64> <i64 0, i64 0, i64 4, i64 0>, i8 0)
    123   ret <4 x double> %1
    124 }
    125 
    126 define <4 x double> @vpermil2pd256_zz73(<4 x double> %a0, <4 x double> %a1) {
    127 ; X32-LABEL: vpermil2pd256_zz73:
    128 ; X32:       # BB#0:
    129 ; X32-NEXT:    vpermil2pd {{.*#+}} ymm0 = zero,zero,ymm1[3],ymm0[3]
    130 ; X32-NEXT:    retl
    131 ;
    132 ; X64-LABEL: vpermil2pd256_zz73:
    133 ; X64:       # BB#0:
    134 ; X64-NEXT:    vpermil2pd {{.*#+}} ymm0 = zero,zero,ymm1[3],ymm0[3]
    135 ; X64-NEXT:    retq
    136   %1 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x i64> <i64 0, i64 0, i64 14, i64 10>, i8 3)
    137   ret <4 x double> %1
    138 }
    139 
    140 define <4 x float> @vpermil2ps_0561(<4 x float> %a0, <4 x float> %a1) {
    141 ; X32-LABEL: vpermil2ps_0561:
    142 ; X32:       # BB#0:
    143 ; X32-NEXT:    vpermil2ps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[1]
    144 ; X32-NEXT:    retl
    145 ;
    146 ; X64-LABEL: vpermil2ps_0561:
    147 ; X64:       # BB#0:
    148 ; X64-NEXT:    vpermil2ps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[1]
    149 ; X64-NEXT:    retq
    150   %1 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 5, i32 6, i32 1>, i8 0)
    151   ret <4 x float> %1
    152 }
    153 
    154 define <8 x float> @vpermil2ps256_098144FE(<8 x float> %a0, <8 x float> %a1) {
    155 ; X32-LABEL: vpermil2ps256_098144FE:
    156 ; X32:       # BB#0:
    157 ; X32-NEXT:    vpermil2ps {{.*#+}} ymm0 = ymm0[0],ymm1[1,0],ymm0[1,4,4],ymm1[7,6]
    158 ; X32-NEXT:    retl
    159 ;
    160 ; X64-LABEL: vpermil2ps256_098144FE:
    161 ; X64:       # BB#0:
    162 ; X64-NEXT:    vpermil2ps {{.*#+}} ymm0 = ymm0[0],ymm1[1,0],ymm0[1,4,4],ymm1[7,6]
    163 ; X64-NEXT:    retq
    164   %1 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 5, i32 4, i32 1, i32 0, i32 0, i32 7, i32 6>, i8 0)
    165   ret <8 x float> %1
    166 }
    167 
    168 define <8 x float> @vpermil2ps256_0zz8BzzA(<8 x float> %a0, <8 x float> %a1) {
    169 ; X32-LABEL: vpermil2ps256_0zz8BzzA:
    170 ; X32:       # BB#0:
    171 ; X32-NEXT:    vpermil2ps {{.*#+}} ymm0 = ymm0[0],zero,zero,ymm1[0,7],zero,zero,ymm1[6]
    172 ; X32-NEXT:    retl
    173 ;
    174 ; X64-LABEL: vpermil2ps256_0zz8BzzA:
    175 ; X64:       # BB#0:
    176 ; X64-NEXT:    vpermil2ps {{.*#+}} ymm0 = ymm0[0],zero,zero,ymm1[0,7],zero,zero,ymm1[6]
    177 ; X64-NEXT:    retq
    178   %1 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 8, i32 8, i32 4, i32 7, i32 8, i32 8, i32 6>, i8 2)
    179   ret <8 x float> %1
    180 }
    181 
    182 declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x i64>, i8) nounwind readnone
    183 declare <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x i64>, i8) nounwind readnone
    184 
    185 declare <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>, <4 x i32>, i8) nounwind readnone
    186 declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x i32>, i8) nounwind readnone
    187 
    188 declare <16 x i8> @llvm.x86.xop.vpperm(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
    189