Home | History | Annotate | Download | only in InstCombine
      1 ; RUN: opt < %s -instcombine -S | FileCheck %s
      2 
      3 ; This should never happen, but make sure we don't crash handling a non-constant immediate byte.
      4 
      5 define <4 x double> @perm2pd_non_const_imm(<4 x double> %a0, <4 x double> %a1, i8 %b) {
      6 ; The selector byte %b is not a constant, so the intrinsic call is expected to survive unchanged.
      7 ; CHECK-LABEL: @perm2pd_non_const_imm
      8 ; CHECK-NEXT:  call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %b)
      9 ; CHECK-NEXT:  ret <4 x double>
     10   %perm = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %b)
     11   ret <4 x double> %perm
     12 }
     13 
     14 
     15 ; In the following 4 tests, both zero mask bits of the immediate are set.
     16 
     17 define <4 x double> @perm2pd_0x88(<4 x double> %a0, <4 x double> %a1) {
     18 ; imm = 136 (0x88): both zero-mask bits are set, so the whole result is expected to fold to zero.
     19 ; CHECK-LABEL: @perm2pd_0x88
     20 ; CHECK-NEXT:  ret <4 x double> zeroinitializer
     21   %perm = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 136)
     22   ret <4 x double> %perm
     23 }
     24 
     25 define <8 x float> @perm2ps_0x88(<8 x float> %a0, <8 x float> %a1) {
     26 ; imm = 136 (0x88) on the <8 x float> variant: the result is expected to fold to zero.
     27 ; CHECK-LABEL: @perm2ps_0x88
     28 ; CHECK-NEXT:  ret <8 x float> zeroinitializer
     29   %perm = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 136)
     30   ret <8 x float> %perm
     31 }
     32 
     33 define <8 x i32> @perm2si_0x88(<8 x i32> %a0, <8 x i32> %a1) {
     34 ; imm = 136 (0x88) on the <8 x i32> variant: the result is expected to fold to zero.
     35 ; CHECK-LABEL: @perm2si_0x88
     36 ; CHECK-NEXT:  ret <8 x i32> zeroinitializer
     37   %perm = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 136)
     38   ret <8 x i32> %perm
     39 }
     40 
     41 define <4 x i64> @perm2i_0x88(<4 x i64> %a0, <4 x i64> %a1) {
     42 ; imm = 136 (0x88) on the AVX2 <4 x i64> intrinsic: the result is expected to fold to zero.
     43 ; CHECK-LABEL: @perm2i_0x88
     44 ; CHECK-NEXT:  ret <4 x i64> zeroinitializer
     45   %perm = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 136)
     46   ret <4 x i64> %perm
     47 }
     48 
     49 
     50 ; The other control bits are ignored when zero mask bits of the immediate are set.
     51 
     52 define <4 x double> @perm2pd_0xff(<4 x double> %a0, <4 x double> %a1) {
     53 ; imm = 255 (0xff): every bit is set; with both zero-mask bits on, the result is still all zeros.
     54 ; CHECK-LABEL: @perm2pd_0xff
     55 ; CHECK-NEXT:  ret <4 x double> zeroinitializer
     56   %perm = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 255)
     57   ret <4 x double> %perm
     58 }
     59 
     60 
     61 ; The following 16 tests are simple shuffles, except for 2 cases where we can just return one of the
     62 ; source vectors. Verify that we generate the right shuffle masks and undef source operand where possible.
     63 
     64 define <4 x double> @perm2pd_0x00(<4 x double> %a0, <4 x double> %a1) {
     65 ; imm = 0 (0x00): both halves of the result are the low half of %a0; %a1 is not needed.
     66 ; CHECK-LABEL: @perm2pd_0x00
     67 ; CHECK-NEXT:  %1 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
     68 ; CHECK-NEXT:  ret <4 x double> %1
     69   %perm = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 0)
     70   ret <4 x double> %perm
     71 }
     72 
     73 define <4 x double> @perm2pd_0x01(<4 x double> %a0, <4 x double> %a1) {
     74 ; imm = 1 (0x01): low result half = high half of %a0, high result half = low half of %a0.
     75 ; CHECK-LABEL: @perm2pd_0x01
     76 ; CHECK-NEXT:  %1 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
     77 ; CHECK-NEXT:  ret <4 x double> %1
     78   %perm = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 1)
     79   ret <4 x double> %perm
     80 }
     81 
     82 define <4 x double> @perm2pd_0x02(<4 x double> %a0, <4 x double> %a1) {
     83 ; imm = 2 (0x02): low result half = low half of %a1, high result half = low half of %a0.
     84 ; CHECK-LABEL: @perm2pd_0x02
     85 ; CHECK-NEXT:  %1 = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
     86 ; CHECK-NEXT:  ret <4 x double> %1
     87   %perm = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 2)
     88   ret <4 x double> %perm
     89 }
     90 
     91 define <4 x double> @perm2pd_0x03(<4 x double> %a0, <4 x double> %a1) {
     92 ; imm = 3 (0x03): low result half = high half of %a1, high result half = low half of %a0.
     93 ; CHECK-LABEL: @perm2pd_0x03
     94 ; CHECK-NEXT:  %1 = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
     95 ; CHECK-NEXT:  ret <4 x double> %1
     96   %perm = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 3)
     97   ret <4 x double> %perm
     98 }
     99 
    100 define <4 x double> @perm2pd_0x10(<4 x double> %a0, <4 x double> %a1) {
    101 ; imm = 16 (0x10): low and high halves of %a0 stay in place, so %a0 itself is returned.
    102 ; CHECK-LABEL: @perm2pd_0x10
    103 ; CHECK-NEXT:  ret <4 x double> %a0
    104   %perm = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 16)
    105   ret <4 x double> %perm
    106 }
    107 
    108 define <4 x double> @perm2pd_0x11(<4 x double> %a0, <4 x double> %a1) {
    109 ; imm = 17 (0x11): both halves of the result are the high half of %a0; %a1 is not needed.
    110 ; CHECK-LABEL: @perm2pd_0x11
    111 ; CHECK-NEXT:  %1 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
    112 ; CHECK-NEXT:  ret <4 x double> %1
    113   %perm = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 17)
    114   ret <4 x double> %perm
    115 }
    116 
    117 define <4 x double> @perm2pd_0x12(<4 x double> %a0, <4 x double> %a1) {
    118 ; imm = 18 (0x12): low result half = low half of %a1, high result half = high half of %a0.
    119 ; CHECK-LABEL: @perm2pd_0x12
    120 ; CHECK-NEXT:  %1 = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
    121 ; CHECK-NEXT:  ret <4 x double> %1
    122   %perm = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 18)
    123   ret <4 x double> %perm
    124 }
    125 
    126 define <4 x double> @perm2pd_0x13(<4 x double> %a0, <4 x double> %a1) {
    127 ; imm = 19 (0x13): low result half = high half of %a1, high result half = high half of %a0.
    128 ; CHECK-LABEL: @perm2pd_0x13
    129 ; CHECK-NEXT:  %1 = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
    130 ; CHECK-NEXT:  ret <4 x double> %1
    131   %perm = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 19)
    132   ret <4 x double> %perm
    133 }
    134 
    135 define <4 x double> @perm2pd_0x20(<4 x double> %a0, <4 x double> %a1) {
    136 ; imm = 32 (0x20): low result half = low half of %a0, high result half = low half of %a1.
    137 ; CHECK-LABEL: @perm2pd_0x20
    138 ; CHECK-NEXT:  %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
    139 ; CHECK-NEXT:  ret <4 x double> %1
    140   %perm = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 32)
    141   ret <4 x double> %perm
    142 }
    143 
    144 define <4 x double> @perm2pd_0x21(<4 x double> %a0, <4 x double> %a1) {
    145 ; imm = 33 (0x21): low result half = high half of %a0, high result half = low half of %a1.
    146 ; CHECK-LABEL: @perm2pd_0x21
    147 ; CHECK-NEXT:  %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
    148 ; CHECK-NEXT:  ret <4 x double> %1
    149   %perm = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 33)
    150   ret <4 x double> %perm
    151 }
    152 
    153 define <4 x double> @perm2pd_0x22(<4 x double> %a0, <4 x double> %a1) {
    154 ; imm = 34 (0x22): both halves of the result are the low half of %a1; %a0 is not needed.
    155 ; CHECK-LABEL: @perm2pd_0x22
    156 ; CHECK-NEXT:  %1 = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
    157 ; CHECK-NEXT:  ret <4 x double> %1
    158   %perm = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 34)
    159   ret <4 x double> %perm
    160 }
    161 
    162 define <4 x double> @perm2pd_0x23(<4 x double> %a0, <4 x double> %a1) {
    163 ; imm = 35 (0x23): low result half = high half of %a1, high result half = low half of %a1.
    164 ; CHECK-LABEL: @perm2pd_0x23
    165 ; CHECK-NEXT:  %1 = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
    166 ; CHECK-NEXT:  ret <4 x double> %1
    167   %perm = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 35)
    168   ret <4 x double> %perm
    169 }
    170 
    171 define <4 x double> @perm2pd_0x30(<4 x double> %a0, <4 x double> %a1) {
    172 ; imm = 48 (0x30): low result half = low half of %a0, high result half = high half of %a1.
    173 ; CHECK-LABEL: @perm2pd_0x30
    174 ; CHECK-NEXT:  %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
    175 ; CHECK-NEXT:  ret <4 x double> %1
    176   %perm = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 48)
    177   ret <4 x double> %perm
    178 }
    179 
    180 define <4 x double> @perm2pd_0x31(<4 x double> %a0, <4 x double> %a1) {
    181 ; imm = 49 (0x31): low result half = high half of %a0, high result half = high half of %a1.
    182 ; CHECK-LABEL: @perm2pd_0x31
    183 ; CHECK-NEXT:  %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
    184 ; CHECK-NEXT:  ret <4 x double> %1
    185   %perm = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 49)
    186   ret <4 x double> %perm
    187 }
    188 
    189 define <4 x double> @perm2pd_0x32(<4 x double> %a0, <4 x double> %a1) {
    190 ; imm = 50 (0x32): low and high halves of %a1 stay in place, so %a1 itself is returned.
    191 ; CHECK-LABEL: @perm2pd_0x32
    192 ; CHECK-NEXT:  ret <4 x double> %a1
    193   %perm = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 50)
    194   ret <4 x double> %perm
    195 }
    196 
    197 define <4 x double> @perm2pd_0x33(<4 x double> %a0, <4 x double> %a1) {
    198 ; imm = 51 (0x33): both halves of the result are the high half of %a1; %a0 is not needed.
    199 ; CHECK-LABEL: @perm2pd_0x33
    200 ; CHECK-NEXT:  %1 = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
    201 ; CHECK-NEXT:  ret <4 x double> %1
    202   %perm = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 51)
    203   ret <4 x double> %perm
    204 }
    205 
    206 ; Confirm that a mask for 32-bit elements is also correct.
    207 
    208 define <8 x float> @perm2ps_0x31(<8 x float> %a0, <8 x float> %a1) {
    209 ; imm = 49 (0x31) with 8 elements: elements 4-7 of %a0 followed by elements 4-7 of %a1 (mask indices 12-15).
    210 ; CHECK-LABEL: @perm2ps_0x31
    211 ; CHECK-NEXT:  %1 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
    212 ; CHECK-NEXT:  ret <8 x float> %1
    213   %perm = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 49)
    214   ret <8 x float> %perm
    215 }
    216 
    217 
    218 ; Confirm that the AVX2 version works the same.
    219 
    220 define <4 x i64> @perm2i_0x33(<4 x i64> %a0, <4 x i64> %a1) {
    221 ; imm = 51 (0x33) on the AVX2 intrinsic: both halves of the result are the high half of %a1.
    222 ; CHECK-LABEL: @perm2i_0x33
    223 ; CHECK-NEXT:  %1 = shufflevector <4 x i64> %a1, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
    224 ; CHECK-NEXT:  ret <4 x i64> %1
    225   %perm = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 51)
    226   ret <4 x i64> %perm
    227 }
    228 
    229 
    230 ; Confirm that when a single zero mask bit is set, we replace a source vector with zeros.
    231 
    232 define <4 x double> @perm2pd_0x81(<4 x double> %a0, <4 x double> %a1) {
    233   %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 129)
    234   ret <4 x double> %res
    235 ; imm = 129 (0x81): low result half = high half of %a0; the high result half is zeroed, so it is taken from a zero vector operand.
    236 ; CHECK-LABEL: @perm2pd_0x81
    237 ; CHECK-NEXT:  shufflevector <4 x double> %a0, <4 x double> <double 0.0{{.*}}, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
    238 ; CHECK-NEXT:  ret <4 x double>
    239 }
    240 
    241 define <4 x double> @perm2pd_0x83(<4 x double> %a0, <4 x double> %a1) {
    242 ; imm = 131 (0x83): low result half = high half of %a1; the high result half comes from a zero vector.
    243 ; CHECK-LABEL: @perm2pd_0x83
    244 ; CHECK-NEXT:  shufflevector <4 x double> %a1, <4 x double> <double 0.0{{.*}}, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
    245 ; CHECK-NEXT:  ret <4 x double>
    246   %perm = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 131)
    247   ret <4 x double> %perm
    248 }
    249 
    250 define <4 x double> @perm2pd_0x28(<4 x double> %a0, <4 x double> %a1) {
    251 ; imm = 40 (0x28): the low result half comes from a zero vector; high result half = low half of %a1.
    252 ; CHECK-LABEL: @perm2pd_0x28
    253 ; CHECK-NEXT:  shufflevector <4 x double> <double 0.0{{.*}}, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
    254 ; CHECK-NEXT:  ret <4 x double>
    255   %perm = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 40)
    256   ret <4 x double> %perm
    257 }
    258 
    259 define <4 x double> @perm2pd_0x08(<4 x double> %a0, <4 x double> %a1) {
    260 ; imm = 8 (0x08): the low result half comes from a zero vector; high result half = low half of %a0.
    261 ; CHECK-LABEL: @perm2pd_0x08
    262 ; CHECK-NEXT:  shufflevector <4 x double> <double 0.0{{.*}}, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
    263 ; CHECK-NEXT:  ret <4 x double>
    264   %perm = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 8)
    265   ret <4 x double> %perm
    266 }
    267 
    268 ; Check one more with the AVX2 version.
    269 
    270 define <4 x i64> @perm2i_0x28(<4 x i64> %a0, <4 x i64> %a1) {
    271 ; imm = 40 (0x28) on the AVX2 intrinsic: low result half from a zero vector; high result half = low half of %a1.
    272 ; CHECK-LABEL: @perm2i_0x28
    273 ; CHECK-NEXT:  shufflevector <4 x i64> <i64 0{{.*}}, <4 x i64> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
    274 ; CHECK-NEXT:  ret <4 x i64>
    275   %perm = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 40)
    276   ret <4 x i64> %perm
    277 }
    278 
    279 declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
    280 declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
    281 declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone
    282 declare <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64>, <4 x i64>, i8) nounwind readnone
    283 
    284