Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
      2 
      3 ; PR11102
      4 define <4 x float> @test1(<4 x float> %a) nounwind { ; Shuffle of a zero vector with %a; the directives below expect vshufps followed by vpshufd.
      5   %b = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 2, i32 5, i32 undef, i32 undef> ; lane 0 = zero[2], lane 1 = %a[1] (index 5), lanes 2-3 undef
      6   ret <4 x float> %b
      7 ; CHECK: test1:
      8 ; CHECK: vshufps
      9 ; CHECK: vpshufd
     10 }
     11 
     12 ; rdar://10538417
     13 define <3 x i64> @test2(<2 x i64> %v) nounwind readnone { ; Widens %v to three i64 lanes with a zero third lane; the directives expect vinsertf128 and then ret.
     14 ; CHECK: test2:
     15 ; CHECK: vinsertf128
     16   %1 = shufflevector <2 x i64> %v, <2 x i64> %v, <3 x i32> <i32 0, i32 1, i32 undef> ; %v[0], %v[1], undef
     17   %2 = shufflevector <3 x i64> zeroinitializer, <3 x i64> %1, <3 x i32> <i32 3, i32 4, i32 2> ; %1[0], %1[1], zero[2]
     18   ret <3 x i64> %2
     19 ; CHECK: ret
     20 }
     21 
     22 define <4 x i64> @test3(<4 x i64> %a, <4 x i64> %b) nounwind { ; Two-source 4 x i64 shuffle; the directives below expect vperm2f128 and then ret.
     23   %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 undef> ; %b[0], %b[1], %a[2], undef
     24   ret <4 x i64> %c
     25 ; CHECK: test3:
     26 ; CHECK: vperm2f128
     27 ; CHECK: ret
     28 }
     29 
     30 define <8 x float> @test4(float %a) nounwind { ; Inserts a scalar into lane 0 of an all-zero 8 x float; the directives below expect vinsertf128.
     31   %b = insertelement <8 x float> zeroinitializer, float %a, i32 0 ; every lane other than 0 stays zero
     32   ret <8 x float> %b
     33 ; CHECK: test4:
     34 ; CHECK: vinsertf128
     35 }
     36 
     37 ; rdar://10594409
     38 define <8 x float> @test5(float* nocapture %f) nounwind uwtable readonly ssp { ; Loads 4 floats (align 16) and widens to 8 lanes with a zero upper half; the directives expect vmovaps and forbid vxorps and vinsertf128 after it.
     39 entry:
     40   %0 = bitcast float* %f to <4 x float>*
     41   %1 = load <4 x float>* %0, align 16
     42 ; CHECK: test5
     43 ; CHECK: vmovaps
     44 ; CHECK-NOT: vxorps
     45 ; CHECK-NOT: vinsertf128
     46   %shuffle.i = shufflevector <4 x float> %1, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4> ; lanes 0-3 = loaded value, lanes 4-7 = element 0 of the constant (0.0)
     47   ret <8 x float> %shuffle.i
     48 }
     49 
     50 define <4 x double> @test6(double* nocapture %d) nounwind uwtable readonly ssp { ; Loads 2 doubles (align 16) and widens to 4 lanes with a zero upper half; the directives expect vmovaps and forbid vxorps and vinsertf128 after it.
     51 entry:
     52   %0 = bitcast double* %d to <2 x double>*
     53   %1 = load <2 x double>* %0, align 16
     54 ; CHECK: test6
     55 ; CHECK: vmovaps
     56 ; CHECK-NOT: vxorps
     57 ; CHECK-NOT: vinsertf128
     58   %shuffle.i = shufflevector <2 x double> %1, <2 x double> <double 0.000000e+00, double undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 2> ; lanes 0-1 = loaded value, lanes 2-3 = element 0 of the constant (0.0)
     59   ret <4 x double> %shuffle.i
     60 }
     61 
     62 define <16 x i16> @test7(<4 x i16> %a) nounwind { ; Widens 4 x i16 to 16 x i16 with a mostly-undef mask; the directives only require that a ret is emitted.
     63 ; CHECK: test7
     64   %b = shufflevector <4 x i16> %a, <4 x i16> undef, <16 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; lanes 0-1 = %a[1], lanes 2-15 undef
     65 ; CHECK: ret
     66   ret <16 x i16> %b
     67 }
     68 
     69 ; CHECK: test8
     70 define void @test8() { ; Two chained 16 x i64 shuffles between a load and a store in addrspace(1); the directives only require that a ret is emitted.
     71 entry:
     72   %0 = load <16 x i64> addrspace(1)* null, align 128 ; source vector is read from a null pointer
     73   %1 = shufflevector <16 x i64> <i64 undef, i64 undef, i64 0, i64 undef, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 undef, i64 0, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i64> %0, <16 x i32> <i32 17, i32 18, i32 2, i32 undef, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 undef, i32 11, i32 undef, i32 undef, i32 undef, i32 26> ; mask indices 16 and above select from %0, lower indices from the constant
     74   %2 = shufflevector <16 x i64> %1, <16 x i64> %0, <16 x i32> <i32 0, i32 1, i32 2, i32 30, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 undef, i32 11, i32 undef, i32 22, i32 20, i32 15> ; mask indices 16 and above select from %0, lower indices from %1
     75   store <16 x i64> %2, <16 x i64> addrspace(1)* undef, align 128 ; destination pointer is undef
     76 ; CHECK: ret
     77   ret void
     78 }
     79 
     80 ; Extract a value from a shufflevector.
     81 define i32 @test9(<4 x i32> %a) nounwind { ; Extracts one lane from a widening shuffle of %a; the directives expect vpextrd and then ret.
     82 ; CHECK: test9
     83 ; CHECK: vpextrd
     84   %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 undef, i32 4> ; lane 2 of the result is %a[2]
     85   %r = extractelement <8 x i32> %b, i32 2 ; reads lane 2 of the shuffle result
     86 ; CHECK: ret
     87   ret i32 %r
     88 }
     89 
     90 ; Extract a value which is the result of an undef mask.
     91 define i32 @test10(<4 x i32> %a) nounwind { ; Extracts a lane whose shuffle mask entry is undef; the directives reject any line with a lowercase letter before a '#' between the @test10 marker and ret (presumably meaning no instruction is emitted).
     92 ; CHECK: @test10
     93 ; CHECK-NOT: {{^[^#]*[a-z]}}
     94 ; CHECK: ret
     95   %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; lanes 0-1 = %a[1], lanes 2-7 undef
     96   %r = extractelement <8 x i32> %b, i32 2 ; lane 2 is one of the undef lanes
     97   ret i32 %r
     98 }
     99 
    100 define <4 x float> @test11(<4 x float> %a) nounwind  { ; Reverses the four float lanes of a register argument; the directives expect vpshufd $27.
    101 ; CHECK: test11
    102 ; CHECK: vpshufd $27
    103   %tmp1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; result = %a[3], %a[2], %a[1], %a[0]
    104   ret <4 x float> %tmp1
    105 }
    106 
    107 define <4 x float> @test12(<4 x float>* %a) nounwind  { ; Reverses the four float lanes of a vector loaded from memory; the directives expect vpshufd.
    108 ; CHECK: test12
    109 ; CHECK: vpshufd
    110   %tmp0 = load <4 x float>* %a
    111   %tmp1 = shufflevector <4 x float> %tmp0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; result = %tmp0[3], %tmp0[2], %tmp0[1], %tmp0[0]
    112   ret <4 x float> %tmp1
    113 }
    114 
    115 define <4 x i32> @test13(<4 x i32> %a) nounwind  { ; Reverses the four i32 lanes of a register argument; the directives expect vpshufd $27.
    116 ; CHECK: test13
    117 ; CHECK: vpshufd $27
    118   %tmp1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; result = %a[3], %a[2], %a[1], %a[0]
    119   ret <4 x i32> %tmp1
    120 }
    121 
    122 define <4 x i32> @test14(<4 x i32>* %a) nounwind  { ; Reverses the four i32 lanes of a vector loaded from memory; the expected pattern ends in an opening parenthesis, presumably requiring the load to be folded as a memory operand.
    123 ; CHECK: test14
    124 ; CHECK: vpshufd $27, (
    125   %tmp0 = load <4 x i32>* %a
    126   %tmp1 = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; result = %tmp0[3], %tmp0[2], %tmp0[1], %tmp0[0]
    127   ret <4 x i32> %tmp1
    128 }
    129 
    130 ; CHECK: test15
    131 ; CHECK: vpshufd $8
    132 ; CHECK: ret
    133 define <4 x i32> @test15(<2 x i32>%x) nounwind readnone { ; Widens 2 x i32 to 4 x i32; the directives placed before this definition expect vpshufd $8 and then ret.
    134   %x1 = shufflevector <2 x i32> %x, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> ; lanes 0-1 = %x, lanes 2-3 undef
    135   ret <4 x i32>%x1
    136 }
    137 
    138 ; rdar://10974078
    139 define <8 x float> @test16(float* nocapture %f) nounwind uwtable readonly ssp { ; Same widening shuffle as test5 but the load is only 8-byte aligned; the directives expect vmovups and forbid vxorps and vinsertf128 after it.
    140 entry:
    141   %0 = bitcast float* %f to <4 x float>*
    142   %1 = load <4 x float>* %0, align 8 ; under-aligned for a 16-byte vector
    143 ; CHECK: test16
    144 ; CHECK: vmovups
    145 ; CHECK-NOT: vxorps
    146 ; CHECK-NOT: vinsertf128
    147   %shuffle.i = shufflevector <4 x float> %1, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4> ; lanes 0-3 = loaded value, lanes 4-7 = element 0 of the constant (0.0)
    148   ret <8 x float> %shuffle.i
    149 }
    150 
    151 ; PR12413
    152 ; CHECK: shuf1
    153 ; CHECK: vpshufb
    154 ; CHECK: vpshufb
    155 ; CHECK: vpshufb
    156 ; CHECK: vpshufb
    157 define <32 x i8> @shuf1(<32 x i8> %inval1, <32 x i8> %inval2) { ; Selects every even-indexed byte of the two inputs; the directives placed before this definition expect four vpshufb.
    158 entry:
    159  %0 = shufflevector <32 x i8> %inval1, <32 x i8> %inval2, <32 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62> ; lanes 0-15 = even bytes of %inval1, lanes 16-31 = even bytes of %inval2
    160  ret <32 x i8> %0
    161 }
    162 
    163 ; Handle the case where only half of the 256 bits is splittable.
    164 ; CHECK: shuf2
    165 ; CHECK: vpshufb
    166 ; CHECK: vpshufb
    167 ; CHECK: vpextrb
    168 ; CHECK: vpextrb
    169 define <32 x i8> @shuf2(<32 x i8> %inval1, <32 x i8> %inval2) { ; Same mask as shuf1 except lane 16, which takes index 31 instead of 32; the directives placed before this definition expect two vpshufb and two vpextrb.
    170 entry:
    171  %0 = shufflevector <32 x i8> %inval1, <32 x i8> %inval2, <32 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 31, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62> ; lane 16 = %inval1[31], so the upper half mixes both inputs
    172  ret <32 x i8> %0
    173 }
    174 
    175 ; CHECK: blend1
    176 ; CHECK: vblendps
    177 ; CHECK: ret
    178 define <4 x i32> @blend1(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline { ; In-place blend of two 4 x i32 vectors; the directives placed before this definition expect vblendps and then ret.
    179   %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7> ; lanes 0-2 from %a, lane 3 = %b[3]
    180   ret <4 x i32> %t
    181 }
    182 
    183 ; CHECK: blend2
    184 ; CHECK: vblendps
    185 ; CHECK: ret
    186 define <4 x i32> @blend2(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline { ; Alternating in-place blend of two 4 x i32 vectors; the directives placed before this definition expect vblendps and then ret.
    187   %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7> ; lanes 0 and 2 from %a, lanes 1 and 3 from %b
    188   ret <4 x i32> %t
    189 }
    190 
    191 ; CHECK: blend2a
    192 ; CHECK: vblendps
    193 ; CHECK: ret
    194 define <4 x float> @blend2a(<4 x float> %a, <4 x float> %b) nounwind alwaysinline { ; Float variant of blend2; the directives placed before this definition expect vblendps and then ret.
    195   %t = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7> ; lanes 0 and 2 from %a, lanes 1 and 3 from %b
    196   ret <4 x float> %t
    197 }
    198 
    199 ; CHECK: blend3
    200 ; CHECK-NOT: vblendps
    201 ; CHECK: ret
    202 define <4 x i32> @blend3(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline { ; Shuffle that is not an in-place blend; the directives placed before this definition require that no vblendps appears before ret.
    203   %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 2, i32 7> ; lane 0 = %a[1], i.e. an element moved out of its original lane
    204   ret <4 x i32> %t
    205 }
    206 
    207 ; CHECK: blend4
    208 ; CHECK: vblendpd
    209 ; CHECK: ret
    210 define <4 x i64> @blend4(<4 x i64> %a, <4 x i64> %b) nounwind alwaysinline { ; In-place blend of two 4 x i64 vectors; the directives placed before this definition expect vblendpd and then ret.
    211   %t = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7> ; lanes 0-2 from %a, lane 3 = %b[3]
    212   ret <4 x i64> %t
    213 }
    214 
    215 ; CHECK: narrow
    216 ; CHECK: vpermilps
    217 ; CHECK: ret
    218 define <16 x i16> @narrow(<16 x i16> %a) nounwind alwaysinline { ; Single-source 16 x i16 shuffle; the directives placed before this definition expect vpermilps and then ret.
    219   %t = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 1, i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 8, i32 undef, i32 14, i32 15, i32 undef, i32 undef> ; defined entries move adjacent i16 pairs together (2,3 / 6,7 / 4,5 / 10,11 / 14,15)
    220   ret <16 x i16> %t
    221 }
    222 
    223 ;CHECK: test17
    224 ;CHECK-NOT: vinsertf128
    225 ;CHECK: ret
    226 define   <8 x float> @test17(<4 x float> %y) { ; Widens 4 x float to 8 x float with a single defined lane; the directives placed before this definition require that no vinsertf128 appears before ret.
    227   %x = shufflevector <4 x float> %y, <4 x float> undef, <8 x i32> <i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; only lane 1 is defined (= %y[1])
    228   ret <8 x float> %x
    229 }
    230 
    231 ; CHECK: test18
    232 ; CHECK: vmovshdup
    233 ; CHECK: vblendps
    234 ; CHECK: ret
    235 define <8 x float> @test18(<8 x float> %A, <8 x float>%B) nounwind { ; Interleaves the odd-indexed lanes of %A and %B; the directives placed before this definition expect vmovshdup, vblendps and then ret.
    236   %S = shufflevector <8 x float> %A, <8 x float> %B, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> ; A[1], B[1], A[3], B[3], A[5], B[5], A[7], B[7]
    237   ret <8 x float>%S
    238 }
    239 
    240 ; CHECK: test19
    241 ; CHECK: vmovsldup
    242 ; CHECK: vblendps
    243 ; CHECK: ret
    244 define <8 x float> @test19(<8 x float> %A, <8 x float>%B) nounwind { ; Interleaves the even-indexed lanes of %A and %B; the directives placed before this definition expect vmovsldup, vblendps and then ret.
    245   %S = shufflevector <8 x float> %A, <8 x float> %B, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> ; A[0], B[0], A[2], B[2], A[4], B[4], A[6], B[6]
    246   ret <8 x float>%S
    247 }
    248 
    249 ; rdar://12684358
    250 ; Make sure loads happen before stores.
    251 ; CHECK: swap8doubles
    252 ; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
    253 ; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
    254 ; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
    255 ; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
    256 ; CHECK: vmovaps {{[0-9]*}}(%rsi), %ymm{{[0-9]+}}
    257 ; CHECK: vmovaps {{[0-9]*}}(%rsi), %ymm{{[0-9]+}}
    258 ; CHECK: vmovaps %xmm{{[0-9]+}}, {{[0-9]*}}(%rdi)
    259 ; CHECK: vextractf128
    260 ; CHECK: vmovaps %xmm{{[0-9]+}}, {{[0-9]*}}(%rdi)
    261 ; CHECK: vextractf128
    262 ; CHECK: vmovaps %ymm{{[0-9]+}}, {{[0-9]*}}(%rsi)
    263 ; CHECK: vmovaps %ymm{{[0-9]+}}, {{[0-9]*}}(%rsi)
    264 define void @swap8doubles(double* nocapture %A, double* nocapture %C) nounwind uwtable ssp { ; Reads eight doubles from %A and eight from %C, then writes the %C data into %A and the %A data into %C; every load precedes every store in the IR.
    265 entry:
    266   %add.ptr = getelementptr inbounds double* %A, i64 2
    267   %v.i = bitcast double* %A to <2 x double>*
    268   %0 = load <2 x double>* %v.i, align 1 ; A[0..1], align 1
    269   %shuffle.i.i = shufflevector <2 x double> %0, <2 x double> <double 0.000000e+00, double undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 2> ; widen to 4 lanes, upper two lanes = 0.0
    270   %v1.i = bitcast double* %add.ptr to <2 x double>*
    271   %1 = load <2 x double>* %v1.i, align 1 ; A[2..3], align 1
    272   %2 = tail call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %shuffle.i.i, <2 x double> %1, i8 1) nounwind ; first 4 x double assembled from %A
    273   %add.ptr1 = getelementptr inbounds double* %A, i64 6
    274   %add.ptr2 = getelementptr inbounds double* %A, i64 4
    275   %v.i27 = bitcast double* %add.ptr2 to <2 x double>*
    276   %3 = load <2 x double>* %v.i27, align 1 ; A[4..5], align 1
    277   %shuffle.i.i28 = shufflevector <2 x double> %3, <2 x double> <double 0.000000e+00, double undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 2> ; widen to 4 lanes, upper two lanes = 0.0
    278   %v1.i29 = bitcast double* %add.ptr1 to <2 x double>*
    279   %4 = load <2 x double>* %v1.i29, align 1 ; A[6..7], align 1
    280   %5 = tail call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %shuffle.i.i28, <2 x double> %4, i8 1) nounwind ; second 4 x double assembled from %A
    281   %6 = bitcast double* %C to <4 x double>*
    282   %7 = load <4 x double>* %6, align 32 ; C[0..3], align 32
    283   %add.ptr5 = getelementptr inbounds double* %C, i64 4
    284   %8 = bitcast double* %add.ptr5 to <4 x double>*
    285   %9 = load <4 x double>* %8, align 32 ; C[4..7], align 32
    286   %shuffle.i26 = shufflevector <4 x double> %7, <4 x double> undef, <2 x i32> <i32 0, i32 1> ; lanes 0-1 of %7
    287   %10 = tail call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %7, i8 1) ; other half of %7 via the intrinsic
    288   %shuffle.i = shufflevector <4 x double> %9, <4 x double> undef, <2 x i32> <i32 0, i32 1> ; lanes 0-1 of %9
    289   %11 = tail call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %9, i8 1) ; other half of %9 via the intrinsic
    290   store <2 x double> %shuffle.i26, <2 x double>* %v.i, align 16 ; stores begin here, after the last load
    291   store <2 x double> %10, <2 x double>* %v1.i, align 16
    292   store <2 x double> %shuffle.i, <2 x double>* %v.i27, align 16
    293   store <2 x double> %11, <2 x double>* %v1.i29, align 16
    294   store <4 x double> %2, <4 x double>* %6, align 32 ; data read from %A goes to C[0..3]
    295   store <4 x double> %5, <4 x double>* %8, align 32 ; data read from %A goes to C[4..7]
    296   ret void
    297 }
    298 declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone ; AVX intrinsic called by swap8doubles with immediate 1
    299 declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone ; AVX intrinsic called by swap8doubles with immediate 1
    300