; llvm/test/CodeGen/X86: lowering of 128-bit subvector broadcasts to 256-bit vectors on AVX.
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64
      4 
; A 128-bit load whose result is concatenated with itself to 256 bits should
; lower to a single vbroadcastf128 from memory (no separate load + insert).
define <4 x double> @test_broadcast_2f64_4f64(<2 x double> *%p) nounwind {
; X32-LABEL: test_broadcast_2f64_4f64:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_2f64_4f64:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    retq
 %1 = load <2 x double>, <2 x double> *%p
 %2 = shufflevector <2 x double> %1, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
 ret <4 x double> %2
}
     20 
; Integer variant of the 2->4 x i64 broadcast; with only AVX1 available the
; checks expect the float-domain vbroadcastf128 form even for integer data.
define <4 x i64> @test_broadcast_2i64_4i64(<2 x i64> *%p) nounwind {
; X32-LABEL: test_broadcast_2i64_4i64:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_2i64_4i64:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    retq
 %1 = load <2 x i64>, <2 x i64> *%p
 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
 ret <4 x i64> %2
}
     36 
; 4 x float loaded and duplicated into both 128-bit lanes of a ymm register;
; expected to fold to one memory-operand vbroadcastf128.
define <8 x float> @test_broadcast_4f32_8f32(<4 x float> *%p) nounwind {
; X32-LABEL: test_broadcast_4f32_8f32:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_4f32_8f32:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    retq
 %1 = load <4 x float>, <4 x float> *%p
 %2 = shufflevector <4 x float> %1, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
 ret <8 x float> %2
}
     52 
; 4 x i32 splatted across both ymm lanes; AVX1-only, so the checks accept the
; float-domain broadcast instruction for integer elements.
define <8 x i32> @test_broadcast_4i32_8i32(<4 x i32> *%p) nounwind {
; X32-LABEL: test_broadcast_4i32_8i32:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_4i32_8i32:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    retq
 %1 = load <4 x i32>, <4 x i32> *%p
 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
 ret <8 x i32> %2
}
     68 
; 8 x i16 lane-duplicate: the identity shuffle mask repeated for the upper
; half should still collapse into a single subvector broadcast load.
define <16 x i16> @test_broadcast_8i16_16i16(<8 x i16> *%p) nounwind {
; X32-LABEL: test_broadcast_8i16_16i16:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_8i16_16i16:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    retq
 %1 = load <8 x i16>, <8 x i16> *%p
 %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ret <16 x i16> %2
}
     84 
; Smallest element type (i8): a full 32-element duplicate mask must also be
; recognized as a 128-bit subvector broadcast from memory.
define <32 x i8> @test_broadcast_16i8_32i8(<16 x i8> *%p) nounwind {
; X32-LABEL: test_broadcast_16i8_32i8:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_16i8_32i8:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    retq
 %1 = load <16 x i8>, <16 x i8> *%p
 %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ret <32 x i8> %2
}
    100 
; "reuse" case: the loaded 128-bit value is ALSO stored, so it must live in a
; register. The checks expect a plain load + vinsertf128 self-insert instead
; of a second broadcast load from memory.
define <4 x double> @test_broadcast_2f64_4f64_reuse(<2 x double>* %p0, <2 x double>* %p1) {
; X32-LABEL: test_broadcast_2f64_4f64_reuse:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovaps (%ecx), %xmm1
; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X32-NEXT:    vmovaps %xmm1, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_2f64_4f64_reuse:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps (%rdi), %xmm1
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X64-NEXT:    vmovaps %xmm1, (%rsi)
; X64-NEXT:    retq
 %1 = load <2 x double>, <2 x double>* %p0
 %2 = shufflevector <2 x double> %1, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
 store <2 x double> %1, <2 x double>* %p1
 ret <4 x double> %2
}
    122 
; i64 reuse variant: loaded value is both broadcast and stored, so codegen
; keeps it in xmm1 and builds the ymm result with vinsertf128.
define <4 x i64> @test_broadcast_2i64_4i64_reuse(<2 x i64>* %p0, <2 x i64>* %p1) {
; X32-LABEL: test_broadcast_2i64_4i64_reuse:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovaps (%ecx), %xmm1
; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X32-NEXT:    vmovaps %xmm1, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_2i64_4i64_reuse:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps (%rdi), %xmm1
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X64-NEXT:    vmovaps %xmm1, (%rsi)
; X64-NEXT:    retq
 %1 = load <2 x i64>, <2 x i64>* %p0
 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
 store <2 x i64> %1, <2 x i64>* %p1
 ret <4 x i64> %2
}
    144 
; f32 reuse variant: value needed in a register for the trailing store, so
; the broadcast is formed via vinsertf128 rather than a memory broadcast.
define <8 x float> @test_broadcast_4f32_8f32_reuse(<4 x float>* %p0, <4 x float>* %p1) {
; X32-LABEL: test_broadcast_4f32_8f32_reuse:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovaps (%ecx), %xmm1
; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X32-NEXT:    vmovaps %xmm1, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_4f32_8f32_reuse:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps (%rdi), %xmm1
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X64-NEXT:    vmovaps %xmm1, (%rsi)
; X64-NEXT:    retq
 %1 = load <4 x float>, <4 x float>* %p0
 %2 = shufflevector <4 x float> %1, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
 store <4 x float> %1, <4 x float>* %p1
 ret <8 x float> %2
}
    166 
; i32 reuse variant: same load/broadcast/store pattern; expects register
; reuse (vinsertf128 of xmm1 into ymm0) instead of re-reading memory.
define <8 x i32> @test_broadcast_4i32_8i32_reuse(<4 x i32>* %p0, <4 x i32>* %p1) {
; X32-LABEL: test_broadcast_4i32_8i32_reuse:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovaps (%ecx), %xmm1
; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X32-NEXT:    vmovaps %xmm1, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_4i32_8i32_reuse:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps (%rdi), %xmm1
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X64-NEXT:    vmovaps %xmm1, (%rsi)
; X64-NEXT:    retq
 %1 = load <4 x i32>, <4 x i32> *%p0
 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
 store <4 x i32> %1, <4 x i32>* %p1
 ret <8 x i32> %2
}
    188 
; i16 reuse variant (note: nounwind, unlike the f32/i32 reuse tests above);
; expects the register-reuse vinsertf128 sequence.
define <16 x i16> @test_broadcast_8i16_16i16_reuse(<8 x i16> *%p0, <8 x i16> *%p1) nounwind {
; X32-LABEL: test_broadcast_8i16_16i16_reuse:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovaps (%ecx), %xmm1
; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X32-NEXT:    vmovaps %xmm1, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_8i16_16i16_reuse:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps (%rdi), %xmm1
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X64-NEXT:    vmovaps %xmm1, (%rsi)
; X64-NEXT:    retq
 %1 = load <8 x i16>, <8 x i16> *%p0
 %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 store <8 x i16> %1, <8 x i16>* %p1
 ret <16 x i16> %2
}
    210 
; i8 reuse variant: full 32-lane duplicate mask plus a store of the source;
; expects the same load/vinsertf128/store sequence as the other reuse tests.
define <32 x i8> @test_broadcast_16i8_32i8_reuse(<16 x i8> *%p0, <16 x i8> *%p1) nounwind {
; X32-LABEL: test_broadcast_16i8_32i8_reuse:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovaps (%ecx), %xmm1
; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X32-NEXT:    vmovaps %xmm1, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_16i8_32i8_reuse:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps (%rdi), %xmm1
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X64-NEXT:    vmovaps %xmm1, (%rsi)
; X64-NEXT:    retq
 %1 = load <16 x i8>, <16 x i8> *%p0
 %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 store <16 x i8> %1, <16 x i8>* %p1
 ret <32 x i8> %2
}
    232 
; Regression test for PR29088: an unrelated zero store between the load and
; the duplicating shuffle must not prevent folding the load into a memory
; broadcast. The zero is built in a register (vxorps) and stored separately.
define <8 x i32> @PR29088(<4 x i32>* %p0, <8 x float>* %p1) {
; X32-LABEL: PR29088:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X32-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    vmovaps %ymm1, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: PR29088:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    vmovaps %ymm1, (%rsi)
; X64-NEXT:    retq
  %ld = load <4 x i32>, <4 x i32>* %p0
  store <8 x float> zeroinitializer, <8 x float>* %p1
  %shuf = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i32> %shuf
}
    254