; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64

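; Check that a 128-bit load that is splatted to both 128-bit halves of a
; 256-bit vector is folded into a single vbroadcastf128/vbroadcasti128 of
; the memory operand.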
define <4 x double> @test_broadcast_2f64_4f64(<2 x double> *%p) nounwind {
; X32-LABEL: test_broadcast_2f64_4f64:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    vaddpd {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_2f64_4f64:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    vaddpd {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    retq
 %1 = load <2 x double>, <2 x double> *%p
 %2 = shufflevector <2 x double> %1, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
 %3 = fadd <4 x double> %2, <double 1.0, double 2.0, double 3.0, double 4.0>
 ret <4 x double> %3
}

define <4 x i64> @test_broadcast_2i64_4i64(<2 x i64> *%p) nounwind {
; X32-LABEL: test_broadcast_2i64_4i64:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    vpaddq {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_2i64_4i64:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    vpaddq {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    retq
 %1 = load <2 x i64>, <2 x i64> *%p
 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
 %3 = add <4 x i64> %2, <i64 1, i64 2, i64 3, i64 4>
 ret <4 x i64> %3
}

define <8 x float> @test_broadcast_4f32_8f32(<4 x float> *%p) nounwind {
; X32-LABEL: test_broadcast_4f32_8f32:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    vaddps {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_4f32_8f32:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    vaddps {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    retq
 %1 = load <4 x float>, <4 x float> *%p
 %2 = shufflevector <4 x float> %1, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
 %3 = fadd <8 x float> %2, <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>
 ret <8 x float> %3
}

define <8 x i32> @test_broadcast_4i32_8i32(<4 x i32> *%p) nounwind {
; X32-LABEL: test_broadcast_4i32_8i32:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    vpaddd {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_4i32_8i32:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    retq
 %1 = load <4 x i32>, <4 x i32> *%p
 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
 %3 = add <8 x i32> %2, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
 ret <8 x i32> %3
}

define <16 x i16> @test_broadcast_8i16_16i16(<8 x i16> *%p) nounwind {
; X32-LABEL: test_broadcast_8i16_16i16:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    vpaddw {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_8i16_16i16:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    vpaddw {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    retq
 %1 = load <8 x i16>, <8 x i16> *%p
 %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 %3 = add <16 x i16> %2, <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16>
 ret <16 x i16> %3
}

define <32 x i8> @test_broadcast_16i8_32i8(<16 x i8> *%p) nounwind {
; X32-LABEL: test_broadcast_16i8_32i8:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    vpaddb {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_16i8_32i8:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    vpaddb {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    retq
 %1 = load <16 x i8>, <16 x i8> *%p
 %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 %3 = add <32 x i8> %2, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32>
 ret <32 x i8> %3
}

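; If the loaded 128-bit value has another use besides the broadcast shuffle
; (here, a store of the low half), the load is kept as a plain 128-bit load
; whose result feeds both a vinsertf128/vinserti128 and the store, instead
; of being folded into a vbroadcastf128/vbroadcasti128.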
define <4 x double> @test_broadcast_2f64_4f64_reuse(<2 x double>* %p0, <2 x double>* %p1) {
; X32-LABEL: test_broadcast_2f64_4f64_reuse:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovapd (%ecx), %xmm1
; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X32-NEXT:    vaddpd {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT:    vmovapd %xmm1, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_2f64_4f64_reuse:
; X64:       # %bb.0:
; X64-NEXT:    vmovapd (%rdi), %xmm1
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X64-NEXT:    vaddpd {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    vmovapd %xmm1, (%rsi)
; X64-NEXT:    retq
 %1 = load <2 x double>, <2 x double>* %p0
 %2 = shufflevector <2 x double> %1, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
 %3 = fadd <4 x double> %2, <double 1.0, double 2.0, double 3.0, double 4.0>
 store <2 x double> %1, <2 x double>* %p1
 ret <4 x double> %3
}

define <4 x i64> @test_broadcast_2i64_4i64_reuse(<2 x i64>* %p0, <2 x i64>* %p1) {
; X32-LABEL: test_broadcast_2i64_4i64_reuse:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovdqa (%ecx), %xmm1
; X32-NEXT:    vinserti128 $1, %xmm1, %ymm1, %ymm0
; X32-NEXT:    vpaddq {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT:    vmovdqa %xmm1, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_2i64_4i64_reuse:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa (%rdi), %xmm1
; X64-NEXT:    vinserti128 $1, %xmm1, %ymm1, %ymm0
; X64-NEXT:    vpaddq {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    vmovdqa %xmm1, (%rsi)
; X64-NEXT:    retq
 %1 = load <2 x i64>, <2 x i64>* %p0
 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
 %3 = add <4 x i64> %2, <i64 1, i64 2, i64 3, i64 4>
 store <2 x i64> %1, <2 x i64>* %p1
 ret <4 x i64> %3
}

define <8 x float> @test_broadcast_4f32_8f32_reuse(<4 x float>* %p0, <4 x float>* %p1) {
; X32-LABEL: test_broadcast_4f32_8f32_reuse:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovaps (%ecx), %xmm1
; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X32-NEXT:    vaddps {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT:    vmovaps %xmm1, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_4f32_8f32_reuse:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps (%rdi), %xmm1
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm0
; X64-NEXT:    vaddps {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    vmovaps %xmm1, (%rsi)
; X64-NEXT:    retq
 %1 = load <4 x float>, <4 x float>* %p0
 %2 = shufflevector <4 x float> %1, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
 %3 = fadd <8 x float> %2, <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>
 store <4 x float> %1, <4 x float>* %p1
 ret <8 x float> %3
}

define <8 x i32> @test_broadcast_4i32_8i32_reuse(<4 x i32>* %p0, <4 x i32>* %p1) {
; X32-LABEL: test_broadcast_4i32_8i32_reuse:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovdqa (%ecx), %xmm1
; X32-NEXT:    vinserti128 $1, %xmm1, %ymm1, %ymm0
; X32-NEXT:    vpaddd {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT:    vmovdqa %xmm1, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_4i32_8i32_reuse:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa (%rdi), %xmm1
; X64-NEXT:    vinserti128 $1, %xmm1, %ymm1, %ymm0
; X64-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    vmovdqa %xmm1, (%rsi)
; X64-NEXT:    retq
 %1 = load <4 x i32>, <4 x i32>* %p0
 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
 %3 = add <8 x i32> %2, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
 store <4 x i32> %1, <4 x i32>* %p1
 ret <8 x i32> %3
}

define <16 x i16> @test_broadcast_8i16_16i16_reuse(<8 x i16> *%p0, <8 x i16> *%p1) nounwind {
; X32-LABEL: test_broadcast_8i16_16i16_reuse:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovdqa (%ecx), %xmm1
; X32-NEXT:    vinserti128 $1, %xmm1, %ymm1, %ymm0
; X32-NEXT:    vpaddw {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT:    vmovdqa %xmm1, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_8i16_16i16_reuse:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa (%rdi), %xmm1
; X64-NEXT:    vinserti128 $1, %xmm1, %ymm1, %ymm0
; X64-NEXT:    vpaddw {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    vmovdqa %xmm1, (%rsi)
; X64-NEXT:    retq
 %1 = load <8 x i16>, <8 x i16> *%p0
 %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 %3 = add <16 x i16> %2, <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16>
 store <8 x i16> %1, <8 x i16>* %p1
 ret <16 x i16> %3
}

define <32 x i8> @test_broadcast_16i8_32i8_reuse(<16 x i8> *%p0, <16 x i8> *%p1) nounwind {
; X32-LABEL: test_broadcast_16i8_32i8_reuse:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovdqa (%ecx), %xmm1
; X32-NEXT:    vinserti128 $1, %xmm1, %ymm1, %ymm0
; X32-NEXT:    vpaddb {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT:    vmovdqa %xmm1, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: test_broadcast_16i8_32i8_reuse:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa (%rdi), %xmm1
; X64-NEXT:    vinserti128 $1, %xmm1, %ymm1, %ymm0
; X64-NEXT:    vpaddb {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    vmovdqa %xmm1, (%rsi)
; X64-NEXT:    retq
 %1 = load <16 x i8>, <16 x i8> *%p0
 %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 %3 = add <32 x i8> %2, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32>
 store <16 x i8> %1, <16 x i8>* %p1
 ret <32 x i8> %3
}

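; PR29088 (llvm.org/PR29088): the 128-bit load should still be folded into a
; vbroadcastf128 even when a store of a different (zero) value is interleaved
; between the load and the broadcast shuffle.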
define <8 x i32> @PR29088(<4 x i32>* %p0, <8 x float>* %p1) {
; X32-LABEL: PR29088:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X32-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    vmovaps %ymm1, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: PR29088:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    vmovaps %ymm1, (%rsi)
; X64-NEXT:    retq
  %ld = load <4 x i32>, <4 x i32>* %p0
  store <8 x float> zeroinitializer, <8 x float>* %p1
  %shuf = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i32> %shuf
}