; Code-browser navigation residue (not part of the original test): Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl | FileCheck %s --check-prefix=X64-AVX512 --check-prefix=X64-AVX512VL
      3 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefix=X64-AVX512 --check-prefix=X64-AVX512BWVL
      4 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=X64-AVX512 --check-prefix=X64-AVX512DQVL
      5 
      6 ;
      7 ; 128-bit Subvector Broadcast to 256-bit
      8 ;
      9 
     10 define <4 x double> @test_broadcast_2f64_4f64(<2 x double> *%p) nounwind {
     11 ; X64-AVX512-LABEL: test_broadcast_2f64_4f64:
     12 ; X64-AVX512:       ## %bb.0:
     13 ; X64-AVX512-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
     14 ; X64-AVX512-NEXT:    vaddpd {{.*}}(%rip), %ymm0, %ymm0
     15 ; X64-AVX512-NEXT:    retq
; Splat a loaded <2 x double> into both 128-bit lanes of a <4 x double>.
; The fadd with distinct constants keeps the splat live; all run lines share
; the X64-AVX512 prefix and fold the load into a single vbroadcastf128.
     16  %1 = load <2 x double>, <2 x double> *%p
     17  %2 = shufflevector <2 x double> %1, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
     18  %3 = fadd <4 x double> %2, <double 1.0, double 2.0, double 3.0, double 4.0>
     19  ret <4 x double> %3
     20 }
     21 
     22 define <4 x i64> @test_broadcast_2i64_4i64(<2 x i64> *%p) nounwind {
     23 ; X64-AVX512-LABEL: test_broadcast_2i64_4i64:
     24 ; X64-AVX512:       ## %bb.0:
     25 ; X64-AVX512-NEXT:    vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
     26 ; X64-AVX512-NEXT:    vpaddq {{.*}}(%rip), %ymm0, %ymm0
     27 ; X64-AVX512-NEXT:    retq
; Integer version of the 128->256 splat: a <2 x i64> load duplicated into a
; <4 x i64>; expects a load-folded vbroadcasti128 consumed by one vpaddq.
     28  %1 = load <2 x i64>, <2 x i64> *%p
     29  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
     30  %3 = add <4 x i64> %2, <i64 1, i64 2, i64 3, i64 4>
     31  ret <4 x i64> %3
     32 }
     33 
     34 define <8 x float> @test_broadcast_4f32_8f32(<4 x float> *%p) nounwind {
     35 ; X64-AVX512-LABEL: test_broadcast_4f32_8f32:
     36 ; X64-AVX512:       ## %bb.0:
     37 ; X64-AVX512-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
     38 ; X64-AVX512-NEXT:    vaddps {{.*}}(%rip), %ymm0, %ymm0
     39 ; X64-AVX512-NEXT:    retq
; <4 x float> duplicated into both halves of an <8 x float>; checks a
; load-folded vbroadcastf128 followed by a single 256-bit vaddps.
     40  %1 = load <4 x float>, <4 x float> *%p
     41  %2 = shufflevector <4 x float> %1, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
     42  %3 = fadd <8 x float> %2, <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>
     43  ret <8 x float> %3
     44 }
     45 
     46 define <8 x i32> @test_broadcast_4i32_8i32(<4 x i32> *%p) nounwind {
     47 ; X64-AVX512-LABEL: test_broadcast_4i32_8i32:
     48 ; X64-AVX512:       ## %bb.0:
     49 ; X64-AVX512-NEXT:    vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
     50 ; X64-AVX512-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
     51 ; X64-AVX512-NEXT:    retq
; <4 x i32> duplicated into an <8 x i32>; checks a load-folded vbroadcasti128
; followed by a single 256-bit vpaddd.
     52  %1 = load <4 x i32>, <4 x i32> *%p
     53  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
     54  %3 = add <8 x i32> %2, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
     55  ret <8 x i32> %3
     56 }
     57 
     58 define <16 x i16> @test_broadcast_8i16_16i16(<8 x i16> *%p) nounwind {
     59 ; X64-AVX512-LABEL: test_broadcast_8i16_16i16:
     60 ; X64-AVX512:       ## %bb.0:
     61 ; X64-AVX512-NEXT:    vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
     62 ; X64-AVX512-NEXT:    vpaddw {{.*}}(%rip), %ymm0, %ymm0
     63 ; X64-AVX512-NEXT:    retq
; <8 x i16> duplicated into a <16 x i16>; checks a load-folded vbroadcasti128
; followed by a single 256-bit vpaddw.
     64  %1 = load <8 x i16>, <8 x i16> *%p
     65  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
     66  %3  = add <16 x i16> %2, <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16>
     67  ret <16 x i16> %3
     68 }
     69 
     70 define <32 x i8> @test_broadcast_16i8_32i8(<16 x i8> *%p) nounwind {
     71 ; X64-AVX512-LABEL: test_broadcast_16i8_32i8:
     72 ; X64-AVX512:       ## %bb.0:
     73 ; X64-AVX512-NEXT:    vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
     74 ; X64-AVX512-NEXT:    vpaddb {{.*}}(%rip), %ymm0, %ymm0
     75 ; X64-AVX512-NEXT:    retq
; <16 x i8> duplicated into a <32 x i8>; checks a load-folded vbroadcasti128
; followed by a single 256-bit vpaddb.
     76  %1 = load <16 x i8>, <16 x i8> *%p
     77  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
     78  %3 = add <32 x i8> %2, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32>
     79  ret <32 x i8> %3
     80 }
     81 
     82 ;
     83 ; 128-bit Subvector Broadcast to 512-bit
     84 ;
     85 
     86 define <8 x double> @test_broadcast_2f64_8f64(<2 x double> *%p) nounwind {
     87 ; X64-AVX512-LABEL: test_broadcast_2f64_8f64:
     88 ; X64-AVX512:       ## %bb.0:
     89 ; X64-AVX512-NEXT:    vbroadcastf32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
     90 ; X64-AVX512-NEXT:    vaddpd {{.*}}(%rip), %zmm0, %zmm0
     91 ; X64-AVX512-NEXT:    retq
; 128->512 splat: a <2 x double> load repeated four times to fill a zmm. All
; three configurations agree on a load-folded vbroadcastf32x4 (the same
; 128-bit splat expressed with 32-bit elements) plus one 512-bit vaddpd.
     92  %1 = load <2 x double>, <2 x double> *%p
     93  %2 = shufflevector <2 x double> %1, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
     94  %3 = fadd <8 x double> %2, <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0>
     95  ret <8 x double> %3
     96 }
     97 
     98 define <8 x i64> @test_broadcast_2i64_8i64(<2 x i64> *%p) nounwind {
     99 ; X64-AVX512-LABEL: test_broadcast_2i64_8i64:
    100 ; X64-AVX512:       ## %bb.0:
    101 ; X64-AVX512-NEXT:    vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
    102 ; X64-AVX512-NEXT:    vpaddq {{.*}}(%rip), %zmm0, %zmm0
    103 ; X64-AVX512-NEXT:    retq
; 128->512 integer splat: a <2 x i64> load repeated four times into a zmm;
; checks a load-folded vbroadcasti32x4 plus one 512-bit vpaddq.
    104  %1 = load <2 x i64>, <2 x i64> *%p
    105  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
    106  %3 = add <8 x i64> %2, <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8>
    107  ret <8 x i64> %3
    108 }
    109 
    110 define <16 x float> @test_broadcast_4f32_16f32(<4 x float> *%p) nounwind {
    111 ; X64-AVX512-LABEL: test_broadcast_4f32_16f32:
    112 ; X64-AVX512:       ## %bb.0:
    113 ; X64-AVX512-NEXT:    vbroadcastf32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
    114 ; X64-AVX512-NEXT:    vaddps {{.*}}(%rip), %zmm0, %zmm0
    115 ; X64-AVX512-NEXT:    retq
; 128->512 splat of a <4 x float> load into a zmm; checks a load-folded
; vbroadcastf32x4 plus one 512-bit vaddps.
    116  %1 = load <4 x float>, <4 x float> *%p
    117  %2 = shufflevector <4 x float> %1, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
    118  %3 = fadd <16 x float> %2, <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>
    119  ret <16 x float> %3
    120 }
    121 
    122 define <16 x i32> @test_broadcast_4i32_16i32(<4 x i32> *%p) nounwind {
    123 ; X64-AVX512-LABEL: test_broadcast_4i32_16i32:
    124 ; X64-AVX512:       ## %bb.0:
    125 ; X64-AVX512-NEXT:    vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
    126 ; X64-AVX512-NEXT:    vpaddd {{.*}}(%rip), %zmm0, %zmm0
    127 ; X64-AVX512-NEXT:    retq
; 128->512 splat of a <4 x i32> load into a zmm; checks a load-folded
; vbroadcasti32x4 plus one 512-bit vpaddd.
    128  %1 = load <4 x i32>, <4 x i32> *%p
    129  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
    130  %3 = add <16 x i32> %2, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
    131  ret <16 x i32> %3
    132 }
    133 
    134 define <32 x i16> @test_broadcast_8i16_32i16(<8 x i16> *%p) nounwind {
    135 ; X64-AVX512VL-LABEL: test_broadcast_8i16_32i16:
    136 ; X64-AVX512VL:       ## %bb.0:
    137 ; X64-AVX512VL-NEXT:    vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1]
    138 ; X64-AVX512VL-NEXT:    vpaddw {{.*}}(%rip), %ymm1, %ymm0
    139 ; X64-AVX512VL-NEXT:    vpaddw {{.*}}(%rip), %ymm1, %ymm1
    140 ; X64-AVX512VL-NEXT:    retq
    141 ;
    142 ; X64-AVX512BWVL-LABEL: test_broadcast_8i16_32i16:
    143 ; X64-AVX512BWVL:       ## %bb.0:
    144 ; X64-AVX512BWVL-NEXT:    vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
    145 ; X64-AVX512BWVL-NEXT:    vpaddw {{.*}}(%rip), %zmm0, %zmm0
    146 ; X64-AVX512BWVL-NEXT:    retq
    147 ;
    148 ; X64-AVX512DQVL-LABEL: test_broadcast_8i16_32i16:
    149 ; X64-AVX512DQVL:       ## %bb.0:
    150 ; X64-AVX512DQVL-NEXT:    vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1]
    151 ; X64-AVX512DQVL-NEXT:    vpaddw {{.*}}(%rip), %ymm1, %ymm0
    152 ; X64-AVX512DQVL-NEXT:    vpaddw {{.*}}(%rip), %ymm1, %ymm1
    153 ; X64-AVX512DQVL-NEXT:    retq
; 128->512 splat of word elements. Here the run lines diverge: with AVX512BW
; (BWVL) the operation stays in one zmm — vbroadcasti32x4 plus a 512-bit
; vpaddw — while the non-BW configurations (VL, DQVL) split into a ymm
; vbroadcasti128 feeding two 256-bit vpaddw ops.
    154  %1 = load <8 x i16>, <8 x i16> *%p
    155  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    156  %3  = add <32 x i16> %2, <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 20, i16 21, i16 22, i16 23, i16 24, i16 25, i16 26, i16 27, i16 28, i16 29, i16 30, i16 31, i16 32>
    157  ret <32 x i16> %3
    158 }
    159 
    160 define <64 x i8> @test_broadcast_16i8_64i8(<16 x i8> *%p) nounwind {
    161 ; X64-AVX512VL-LABEL: test_broadcast_16i8_64i8:
    162 ; X64-AVX512VL:       ## %bb.0:
    163 ; X64-AVX512VL-NEXT:    vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1]
    164 ; X64-AVX512VL-NEXT:    vpaddb {{.*}}(%rip), %ymm1, %ymm0
    165 ; X64-AVX512VL-NEXT:    vpaddb {{.*}}(%rip), %ymm1, %ymm1
    166 ; X64-AVX512VL-NEXT:    retq
    167 ;
    168 ; X64-AVX512BWVL-LABEL: test_broadcast_16i8_64i8:
    169 ; X64-AVX512BWVL:       ## %bb.0:
    170 ; X64-AVX512BWVL-NEXT:    vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
    171 ; X64-AVX512BWVL-NEXT:    vpaddb {{.*}}(%rip), %zmm0, %zmm0
    172 ; X64-AVX512BWVL-NEXT:    retq
    173 ;
    174 ; X64-AVX512DQVL-LABEL: test_broadcast_16i8_64i8:
    175 ; X64-AVX512DQVL:       ## %bb.0:
    176 ; X64-AVX512DQVL-NEXT:    vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1]
    177 ; X64-AVX512DQVL-NEXT:    vpaddb {{.*}}(%rip), %ymm1, %ymm0
    178 ; X64-AVX512DQVL-NEXT:    vpaddb {{.*}}(%rip), %ymm1, %ymm1
    179 ; X64-AVX512DQVL-NEXT:    retq
; 128->512 splat of byte elements; mirrors the word case above. BWVL keeps a
; single zmm vbroadcasti32x4 + 512-bit vpaddb; the non-BW configurations
; (VL, DQVL) split into a ymm vbroadcasti128 feeding two 256-bit vpaddb ops.
    180  %1 = load <16 x i8>, <16 x i8> *%p
    181  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    182  %3 = add <64 x i8> %2, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 39, i8 40, i8 41, i8 42, i8 43, i8 44, i8 45, i8 46, i8 47, i8 48, i8 49, i8 50, i8 51, i8 52, i8 53, i8 54, i8 55, i8 56, i8 57, i8 58, i8 59, i8 60, i8 61, i8 62, i8 63, i8 64>
    183  ret <64 x i8> %3
    184 }
    185 
    186 define <8 x i32> @PR29088(<4 x i32>* %p0, <8 x float>* %p1) {
    187 ; X64-AVX512-LABEL: PR29088:
    188 ; X64-AVX512:       ## %bb.0:
    189 ; X64-AVX512-NEXT:    vxorps %xmm1, %xmm1, %xmm1
    190 ; X64-AVX512-NEXT:    vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
    191 ; X64-AVX512-NEXT:    vmovaps %ymm1, (%rsi)
    192 ; X64-AVX512-NEXT:    retq
; The 256-bit zero store to %p1 sits between the <4 x i32> load and its
; splatting shuffle; checks that the store (vxorps + vmovaps) does not block
; folding the load into a single vbroadcasti128.
; NOTE(review): named after an LLVM bug report (PR29088) — presumably the
; original defect involved this intervening store; confirm against the PR.
    193   %ld = load <4 x i32>, <4 x i32>* %p0
    194   store <8 x float> zeroinitializer, <8 x float>* %p1
    195   %shuf = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
    196   ret <8 x i32> %shuf
    197 }
    198