; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2

; Check that constant loads of every 128-bit and 256-bit vector type
; are optimized for size using the splat ops available with AVX and AVX2.
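; At optsize/minsize the full vector constant is replaced by a single scalar
; constant-pool entry that is broadcast at run time, shrinking the constant pool.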

; There is no AVX broadcast from a double to a 128-bit vector because movddup has been available since SSE3 (grrr).
define <2 x double> @splat_v2f64(<2 x double> %x) #0 {
; CHECK-LABEL: splat_v2f64:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
; CHECK-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %add = fadd <2 x double> %x, <double 1.0, double 1.0>
  ret <2 x double> %add
}

define <4 x double> @splat_v4f64(<4 x double> %x) #1 {
; CHECK-LABEL: splat_v4f64:
; CHECK:       # BB#0:
; CHECK-NEXT:    vbroadcastsd {{.*}}(%rip), %ymm1
; CHECK-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %add = fadd <4 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0>
  ret <4 x double> %add
}

define <4 x float> @splat_v4f32(<4 x float> %x) #0 {
; CHECK-LABEL: splat_v4f32:
; CHECK:       # BB#0:
; CHECK-NEXT:    vbroadcastss {{.*}}(%rip), %xmm1
; CHECK-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %add = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
  ret <4 x float> %add
}

define <8 x float> @splat_v8f32(<8 x float> %x) #1 {
; CHECK-LABEL: splat_v8f32:
; CHECK:       # BB#0:
; CHECK-NEXT:    vbroadcastss {{.*}}(%rip), %ymm1
; CHECK-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %add = fadd <8 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
  ret <8 x float> %add
}

; AVX can't do integer splats, so fake it: use vmovddup to splat the 64-bit value.
; We also generate vmovddup for AVX2 because it's one byte smaller than vpbroadcastq.
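; (vmovddup only needs the 2-byte VEX prefix, while vpbroadcastq lives in the 0F38
; opcode map and requires the 3-byte VEX prefix, hence the one-byte difference.)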
define <2 x i64> @splat_v2i64(<2 x i64> %x) #1 {
; CHECK-LABEL: splat_v2i64:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
; CHECK-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %add = add <2 x i64> %x, <i64 1, i64 1>
  ret <2 x i64> %add
}

; AVX can't do 256-bit integer ops, so we split this into two 128-bit vectors
; and then fake it: use vmovddup to splat the 64-bit value.
define <4 x i64> @splat_v4i64(<4 x i64> %x) #0 {
; AVX-LABEL: splat_v4i64:
; AVX:       # BB#0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vmovddup {{.*#+}} xmm2 = mem[0,0]
; AVX-NEXT:    vpaddq %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v4i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastq {{.*}}(%rip), %ymm1
; AVX2-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %add = add <4 x i64> %x, <i64 1, i64 1, i64 1, i64 1>
  ret <4 x i64> %add
}

; AVX can't do integer splats, so fake it: use vbroadcastss to splat the 32-bit value.
define <4 x i32> @splat_v4i32(<4 x i32> %x) #1 {
; AVX-LABEL: splat_v4i32:
; AVX:       # BB#0:
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm1
; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v4i32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm1
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %add = add <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %add
}

; AVX can't do integer splats, so fake it: use vbroadcastss to splat the 32-bit value.
define <8 x i32> @splat_v8i32(<8 x i32> %x) #0 {
; AVX-LABEL: splat_v8i32:
; AVX:       # BB#0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v8i32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm1
; AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %add = add <8 x i32> %x, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %add
}

; AVX can't do integer splats, and there's no broadcast fakery for 16-bit elements. Could use pshuflw, etc.?
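; (vbroadcastss/vbroadcastsd only cover 32- and 64-bit elements; vpbroadcastw requires
; AVX2, so plain AVX falls back to keeping the full vector constant in the constant pool.)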
define <8 x i16> @splat_v8i16(<8 x i16> %x) #1 {
; AVX-LABEL: splat_v8i16:
; AVX:       # BB#0:
; AVX-NEXT:    vpaddw {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v8i16:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw {{.*}}(%rip), %xmm1
; AVX2-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %add = add <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %add
}

; AVX can't do integer splats, and there's no broadcast fakery for 16-bit elements. Could use pshuflw, etc.?
define <16 x i16> @splat_v16i16(<16 x i16> %x) #0 {
; AVX-LABEL: splat_v16i16:
; AVX:       # BB#0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1]
; AVX-NEXT:    vpaddw %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpaddw %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v16i16:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw {{.*}}(%rip), %ymm1
; AVX2-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %add = add <16 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <16 x i16> %add
}

; AVX can't do integer splats, and there's no broadcast fakery for 8-bit elements. Could use pshufb, etc.?
define <16 x i8> @splat_v16i8(<16 x i8> %x) #1 {
; AVX-LABEL: splat_v16i8:
; AVX:       # BB#0:
; AVX-NEXT:    vpaddb {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v16i8:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastb {{.*}}(%rip), %xmm1
; AVX2-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %add = add <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %add
}

; AVX can't do integer splats, and there's no broadcast fakery for 8-bit elements. Could use pshufb, etc.?
define <32 x i8> @splat_v32i8(<32 x i8> %x) #0 {
; AVX-LABEL: splat_v32i8:
; AVX:       # BB#0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX-NEXT:    vpaddb %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v32i8:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastb {{.*}}(%rip), %ymm1
; AVX2-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %add = add <32 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <32 x i8> %add
}

; PR23259: Verify that ISel doesn't crash with a 'fatal error in backend'
; due to a missing AVX pattern to select a v2i64 X86ISD::BROADCAST of a
; loadi64 with multiple uses.
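; The shuffles below combine to <A[2], 1, 1, 1, 1, 1, 1, 1>, i.e. one element
; reloaded from @A plus the splatted constant 1.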

@A = common global <3 x i64> zeroinitializer, align 32

define <8 x i64> @pr23259() #1 {
entry:
  %0 = load <4 x i64>, <4 x i64>* bitcast (<3 x i64>* @A to <4 x i64>*), align 32
  %1 = shufflevector <4 x i64> %0, <4 x i64> undef, <3 x i32> <i32 undef, i32 undef, i32 2>
  %shuffle = shufflevector <3 x i64> <i64 1, i64 undef, i64 undef>, <3 x i64> %1, <8 x i32> <i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i64> %shuffle
}

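; Note: the test functions alternate between attribute #0 (optsize) and #1 (minsize)
; so the size-optimized splat lowering is exercised under both attributes.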
attributes #0 = { optsize }
attributes #1 = { minsize }