; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s

; Checks which splat patterns are (and are not) lowered to a vbroadcast with a
; memory operand when only AVX is enabled. Every function is anchored with
; CHECK-LABEL on its Darwin symbol (leading '_') so that a check can only match
; inside the function it belongs to.

;;;; 256-bit versions

; Splat of a loaded i64 into all four lanes of a <4 x i64>.
; CHECK-LABEL: _A:
; CHECK: vbroadcastsd (%
define <4 x i64> @A(i64* %ptr) nounwind uwtable readnone ssp {
entry:
  %q = load i64, i64* %ptr, align 8
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
  %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
  %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
  ret <4 x i64> %vecinit6.i
}

; Loaded i32 inserted into lanes 0-3 of a <8 x i32>; lanes 4-7 remain undef.
; CHECK-LABEL: _B:
; CHECK: vbroadcastss (%
define <8 x i32> @B(i32* %ptr) nounwind uwtable readnone ssp {
entry:
  %q = load i32, i32* %ptr, align 4
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %vecinit2.i = insertelement <8 x i32> %vecinit.i, i32 %q, i32 1
  %vecinit4.i = insertelement <8 x i32> %vecinit2.i, i32 %q, i32 2
  %vecinit6.i = insertelement <8 x i32> %vecinit4.i, i32 %q, i32 3
  ret <8 x i32> %vecinit6.i
}

; Splat of a loaded double into all four lanes of a <4 x double>.
; CHECK-LABEL: _C:
; CHECK: vbroadcastsd (%
define <4 x double> @C(double* %ptr) nounwind uwtable readnone ssp {
entry:
  %q = load double, double* %ptr, align 8
  %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
  %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
  %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
  %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
  ret <4 x double> %vecinit6.i
}

; Loaded float inserted into lanes 0-3 of a <8 x float>; lanes 4-7 remain undef.
; CHECK-LABEL: _D:
; CHECK: vbroadcastss (%
define <8 x float> @D(float* %ptr) nounwind uwtable readnone ssp {
entry:
  %q = load float, float* %ptr, align 4
  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
  %vecinit2.i = insertelement <8 x float> %vecinit.i, float %q, i32 1
  %vecinit4.i = insertelement <8 x float> %vecinit2.i, float %q, i32 2
  %vecinit6.i = insertelement <8 x float> %vecinit4.i, float %q, i32 3
  ret <8 x float> %vecinit6.i
}

;;;; 128-bit versions

; Splat of a loaded float into all four lanes of a <4 x float>.
; CHECK-LABEL: _e:
; CHECK: vbroadcastss (%
define <4 x float> @e(float* %ptr) nounwind uwtable readnone ssp {
entry:
  %q = load float, float* %ptr, align 4
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
  ret <4 x float> %vecinit6.i
}

; Splat of a floating-point constant (no load in the IR): the test expects no
; vbroadcastss to be emitted for it.
; CHECK-LABEL: __e2:
; CHECK-NOT: vbroadcastss
; CHECK: ret
define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
  %vecinit.i = insertelement <4 x float> undef, float 0xbf80000000000000, i32 0
  %vecinit2.i = insertelement <4 x float> %vecinit.i, float 0xbf80000000000000, i32 1
  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float 0xbf80000000000000, i32 2
  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float 0xbf80000000000000, i32 3
  ret <4 x float> %vecinit6.i
}

; Splat of a loaded i32 into all four lanes of a <4 x i32>.
; CHECK-LABEL: _F:
; CHECK: vbroadcastss (%
define <4 x i32> @F(i32* %ptr) nounwind uwtable readnone ssp {
entry:
  %q = load i32, i32* %ptr, align 4
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %vecinit2.i = insertelement <4 x i32> %vecinit.i, i32 %q, i32 1
  %vecinit4.i = insertelement <4 x i32> %vecinit2.i, i32 %q, i32 2
  %vecinit6.i = insertelement <4 x i32> %vecinit4.i, i32 %q, i32 3
  ret <4 x i32> %vecinit6.i
}

; Unsupported vbroadcasts

; Splat of a loaded i64 into a <2 x i64>: no broadcast-from-memory expected.
; CHECK-LABEL: _G:
; CHECK-NOT: broadcast (%
; CHECK: ret
define <2 x i64> @G(i64* %ptr) nounwind uwtable readnone ssp {
entry:
  %q = load i64, i64* %ptr, align 8
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %vecinit2.i = insertelement <2 x i64> %vecinit.i, i64 %q, i32 1
  ret <2 x i64> %vecinit2.i
}

; Shuffle of a vector argument with mask <1, undef, undef, undef>: no broadcast
; of any kind expected.
; CHECK-LABEL: _H:
; CHECK-NOT: broadcast
; CHECK: ret
define <4 x i32> @H(<4 x i32> %a) {
  %x = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  ret <4 x i32> %x
}

; Splat of a loaded double into a <2 x double>: no broadcast-from-memory
; expected.
; CHECK-LABEL: _I:
; CHECK-NOT: broadcast (%
; CHECK: ret
define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
entry:
  %q = load double, double* %ptr, align 4
  %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
  %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
  ret <2 x double> %vecinit2.i
}

; Same splat as @e, but followed by an unrelated load/store pair so that the
; splatted load is not the only memory operation in the function.
; CHECK-LABEL: __RR:
; CHECK: vbroadcastss (%
; CHECK: ret
define <4 x float> @_RR(float* %ptr, i32* %k) nounwind uwtable readnone ssp {
entry:
  %q = load float, float* %ptr, align 4
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
  ; force a chain
  %j = load i32, i32* %k, align 4
  store i32 %j, i32* undef
  ret <4 x float> %vecinit6.i
}

; Splat written as insertelement into lane 0 followed by a shufflevector with
; an all-zero mask.
; CHECK-LABEL: __RR2:
; CHECK: vbroadcastss (%
; CHECK: ret
define <4 x float> @_RR2(float* %ptr, i32* %k) nounwind uwtable readnone ssp {
entry:
  %q = load float, float* %ptr, align 4
  %v = insertelement <4 x float> undef, float %q, i32 0
  %t = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %t
}

; These tests check that a vbroadcast instruction is used when we have a splat
; formed from a concat_vectors (via the shufflevector) of two BUILD_VECTORs
; (via the insertelements).

; One <4 x float> splat, concatenated with itself into a <8 x float>.
; CHECK-LABEL: splat_concat1
; CHECK-NOT: vinsertf128
; CHECK: vbroadcastss (%
; CHECK-NEXT: ret
define <8 x float> @splat_concat1(float* %p) {
  %1 = load float, float* %p, align 4
  %2 = insertelement <4 x float> undef, float %1, i32 0
  %3 = insertelement <4 x float> %2, float %1, i32 1
  %4 = insertelement <4 x float> %3, float %1, i32 2
  %5 = insertelement <4 x float> %4, float %1, i32 3
  %6 = shufflevector <4 x float> %5, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x float> %6
}

; Two separately built <4 x float> splats of the same loaded value,
; concatenated into a <8 x float>.
; CHECK-LABEL: splat_concat2
; CHECK-NOT: vinsertf128
; CHECK: vbroadcastss (%
; CHECK-NEXT: ret
define <8 x float> @splat_concat2(float* %p) {
  %1 = load float, float* %p, align 4
  %2 = insertelement <4 x float> undef, float %1, i32 0
  %3 = insertelement <4 x float> %2, float %1, i32 1
  %4 = insertelement <4 x float> %3, float %1, i32 2
  %5 = insertelement <4 x float> %4, float %1, i32 3
  %6 = insertelement <4 x float> undef, float %1, i32 0
  %7 = insertelement <4 x float> %6, float %1, i32 1
  %8 = insertelement <4 x float> %7, float %1, i32 2
  %9 = insertelement <4 x float> %8, float %1, i32 3
  %10 = shufflevector <4 x float> %5, <4 x float> %9, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x float> %10
}

; One <2 x double> splat, concatenated with itself into a <4 x double>.
; CHECK-LABEL: splat_concat3
; CHECK-NOT: vinsertf128
; CHECK: vbroadcastsd (%
; CHECK-NEXT: ret
define <4 x double> @splat_concat3(double* %p) {
  %1 = load double, double* %p, align 8
  %2 = insertelement <2 x double> undef, double %1, i32 0
  %3 = insertelement <2 x double> %2, double %1, i32 1
  %4 = shufflevector <2 x double> %3, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x double> %4
}

; Two separately built <2 x double> splats of the same loaded value,
; concatenated into a <4 x double>.
; CHECK-LABEL: splat_concat4
; CHECK-NOT: vinsertf128
; CHECK: vbroadcastsd (%
; CHECK-NEXT: ret
define <4 x double> @splat_concat4(double* %p) {
  %1 = load double, double* %p, align 8
  %2 = insertelement <2 x double> undef, double %1, i32 0
  %3 = insertelement <2 x double> %2, double %1, i32 1
  %4 = insertelement <2 x double> undef, double %1, i32 0
  ; The second vector must be chained from %4 (its own lane-0 insert), not from
  ; %2, so that the shuffle really concatenates two independent BUILD_VECTORs.
  %5 = insertelement <2 x double> %4, double %1, i32 1
  %6 = shufflevector <2 x double> %3, <2 x double> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x double> %6
}