; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s

; Codegen tests for splat (broadcast) patterns on an AVX-only x86-64 target.
; Each function builds a vector whose written lanes all hold the same value and
; the assertions pin the exact instruction sequence llc emits for it.

;;;; 256-bit versions

; Scalar i64 load inserted into all four lanes of <4 x i64>.
define <4 x i64> @A(i64* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: A:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0
; CHECK-NEXT:    retq
entry:
  %q = load i64, i64* %ptr, align 8
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
  %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
  %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
  ret <4 x i64> %vecinit6.i
}

; Scalar i32 load inserted into lanes 0-3 of <8 x i32>; lanes 4-7 stay undef.
define <8 x i32> @B(i32* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: B:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vbroadcastss (%rdi), %ymm0
; CHECK-NEXT:    retq
entry:
  %q = load i32, i32* %ptr, align 4
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %vecinit2.i = insertelement <8 x i32> %vecinit.i, i32 %q, i32 1
  %vecinit4.i = insertelement <8 x i32> %vecinit2.i, i32 %q, i32 2
  %vecinit6.i = insertelement <8 x i32> %vecinit4.i, i32 %q, i32 3
  ret <8 x i32> %vecinit6.i
}

; Scalar double load inserted into all four lanes of <4 x double>.
define <4 x double> @C(double* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: C:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0
; CHECK-NEXT:    retq
entry:
  %q = load double, double* %ptr, align 8
  %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
  %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
  %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
  %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
  ret <4 x double> %vecinit6.i
}

; Scalar float load inserted into lanes 0-3 of <8 x float>; lanes 4-7 stay undef.
define <8 x float> @D(float* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: D:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vbroadcastss (%rdi), %ymm0
; CHECK-NEXT:    retq
entry:
  %q = load float, float* %ptr, align 4
  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
  %vecinit2.i = insertelement <8 x float> %vecinit.i, float %q, i32 1
  %vecinit4.i = insertelement <8 x float> %vecinit2.i, float %q, i32 2
  %vecinit6.i = insertelement <8 x float> %vecinit4.i, float %q, i32 3
  ret <8 x float> %vecinit6.i
}

;;;; 128-bit versions

; Scalar float load inserted into all four lanes of <4 x float>.
define <4 x float> @e(float* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: e:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vbroadcastss (%rdi), %xmm0
; CHECK-NEXT:    retq
entry:
  %q = load float, float* %ptr, align 4
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
  ret <4 x float> %vecinit6.i
}

; Don't broadcast constants on pre-AVX2 hardware.
; The splat constant is expected to be materialized with a full-width vmovaps.
define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: _e2:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vmovaps {{.*#+}} xmm0 = [-7.812500e-03,-7.812500e-03,-7.812500e-03,-7.812500e-03]
; CHECK-NEXT:    retq
entry:
  %vecinit.i = insertelement <4 x float> undef, float 0xbf80000000000000, i32 0
  %vecinit2.i = insertelement <4 x float> %vecinit.i, float 0xbf80000000000000, i32 1
  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float 0xbf80000000000000, i32 2
  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float 0xbf80000000000000, i32 3
  ret <4 x float> %vecinit6.i
}

; Scalar i32 load inserted into all four lanes of <4 x i32>; the expected
; instruction is the float-domain vbroadcastss.
define <4 x i32> @F(i32* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: F:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vbroadcastss (%rdi), %xmm0
; CHECK-NEXT:    retq
entry:
  %q = load i32, i32* %ptr, align 4
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %vecinit2.i = insertelement <4 x i32> %vecinit.i, i32 %q, i32 1
  %vecinit4.i = insertelement <4 x i32> %vecinit2.i, i32 %q, i32 2
  %vecinit6.i = insertelement <4 x i32> %vecinit4.i, i32 %q, i32 3
  ret <4 x i32> %vecinit6.i
}

; FIXME: Pointer adjusted broadcasts

; Full vector load followed by a splat of element 1 (<4 x i32> -> <4 x i32>).
define <4 x i32> @load_splat_4i32_4i32_1111(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_4i32_4i32_1111:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = mem[1,1,1,1]
; CHECK-NEXT:    retq
entry:
  %ld = load <4 x i32>, <4 x i32>* %ptr
  %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %ret
}

; Full vector load followed by a widening splat of element 3 (<4 x i32> -> <8 x i32>).
define <8 x i32> @load_splat_8i32_4i32_33333333(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_8i32_4i32_33333333:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = mem[3,3,3,3]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %ld = load <4 x i32>, <4 x i32>* %ptr
  %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x i32> %ret
}

; Full vector load followed by a splat of element 5 (<8 x i32> -> <8 x i32>).
define <8 x i32> @load_splat_8i32_8i32_55555555(<8 x i32>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_8i32_8i32_55555555:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vmovaps (%rdi), %ymm0
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %ld = load <8 x i32>, <8 x i32>* %ptr
  %ret = shufflevector <8 x i32> %ld, <8 x i32> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x i32> %ret
}

; Full vector load followed by a splat of element 1 (<4 x float> -> <4 x float>);
; expected to fold into a broadcast from offset 4.
define <4 x float> @load_splat_4f32_4f32_1111(<4 x float>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_4f32_4f32_1111:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vbroadcastss 4(%rdi), %xmm0
; CHECK-NEXT:    retq
entry:
  %ld = load <4 x float>, <4 x float>* %ptr
  %ret = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x float> %ret
}

; Full vector load followed by a widening splat of element 3
; (<4 x float> -> <8 x float>); expected to fold into a broadcast from offset 12.
define <8 x float> @load_splat_8f32_4f32_33333333(<4 x float>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_8f32_4f32_33333333:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vbroadcastss 12(%rdi), %ymm0
; CHECK-NEXT:    retq
entry:
  %ld = load <4 x float>, <4 x float>* %ptr
  %ret = shufflevector <4 x float> %ld, <4 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x float> %ret
}

; Full vector load followed by a splat of element 5 (<8 x float> -> <8 x float>);
; expected to fold into a broadcast from offset 20.
define <8 x float> @load_splat_8f32_8f32_55555555(<8 x float>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_8f32_8f32_55555555:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vbroadcastss 20(%rdi), %ymm0
; CHECK-NEXT:    retq
entry:
  %ld = load <8 x float>, <8 x float>* %ptr
  %ret = shufflevector <8 x float> %ld, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x float> %ret
}

; Full vector load followed by a splat of element 1 (<2 x i64> -> <2 x i64>).
define <2 x i64> @load_splat_2i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_2i64_2i64_1111:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = mem[2,3,2,3]
; CHECK-NEXT:    retq
entry:
  %ld = load <2 x i64>, <2 x i64>* %ptr
  %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i64> %ret
}

; Full vector load followed by a widening splat of element 1 (<2 x i64> -> <4 x i64>).
define <4 x i64> @load_splat_4i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_4i64_2i64_1111:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vmovaps (%rdi), %xmm0
; CHECK-NEXT:    vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %ld = load <2 x i64>, <2 x i64>* %ptr
  %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i64> %ret
}

; Full vector load followed by a splat of element 2 (<4 x i64> -> <4 x i64>).
define <4 x i64> @load_splat_4i64_4i64_2222(<4 x i64>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_4i64_4i64_2222:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vmovapd (%rdi), %ymm0
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %ld = load <4 x i64>, <4 x i64>* %ptr
  %ret = shufflevector <4 x i64> %ld, <4 x i64> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i64> %ret
}

; Full vector load followed by a splat of element 1 (<2 x double> -> <2 x double>).
define <2 x double> @load_splat_2f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_2f64_2f64_1111:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vmovaps (%rdi), %xmm0
; CHECK-NEXT:    vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT:    retq
entry:
  %ld = load <2 x double>, <2 x double>* %ptr
  %ret = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x double> %ret
}

; Full vector load followed by a widening splat of element 1
; (<2 x double> -> <4 x double>); expected to fold into a broadcast from offset 8.
define <4 x double> @load_splat_4f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_4f64_2f64_1111:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vbroadcastsd 8(%rdi), %ymm0
; CHECK-NEXT:    retq
entry:
  %ld = load <2 x double>, <2 x double>* %ptr
  %ret = shufflevector <2 x double> %ld, <2 x double> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x double> %ret
}

; Full vector load followed by a splat of element 2 (<4 x double> -> <4 x double>);
; expected to fold into a broadcast from offset 16.
define <4 x double> @load_splat_4f64_4f64_2222(<4 x double>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_4f64_4f64_2222:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vbroadcastsd 16(%rdi), %ymm0
; CHECK-NEXT:    retq
entry:
  %ld = load <4 x double>, <4 x double>* %ptr
  %ret = shufflevector <4 x double> %ld, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x double> %ret
}

; Unsupported vbroadcasts

; Scalar i64 load splatted into <2 x i64>: expected as a vmovq load plus vpshufd.
define <2 x i64> @G(i64* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: G:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; CHECK-NEXT:    retq
entry:
  %q = load i64, i64* %ptr, align 8
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %vecinit2.i = insertelement <2 x i64> %vecinit.i, i64 %q, i32 1
  ret <2 x i64> %vecinit2.i
}

; Register-operand shuffle whose mask defines only lane 0 (taken from element 1);
; the remaining mask entries are undef.
define <4 x i32> @H(<4 x i32> %a) {
; CHECK-LABEL: H:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; CHECK-NEXT:    retq
entry:
  %x = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  ret <4 x i32> %x
}

; Scalar double load splatted into <2 x double>: expected as vmovddup from memory.
define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: I:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; CHECK-NEXT:    retq
entry:
  %q = load double, double* %ptr, align 4
  %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
  %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
  ret <2 x double> %vecinit2.i
}

; Float splat combined with an unrelated load/store pair; the broadcast must
; still be selected alongside the extra memory operations.
define <4 x float> @_RR(float* %ptr, i32* %k) nounwind uwtable readnone ssp {
; CHECK-LABEL: _RR:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vbroadcastss (%rdi), %xmm0
; CHECK-NEXT:    movl (%rsi), %eax
; CHECK-NEXT:    movl %eax, (%rax)
; CHECK-NEXT:    retq
entry:
  %q = load float, float* %ptr, align 4
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
  ; force a chain
  %j = load i32, i32* %k, align 4
  store i32 %j, i32* undef
  ret <4 x float> %vecinit6.i
}

; Float splat expressed as a single insertelement plus a zero-mask shufflevector.
define <4 x float> @_RR2(float* %ptr, i32* %k) nounwind uwtable readnone ssp {
; CHECK-LABEL: _RR2:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vbroadcastss (%rdi), %xmm0
; CHECK-NEXT:    retq
entry:
  %q = load float, float* %ptr, align 4
  %v = insertelement <4 x float> undef, float %q, i32 0
  %t = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %t
}

; These tests check that a vbroadcast instruction is used when we have a splat
; formed from a concat_vectors (via the shufflevector) of two BUILD_VECTORs
; (via the insertelements).

; One <4 x float> build vector concatenated with itself.
define <8 x float> @splat_concat1(float* %p) {
; CHECK-LABEL: splat_concat1:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vbroadcastss (%rdi), %ymm0
; CHECK-NEXT:    retq
  %1 = load float, float* %p, align 4
  %2 = insertelement <4 x float> undef, float %1, i32 0
  %3 = insertelement <4 x float> %2, float %1, i32 1
  %4 = insertelement <4 x float> %3, float %1, i32 2
  %5 = insertelement <4 x float> %4, float %1, i32 3
  %6 = shufflevector <4 x float> %5, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x float> %6
}

; Two separately built <4 x float> vectors (%5 and %9) concatenated.
define <8 x float> @splat_concat2(float* %p) {
; CHECK-LABEL: splat_concat2:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vbroadcastss (%rdi), %ymm0
; CHECK-NEXT:    retq
  %1 = load float, float* %p, align 4
  %2 = insertelement <4 x float> undef, float %1, i32 0
  %3 = insertelement <4 x float> %2, float %1, i32 1
  %4 = insertelement <4 x float> %3, float %1, i32 2
  %5 = insertelement <4 x float> %4, float %1, i32 3
  %6 = insertelement <4 x float> undef, float %1, i32 0
  %7 = insertelement <4 x float> %6, float %1, i32 1
  %8 = insertelement <4 x float> %7, float %1, i32 2
  %9 = insertelement <4 x float> %8, float %1, i32 3
  %10 = shufflevector <4 x float> %5, <4 x float> %9, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x float> %10
}

; One <2 x double> build vector concatenated with itself.
define <4 x double> @splat_concat3(double* %p) {
; CHECK-LABEL: splat_concat3:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0
; CHECK-NEXT:    retq
  %1 = load double, double* %p, align 8
  %2 = insertelement <2 x double> undef, double %1, i32 0
  %3 = insertelement <2 x double> %2, double %1, i32 1
  %4 = shufflevector <2 x double> %3, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x double> %4
}

; Two separately built <2 x double> vectors (%3 and %5) concatenated.
define <4 x double> @splat_concat4(double* %p) {
; CHECK-LABEL: splat_concat4:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0
; CHECK-NEXT:    retq
  %1 = load double, double* %p, align 8
  %2 = insertelement <2 x double> undef, double %1, i32 0
  %3 = insertelement <2 x double> %2, double %1, i32 1
  %4 = insertelement <2 x double> undef, double %1, i32 0
  ; The second build vector chains from %4 (not %2) so that it is a distinct
  ; insertelement sequence, mirroring splat_concat2. Both lanes still hold %1.
  %5 = insertelement <2 x double> %4, double %1, i32 1
  %6 = shufflevector <2 x double> %3, <2 x double> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x double> %6
}