; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=X64

; Splat of a loaded i64 into all four lanes of a <4 x i64>. The x86_64 run
; expects a single vbroadcastsd from memory; the i686 run expects the vector to
; be assembled from two 32-bit halves with vmovd/vpinsrd/vinsertf128.
define <4 x i64> @A(i64* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: A:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl (%eax), %ecx
; X32-NEXT:    movl 4(%eax), %eax
; X32-NEXT:    vmovd %ecx, %xmm0
; X32-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; X32-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
; X32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: A:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastsd (%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %q = load i64, i64* %ptr, align 8
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
  %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
  %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
  ret <4 x i64> %vecinit6.i
}

; Loaded i32 inserted into lanes 0-3 of an <8 x i32>; lanes 4-7 are left undef.
; Both runs expect a 256-bit vbroadcastss from memory.
define <8 x i32> @B(i32* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: B:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: B:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastss (%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %q = load i32, i32* %ptr, align 4
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %vecinit2.i = insertelement <8 x i32> %vecinit.i, i32 %q, i32 1
  %vecinit4.i = insertelement <8 x i32> %vecinit2.i, i32 %q, i32 2
  %vecinit6.i = insertelement <8 x i32> %vecinit4.i, i32 %q, i32 3
  ret <8 x i32> %vecinit6.i
}

; Splat of a loaded double into all four lanes of a <4 x double>.
; Both runs expect a 256-bit vbroadcastsd from memory.
define <4 x double> @C(double* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: C:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastsd (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: C:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastsd (%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %q = load double, double* %ptr, align 8
  %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
  %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
  %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
  %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
  ret <4 x double> %vecinit6.i
}

; Loaded float inserted into lanes 0-3 of an <8 x float>; lanes 4-7 are left
; undef. Both runs expect a 256-bit vbroadcastss from memory.
define <8 x float> @D(float* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: D:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: D:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastss (%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %q = load float, float* %ptr, align 4
  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
  %vecinit2.i = insertelement <8 x float> %vecinit.i, float %q, i32 1
  %vecinit4.i = insertelement <8 x float> %vecinit2.i, float %q, i32 2
  %vecinit6.i = insertelement <8 x float> %vecinit4.i, float %q, i32 3
  ret <8 x float> %vecinit6.i
}

;;;; 128-bit versions

; Splat of a loaded float into all four lanes of a <4 x float>.
; Both runs expect a 128-bit vbroadcastss from memory.
define <4 x float> @e(float* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: e:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss (%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: e:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastss (%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %q = load float, float* %ptr, align 4
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
  ret <4 x float> %vecinit6.i
}

; Don't broadcast constants on pre-AVX2 hardware.
; The splat is built from a floating-point constant rather than a load (%ptr is
; unused); both runs expect a full-vector vmovaps of the constant.
define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: _e2:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [-7.812500e-03,-7.812500e-03,-7.812500e-03,-7.812500e-03]
; X32-NEXT:    retl
;
; X64-LABEL: _e2:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [-7.812500e-03,-7.812500e-03,-7.812500e-03,-7.812500e-03]
; X64-NEXT:    retq
entry:
  %vecinit.i = insertelement <4 x float> undef, float 0xbf80000000000000, i32 0
  %vecinit2.i = insertelement <4 x float> %vecinit.i, float 0xbf80000000000000, i32 1
  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float 0xbf80000000000000, i32 2
  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float 0xbf80000000000000, i32 3
  ret <4 x float> %vecinit6.i
}


; Splat of a loaded i32 into all four lanes of a <4 x i32>.
; Both runs expect a 128-bit vbroadcastss from memory.
define <4 x i32> @F(i32* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: F:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss (%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: F:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastss (%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %q = load i32, i32* %ptr, align 4
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %vecinit2.i = insertelement <4 x i32> %vecinit.i, i32 %q, i32 1
  %vecinit4.i = insertelement <4 x i32> %vecinit2.i, i32 %q, i32 2
  %vecinit6.i = insertelement <4 x i32> %vecinit4.i, i32 %q, i32 3
  ret <4 x i32> %vecinit6.i
}

; FIXME: Pointer adjusted broadcasts
;
; The load_splat_<dst>_<src>_<mask> tests below load a whole vector and splat
; one of its elements with a shufflevector. Where a broadcast is expected, the
; checked memory operand carries the byte offset of the selected element.

; Splat element 1 of a loaded <4 x i32>. Currently expected to lower to a
; vpshufd with a memory operand rather than a broadcast.
define <4 x i32> @load_splat_4i32_4i32_1111(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4i32_4i32_1111:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = mem[1,1,1,1]
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_4i32_4i32_1111:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = mem[1,1,1,1]
; X64-NEXT:    retq
entry:
  %ld = load <4 x i32>, <4 x i32>* %ptr
  %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %ret
}

; Splat element 3 of a loaded <4 x i32> into an <8 x i32>; expected to become a
; vbroadcastss from offset 12.
define <8 x i32> @load_splat_8i32_4i32_33333333(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_8i32_4i32_33333333:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss 12(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_8i32_4i32_33333333:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastss 12(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <4 x i32>, <4 x i32>* %ptr
  %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x i32> %ret
}

; Splat element 5 of a loaded <8 x i32>; expected to become a vbroadcastss from
; offset 20.
define <8 x i32> @load_splat_8i32_8i32_55555555(<8 x i32>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_8i32_8i32_55555555:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss 20(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_8i32_8i32_55555555:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastss 20(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <8 x i32>, <8 x i32>* %ptr
  %ret = shufflevector <8 x i32> %ld, <8 x i32> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x i32> %ret
}

; Splat element 1 of a loaded <4 x float>; expected to become a 128-bit
; vbroadcastss from offset 4.
define <4 x float> @load_splat_4f32_4f32_1111(<4 x float>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4f32_4f32_1111:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss 4(%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_4f32_4f32_1111:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastss 4(%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %ld = load <4 x float>, <4 x float>* %ptr
  %ret = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x float> %ret
}

; Splat element 3 of a loaded <4 x float> into an <8 x float>; expected to
; become a vbroadcastss from offset 12.
define <8 x float> @load_splat_8f32_4f32_33333333(<4 x float>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_8f32_4f32_33333333:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss 12(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_8f32_4f32_33333333:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastss 12(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <4 x float>, <4 x float>* %ptr
  %ret = shufflevector <4 x float> %ld, <4 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x float> %ret
}

; Splat element 5 of a loaded <8 x float>; expected to become a vbroadcastss
; from offset 20.
define <8 x float> @load_splat_8f32_8f32_55555555(<8 x float>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_8f32_8f32_55555555:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss 20(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_8f32_8f32_55555555:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastss 20(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <8 x float>, <8 x float>* %ptr
  %ret = shufflevector <8 x float> %ld, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x float> %ret
}

; Splat element 1 of a loaded <2 x i64>. Currently expected to lower to a
; vpshufd with a memory operand rather than a broadcast.
define <2 x i64> @load_splat_2i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_2i64_2i64_1111:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = mem[2,3,2,3]
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_2i64_2i64_1111:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = mem[2,3,2,3]
; X64-NEXT:    retq
entry:
  %ld = load <2 x i64>, <2 x i64>* %ptr
  %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i64> %ret
}

; Splat element 1 of a loaded <2 x i64> into a <4 x i64>; expected to become a
; vbroadcastsd from offset 8.
define <4 x i64> @load_splat_4i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4i64_2i64_1111:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastsd 8(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_4i64_2i64_1111:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastsd 8(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <2 x i64>, <2 x i64>* %ptr
  %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i64> %ret
}

; Splat element 2 of a loaded <4 x i64>; expected to become a vbroadcastsd from
; offset 16.
define <4 x i64> @load_splat_4i64_4i64_2222(<4 x i64>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4i64_4i64_2222:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastsd 16(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_4i64_4i64_2222:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastsd 16(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <4 x i64>, <4 x i64>* %ptr
  %ret = shufflevector <4 x i64> %ld, <4 x i64> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i64> %ret
}

; Splat element 1 of a loaded <2 x double>; expected to become a vmovddup with
; a memory operand.
define <2 x double> @load_splat_2f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_2f64_2f64_1111:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_2f64_2f64_1111:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X64-NEXT:    retq
entry:
  %ld = load <2 x double>, <2 x double>* %ptr
  %ret = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x double> %ret
}

; Splat element 1 of a loaded <2 x double> into a <4 x double>; expected to
; become a vbroadcastsd from offset 8.
define <4 x double> @load_splat_4f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4f64_2f64_1111:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastsd 8(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_4f64_2f64_1111:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastsd 8(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <2 x double>, <2 x double>* %ptr
  %ret = shufflevector <2 x double> %ld, <2 x double> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x double> %ret
}

; Splat element 2 of a loaded <4 x double>; expected to become a vbroadcastsd
; from offset 16.
define <4 x double> @load_splat_4f64_4f64_2222(<4 x double>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4f64_4f64_2222:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastsd 16(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_4f64_4f64_2222:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastsd 16(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <4 x double>, <4 x double>* %ptr
  %ret = shufflevector <4 x double> %ld, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x double> %ret
}

; Unsupported vbroadcasts

; Splat of a loaded i64 into a <2 x i64>. No broadcast instruction is expected:
; the i686 run checks a vmovd/vpinsrd sequence and the x86_64 run checks
; vmovq followed by vpshufd.
define <2 x i64> @G(i64* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: G:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl (%eax), %ecx
; X32-NEXT:    movl 4(%eax), %eax
; X32-NEXT:    vmovd %ecx, %xmm0
; X32-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; X32-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
; X32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: G:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; X64-NEXT:    retq
entry:
  %q = load i64, i64* %ptr, align 8
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %vecinit2.i = insertelement <2 x i64> %vecinit.i, i64 %q, i32 1
  ret <2 x i64> %vecinit2.i
}

; Register-to-register shuffle whose mask defines only lane 0 (element 1 of
; %a); the remaining lanes are undef. Expected to lower to a vpshufd, not a
; broadcast.
define <4 x i32> @H(<4 x i32> %a) {
; X32-LABEL: H:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; X32-NEXT:    retl
;
; X64-LABEL: H:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; X64-NEXT:    retq
entry:
  %x = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  ret <4 x i32> %x
}

; Splat of a loaded double (load marked align 4) into a <2 x double>; expected
; to lower to vmovddup with a memory operand.
define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: I:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X32-NEXT:    retl
;
; X64-LABEL: I:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X64-NEXT:    retq
entry:
  %q = load double, double* %ptr, align 4
  %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
  %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
  ret <2 x double> %vecinit2.i
}

; Float splat followed by an unrelated load from %k and a store to an undef
; address (the "force a chain" sequence below). The broadcast from memory is
; still expected, followed by the load/store pair.
define <4 x float> @_RR(float* %ptr, i32* %k) nounwind uwtable readnone ssp {
; X32-LABEL: _RR:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vbroadcastss (%ecx), %xmm0
; X32-NEXT:    movl (%eax), %eax
; X32-NEXT:    movl %eax, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: _RR:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastss (%rdi), %xmm0
; X64-NEXT:    movl (%rsi), %eax
; X64-NEXT:    movl %eax, (%rax)
; X64-NEXT:    retq
entry:
  %q = load float, float* %ptr, align 4
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
  ; force a chain
  %j = load i32, i32* %k, align 4
  store i32 %j, i32* undef
  ret <4 x float> %vecinit6.i
}

; Float splat written as a single insertelement into lane 0 followed by a
; zeroinitializer-mask shufflevector (%k is unused). Expected to lower to a
; 128-bit vbroadcastss from memory.
define <4 x float> @_RR2(float* %ptr, i32* %k) nounwind uwtable readnone ssp {
; X32-LABEL: _RR2:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss (%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: _RR2:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastss (%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %q = load float, float* %ptr, align 4
  %v = insertelement <4 x float> undef, float %q, i32 0
  %t = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %t
}

; These tests check that a vbroadcast instruction is used when we have a splat
; formed from a concat_vectors (via the shufflevector) of two BUILD_VECTORs
; (via the insertelements).

; One <4 x float> splat, concatenated with itself by the shuffle mask.
define <8 x float> @splat_concat1(float* %p) {
; X32-LABEL: splat_concat1:
; X32:       ## BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: splat_concat1:
; X64:       ## BB#0:
; X64-NEXT:    vbroadcastss (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load float, float* %p, align 4
  %2 = insertelement <4 x float> undef, float %1, i32 0
  %3 = insertelement <4 x float> %2, float %1, i32 1
  %4 = insertelement <4 x float> %3, float %1, i32 2
  %5 = insertelement <4 x float> %4, float %1, i32 3
  %6 = shufflevector <4 x float> %5, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x float> %6
}

; Two separately built <4 x float> splats of the same loaded value (%5 and %9),
; concatenated by the shuffle.
define <8 x float> @splat_concat2(float* %p) {
; X32-LABEL: splat_concat2:
; X32:       ## BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: splat_concat2:
; X64:       ## BB#0:
; X64-NEXT:    vbroadcastss (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load float, float* %p, align 4
  %2 = insertelement <4 x float> undef, float %1, i32 0
  %3 = insertelement <4 x float> %2, float %1, i32 1
  %4 = insertelement <4 x float> %3, float %1, i32 2
  %5 = insertelement <4 x float> %4, float %1, i32 3
  %6 = insertelement <4 x float> undef, float %1, i32 0
  %7 = insertelement <4 x float> %6, float %1, i32 1
  %8 = insertelement <4 x float> %7, float %1, i32 2
  %9 = insertelement <4 x float> %8, float %1, i32 3
  %10 = shufflevector <4 x float> %5, <4 x float> %9, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x float> %10
}

; One <2 x double> splat, concatenated with itself by the shuffle mask.
define <4 x double> @splat_concat3(double* %p) {
; X32-LABEL: splat_concat3:
; X32:       ## BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastsd (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: splat_concat3:
; X64:       ## BB#0:
; X64-NEXT:    vbroadcastsd (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load double, double* %p, align 8
  %2 = insertelement <2 x double> undef, double %1, i32 0
  %3 = insertelement <2 x double> %2, double %1, i32 1
  %4 = shufflevector <2 x double> %3, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x double> %4
}

; Two separately built <2 x double> splats of the same loaded value (%3 and
; %5), concatenated by the shuffle.
define <4 x double> @splat_concat4(double* %p) {
; X32-LABEL: splat_concat4:
; X32:       ## BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastsd (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: splat_concat4:
; X64:       ## BB#0:
; X64-NEXT:    vbroadcastsd (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load double, double* %p, align 8
  %2 = insertelement <2 x double> undef, double %1, i32 0
  %3 = insertelement <2 x double> %2, double %1, i32 1
  %4 = insertelement <2 x double> undef, double %1, i32 0
  ; The second vector must continue from %4 (not %2) so that it is built by its
  ; own insertelement chain, mirroring splat_concat2; otherwise %4 is dead.
  %5 = insertelement <2 x double> %4, double %1, i32 1
  %6 = shufflevector <2 x double> %3, <2 x double> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x double> %6
}