; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=X64

; Splat (broadcast) lowering with only +avx enabled, compiled once for a 32-bit
; and once for a 64-bit Darwin triple. Most functions load a scalar and insert
; it into every lane of a vector. Variants that also take %ptr2 store the
; loaded scalar as well, so the load has a second user; the expected output for
; those variants contains no broadcast-from-memory instruction.

; Splat a loaded i64 across <4 x i64>. The 64-bit run expects a single
; vbroadcastsd from memory; the 32-bit run expects the value to be assembled
; from two 32-bit loads (vmovd/vpinsrd) and widened with vinsertf128.
define <4 x i64> @A(i64* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: A:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl (%eax), %ecx
; X32-NEXT:    movl 4(%eax), %eax
; X32-NEXT:    vmovd %ecx, %xmm0
; X32-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; X32-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
; X32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: A:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastsd (%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %q = load i64, i64* %ptr, align 8
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
  %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
  %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
  ret <4 x i64> %vecinit6.i
}

; Same splat as @A, but the loaded i64 is also stored to %ptr2. Neither run
; expects a broadcast instruction; the 64-bit run expects vmovq + vmovddup +
; vinsertf128 on the register copy.
define <4 x i64> @A2(i64* %ptr, i64* %ptr2) nounwind uwtable readnone ssp {
; X32-LABEL: A2:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl (%ecx), %edx
; X32-NEXT:    movl 4(%ecx), %ecx
; X32-NEXT:    movl %ecx, 4(%eax)
; X32-NEXT:    movl %edx, (%eax)
; X32-NEXT:    vmovd %edx, %xmm0
; X32-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
; X32-NEXT:    vpinsrd $2, %edx, %xmm0, %xmm0
; X32-NEXT:    vpinsrd $3, %ecx, %xmm0, %xmm0
; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: A2:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    vmovq %rax, %xmm0
; X64-NEXT:    movq %rax, (%rsi)
; X64-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT:    retq
entry:
  %q = load i64, i64* %ptr, align 8
  store i64 %q, i64* %ptr2, align 8 ; to create a chain to prevent broadcast
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
  %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
  %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
  ret <4 x i64> %vecinit6.i
}

; Insert a loaded i32 into lanes 0-3 of <8 x i32>; lanes 4-7 are never written
; and stay undef. Both runs expect one vbroadcastss from memory into ymm0.
define <8 x i32> @B(i32* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: B:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: B:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastss (%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %q = load i32, i32* %ptr, align 4
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %vecinit2.i = insertelement <8 x i32> %vecinit.i, i32 %q, i32 1
  %vecinit4.i = insertelement <8 x i32> %vecinit2.i, i32 %q, i32 2
  %vecinit6.i = insertelement <8 x i32> %vecinit4.i, i32 %q, i32 3
  ret <8 x i32> %vecinit6.i
}

; Like @B, but all eight lanes of <8 x i32> are written explicitly. The
; expected output is the same single vbroadcastss.
define <8 x i32> @B2(i32* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: B2:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: B2:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastss (%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %q = load i32, i32* %ptr, align 4
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %vecinit2.i = insertelement <8 x i32> %vecinit.i, i32 %q, i32 1
  %vecinit4.i = insertelement <8 x i32> %vecinit2.i, i32 %q, i32 2
  %vecinit6.i = insertelement <8 x i32> %vecinit4.i, i32 %q, i32 3
  %vecinit8.i = insertelement <8 x i32> %vecinit6.i, i32 %q, i32 4
  %vecinit10.i = insertelement <8 x i32> %vecinit8.i, i32 %q, i32 5
  %vecinit12.i = insertelement <8 x i32> %vecinit10.i, i32 %q, i32 6
  %vecinit14.i = insertelement <8 x i32> %vecinit12.i, i32 %q, i32 7
  ret <8 x i32> %vecinit14.i
}

; @B2 plus a store of the loaded i32 to %ptr2. Expected: scalar load, vmovd,
; vpshufd splat within xmm0, then vinsertf128 to fill the upper half.
define <8 x i32> @B3(i32* %ptr, i32* %ptr2) nounwind uwtable readnone ssp {
; X32-LABEL: B3:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl (%ecx), %ecx
; X32-NEXT:    vmovd %ecx, %xmm0
; X32-NEXT:    movl %ecx, (%eax)
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: B3:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    vmovd %eax, %xmm0
; X64-NEXT:    movl %eax, (%rsi)
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT:    retq
entry:
  %q = load i32, i32* %ptr, align 4
  store i32 %q, i32* %ptr2, align 4 ; to create a chain to prevent broadcast
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %vecinit2.i = insertelement <8 x i32> %vecinit.i, i32 %q, i32 1
  %vecinit4.i = insertelement <8 x i32> %vecinit2.i, i32 %q, i32 2
  %vecinit6.i = insertelement <8 x i32> %vecinit4.i, i32 %q, i32 3
  %vecinit8.i = insertelement <8 x i32> %vecinit6.i, i32 %q, i32 4
  %vecinit10.i = insertelement <8 x i32> %vecinit8.i, i32 %q, i32 5
  %vecinit12.i = insertelement <8 x i32> %vecinit10.i, i32 %q, i32 6
  %vecinit14.i = insertelement <8 x i32> %vecinit12.i, i32 %q, i32 7
  ret <8 x i32> %vecinit14.i
}

; Splat a loaded double across <4 x double>. Both runs expect one vbroadcastsd
; from memory.
define <4 x double> @C(double* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: C:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastsd (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: C:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastsd (%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %q = load double, double* %ptr, align 8
  %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
  %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
  %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
  %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
  ret <4 x double> %vecinit6.i
}

; @C plus a store of the loaded double to %ptr2. Expected: vmovsd load, vmovsd
; store, vmovddup on the register, vinsertf128.
define <4 x double> @C2(double* %ptr, double* %ptr2) nounwind uwtable readnone ssp {
; X32-LABEL: C2:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT:    vmovsd %xmm0, (%eax)
; X32-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: C2:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    vmovsd %xmm0, (%rsi)
; X64-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT:    retq
entry:
  %q = load double, double* %ptr, align 8
  store double %q, double* %ptr2, align 8 ; to create a chain to prevent broadcast
  %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
  %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
  %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
  %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
  ret <4 x double> %vecinit6.i
}

; Insert a loaded float into lanes 0-3 of <8 x float>; lanes 4-7 stay undef.
; Both runs expect one vbroadcastss from memory into ymm0.
define <8 x float> @D(float* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: D:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: D:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastss (%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %q = load float, float* %ptr, align 4
  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
  %vecinit2.i = insertelement <8 x float> %vecinit.i, float %q, i32 1
  %vecinit4.i = insertelement <8 x float> %vecinit2.i, float %q, i32 2
  %vecinit6.i = insertelement <8 x float> %vecinit4.i, float %q, i32 3
  ret <8 x float> %vecinit6.i
}

; Like @D, but all eight lanes of <8 x float> are written explicitly.
define <8 x float> @D2(float* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: D2:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: D2:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastss (%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %q = load float, float* %ptr, align 4
  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
  %vecinit2.i = insertelement <8 x float> %vecinit.i, float %q, i32 1
  %vecinit4.i = insertelement <8 x float> %vecinit2.i, float %q, i32 2
  %vecinit6.i = insertelement <8 x float> %vecinit4.i, float %q, i32 3
  %vecinit8.i = insertelement <8 x float> %vecinit6.i, float %q, i32 4
  %vecinit10.i = insertelement <8 x float> %vecinit8.i, float %q, i32 5
  %vecinit12.i = insertelement <8 x float> %vecinit10.i, float %q, i32 6
  %vecinit14.i = insertelement <8 x float> %vecinit12.i, float %q, i32 7
  ret <8 x float> %vecinit14.i
}

; @D2 plus a store of the loaded float to %ptr2. Expected: vmovss load, vmovss
; store, vpermilps splat within xmm0, vinsertf128.
define <8 x float> @D3(float* %ptr, float* %ptr2) nounwind uwtable readnone ssp {
; X32-LABEL: D3:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT:    vmovss %xmm0, (%eax)
; X32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: D3:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT:    vmovss %xmm0, (%rsi)
; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT:    retq
entry:
  %q = load float, float* %ptr, align 4
  store float %q, float* %ptr2, align 4 ; to create a chain to prevent broadcast
  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
  %vecinit2.i = insertelement <8 x float> %vecinit.i, float %q, i32 1
  %vecinit4.i = insertelement <8 x float> %vecinit2.i, float %q, i32 2
  %vecinit6.i = insertelement <8 x float> %vecinit4.i, float %q, i32 3
  %vecinit8.i = insertelement <8 x float> %vecinit6.i, float %q, i32 4
  %vecinit10.i = insertelement <8 x float> %vecinit8.i, float %q, i32 5
  %vecinit12.i = insertelement <8 x float> %vecinit10.i, float %q, i32 6
  %vecinit14.i = insertelement <8 x float> %vecinit12.i, float %q, i32 7
  ret <8 x float> %vecinit14.i
}

;;;; 128-bit versions

; Splat a loaded float across <4 x float>. Both runs expect vbroadcastss from
; memory into xmm0.
define <4 x float> @e(float* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: e:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss (%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: e:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastss (%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %q = load float, float* %ptr, align 4
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
  ret <4 x float> %vecinit6.i
}

; @e plus a store of the loaded float to %ptr2. Expected: vmovss load, vmovss
; store, vpermilps splat on the register.
define <4 x float> @e2(float* %ptr, float* %ptr2) nounwind uwtable readnone ssp {
; X32-LABEL: e2:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT:    vmovss %xmm0, (%eax)
; X32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X32-NEXT:    retl
;
; X64-LABEL: e2:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT:    vmovss %xmm0, (%rsi)
; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-NEXT:    retq
entry:
  %q = load float, float* %ptr, align 4
  store float %q, float* %ptr2, align 4 ; to create a chain to prevent broadcast
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
  ret <4 x float> %vecinit6.i
}

; Don't broadcast constants on pre-AVX2 hardware.
; The splatted value is the constant 0xbf80000000000000 (printed as
; -7.812500e-03 in the expected output); %ptr is not used. Both runs expect a
; full-vector vmovaps instead of a broadcast.
define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: _e2:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [-7.812500e-03,-7.812500e-03,-7.812500e-03,-7.812500e-03]
; X32-NEXT:    retl
;
; X64-LABEL: _e2:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [-7.812500e-03,-7.812500e-03,-7.812500e-03,-7.812500e-03]
; X64-NEXT:    retq
entry:
  %vecinit.i = insertelement <4 x float> undef, float 0xbf80000000000000, i32 0
  %vecinit2.i = insertelement <4 x float> %vecinit.i, float 0xbf80000000000000, i32 1
  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float 0xbf80000000000000, i32 2
  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float 0xbf80000000000000, i32 3
  ret <4 x float> %vecinit6.i
}


; Splat a loaded i32 across <4 x i32>. Both runs expect vbroadcastss (the
; float-domain broadcast) from memory into xmm0.
define <4 x i32> @F(i32* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: F:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss (%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: F:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastss (%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %q = load i32, i32* %ptr, align 4
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %vecinit2.i = insertelement <4 x i32> %vecinit.i, i32 %q, i32 1
  %vecinit4.i = insertelement <4 x i32> %vecinit2.i, i32 %q, i32 2
  %vecinit6.i = insertelement <4 x i32> %vecinit4.i, i32 %q, i32 3
  ret <4 x i32> %vecinit6.i
}

; @F plus a store of the loaded i32 to %ptr2. Expected: scalar load and store,
; vmovd, vpshufd splat on the register.
define <4 x i32> @F2(i32* %ptr, i32* %ptr2) nounwind uwtable readnone ssp {
; X32-LABEL: F2:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl (%ecx), %ecx
; X32-NEXT:    movl %ecx, (%eax)
; X32-NEXT:    vmovd %ecx, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X32-NEXT:    retl
;
; X64-LABEL: F2:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    movl %eax, (%rsi)
; X64-NEXT:    vmovd %eax, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-NEXT:    retq
entry:
  %q = load i32, i32* %ptr, align 4
  store i32 %q, i32* %ptr2, align 4 ; to create a chain to prevent broadcast
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %vecinit2.i = insertelement <4 x i32> %vecinit.i, i32 %q, i32 1
  %vecinit4.i = insertelement <4 x i32> %vecinit2.i, i32 %q, i32 2
  %vecinit6.i = insertelement <4 x i32> %vecinit4.i, i32 %q, i32 3
  ret <4 x i32> %vecinit6.i
}

; FIXME: Pointer adjusted broadcasts

; Load a whole <4 x i32> and splat its element 1. The expected output is a
; vpermilps with a memory operand, not a broadcast.
; NOTE(review): this looks like the case the FIXME above refers to - confirm.
define <4 x i32> @load_splat_4i32_4i32_1111(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4i32_4i32_1111:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpermilps {{.*#+}} xmm0 = mem[1,1,1,1]
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_4i32_4i32_1111:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vpermilps {{.*#+}} xmm0 = mem[1,1,1,1]
; X64-NEXT:    retq
entry:
  %ld = load <4 x i32>, <4 x i32>* %ptr
  %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %ret
}

; Load a <4 x i32> and splat its element 3 into <8 x i32>. Both runs expect a
; vbroadcastss whose address is offset by 12 bytes.
define <8 x i32> @load_splat_8i32_4i32_33333333(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_8i32_4i32_33333333:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss 12(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_8i32_4i32_33333333:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastss 12(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <4 x i32>, <4 x i32>* %ptr
  %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x i32> %ret
}

; Load an <8 x i32> and splat its element 5. Both runs expect a vbroadcastss
; whose address is offset by 20 bytes.
define <8 x i32> @load_splat_8i32_8i32_55555555(<8 x i32>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_8i32_8i32_55555555:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss 20(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_8i32_8i32_55555555:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastss 20(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <8 x i32>, <8 x i32>* %ptr
  %ret = shufflevector <8 x i32> %ld, <8 x i32> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x i32> %ret
}

; Load a <4 x float> and splat its element 1. Both runs expect a vbroadcastss
; into xmm0 whose address is offset by 4 bytes.
define <4 x float> @load_splat_4f32_4f32_1111(<4 x float>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4f32_4f32_1111:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss 4(%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_4f32_4f32_1111:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastss 4(%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %ld = load <4 x float>, <4 x float>* %ptr
  %ret = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x float> %ret
}

; Load a <4 x float> and splat its element 3 into <8 x float>. Both runs expect
; a vbroadcastss whose address is offset by 12 bytes.
define <8 x float> @load_splat_8f32_4f32_33333333(<4 x float>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_8f32_4f32_33333333:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss 12(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_8f32_4f32_33333333:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastss 12(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <4 x float>, <4 x float>* %ptr
  %ret = shufflevector <4 x float> %ld, <4 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x float> %ret
}

; Load an <8 x float> and splat its element 5. Both runs expect a vbroadcastss
; whose address is offset by 20 bytes.
define <8 x float> @load_splat_8f32_8f32_55555555(<8 x float>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_8f32_8f32_55555555:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss 20(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_8f32_8f32_55555555:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastss 20(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <8 x float>, <8 x float>* %ptr
  %ret = shufflevector <8 x float> %ld, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x float> %ret
}

; Load a <2 x i64> and splat its element 1 (the shuffle mask is <1, 1>). The
; expected output is a vpermilps with a memory operand, not a broadcast.
define <2 x i64> @load_splat_2i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_2i64_2i64_1111:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpermilps {{.*#+}} xmm0 = mem[2,3,2,3]
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_2i64_2i64_1111:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vpermilps {{.*#+}} xmm0 = mem[2,3,2,3]
; X64-NEXT:    retq
entry:
  %ld = load <2 x i64>, <2 x i64>* %ptr
  %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i64> %ret
}

; Load a <2 x i64> and splat its element 1 into <4 x i64>. Both runs expect a
; vbroadcastsd whose address is offset by 8 bytes.
define <4 x i64> @load_splat_4i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4i64_2i64_1111:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastsd 8(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_4i64_2i64_1111:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastsd 8(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <2 x i64>, <2 x i64>* %ptr
  %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i64> %ret
}

; Load a <4 x i64> and splat its element 2. Both runs expect a vbroadcastsd
; whose address is offset by 16 bytes.
define <4 x i64> @load_splat_4i64_4i64_2222(<4 x i64>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4i64_4i64_2222:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastsd 16(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_4i64_4i64_2222:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastsd 16(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <4 x i64>, <4 x i64>* %ptr
  %ret = shufflevector <4 x i64> %ld, <4 x i64> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i64> %ret
}

; Load a <2 x double> and splat its element 1 (the shuffle mask is <1, 1>).
; Both runs expect a vmovddup with a memory operand; the address operand itself
; is hidden behind the regex in the check line.
define <2 x double> @load_splat_2f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_2f64_2f64_1111:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_2f64_2f64_1111:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X64-NEXT:    retq
entry:
  %ld = load <2 x double>, <2 x double>* %ptr
  %ret = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x double> %ret
}

; Load a <2 x double> and splat its element 1 into <4 x double>. Both runs
; expect a vbroadcastsd whose address is offset by 8 bytes.
define <4 x double> @load_splat_4f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4f64_2f64_1111:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastsd 8(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_4f64_2f64_1111:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastsd 8(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <2 x double>, <2 x double>* %ptr
  %ret = shufflevector <2 x double> %ld, <2 x double> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x double> %ret
}

; Load a <4 x double> and splat its element 2. Both runs expect a vbroadcastsd
; whose address is offset by 16 bytes.
define <4 x double> @load_splat_4f64_4f64_2222(<4 x double>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4f64_4f64_2222:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastsd 16(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_4f64_4f64_2222:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastsd 16(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <4 x double>, <4 x double>* %ptr
  %ret = shufflevector <4 x double> %ld, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x double> %ret
}

; Unsupported vbroadcasts

; Splat a loaded i64 across <2 x i64>. No broadcast instruction is expected:
; the 32-bit run builds the vector with vmovd/vpinsrd, the 64-bit run uses
; vmovsd followed by vpermilps.
define <2 x i64> @G(i64* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: G:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl (%eax), %ecx
; X32-NEXT:    movl 4(%eax), %eax
; X32-NEXT:    vmovd %ecx, %xmm0
; X32-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; X32-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
; X32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: G:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
; X64-NEXT:    retq
entry:
  %q = load i64, i64* %ptr, align 8
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %vecinit2.i = insertelement <2 x i64> %vecinit.i, i64 %q, i32 1
  ret <2 x i64> %vecinit2.i
}

; @G plus a store of the loaded i64 to %ptr2. The 64-bit run expects movq load
; and store, vmovq, then vpshufd.
define <2 x i64> @G2(i64* %ptr, i64* %ptr2) nounwind uwtable readnone ssp {
; X32-LABEL: G2:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl (%ecx), %edx
; X32-NEXT:    movl 4(%ecx), %ecx
; X32-NEXT:    movl %ecx, 4(%eax)
; X32-NEXT:    movl %edx, (%eax)
; X32-NEXT:    vmovd %edx, %xmm0
; X32-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
; X32-NEXT:    vpinsrd $2, %edx, %xmm0, %xmm0
; X32-NEXT:    vpinsrd $3, %ecx, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: G2:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    movq %rax, (%rsi)
; X64-NEXT:    vmovq %rax, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; X64-NEXT:    retq
entry:
  %q = load i64, i64* %ptr, align 8
  store i64 %q, i64* %ptr2, align 8 ; to create a chain to prevent broadcast
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %vecinit2.i = insertelement <2 x i64> %vecinit.i, i64 %q, i32 1
  ret <2 x i64> %vecinit2.i
}

; Register shuffle whose mask defines only result lane 0 (taken from lane 1 of
; %a); the other lanes are undef. Expected: a single vpermilps, no broadcast.
define <4 x i32> @H(<4 x i32> %a) {
; X32-LABEL: H:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; X32-NEXT:    retl
;
; X64-LABEL: H:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; X64-NEXT:    retq
entry:
  %x = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  ret <4 x i32> %x
}

; Splat a loaded double (load is only 4-byte aligned) across <2 x double>.
; Both runs expect a vmovddup with a memory operand.
define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: I:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X32-NEXT:    retl
;
; X64-LABEL: I:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X64-NEXT:    retq
entry:
  %q = load double, double* %ptr, align 4
  %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
  %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
  ret <2 x double> %vecinit2.i
}

; @I plus a store of the loaded double to %ptr2. Expected: vmovsd load, vmovsd
; store, vmovddup on the register.
define <2 x double> @I2(double* %ptr, double* %ptr2) nounwind uwtable readnone ssp {
; X32-LABEL: I2:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT:    vmovsd %xmm0, (%eax)
; X32-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X32-NEXT:    retl
;
; X64-LABEL: I2:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    vmovsd %xmm0, (%rsi)
; X64-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X64-NEXT:    retq
entry:
  %q = load double, double* %ptr, align 4
  store double %q, double* %ptr2, align 4 ; to create a chain to prevent broadcast
  %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
  %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
  ret <2 x double> %vecinit2.i
}

; Splat a loaded float, followed by an unrelated i32 load from %k and a store
; of that value to an undef address. The broadcast from memory is still
; expected; the extra load/store must appear after it.
define <4 x float> @_RR(float* %ptr, i32* %k) nounwind uwtable readnone ssp {
; X32-LABEL: _RR:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vbroadcastss (%ecx), %xmm0
; X32-NEXT:    movl (%eax), %eax
; X32-NEXT:    movl %eax, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: _RR:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastss (%rdi), %xmm0
; X64-NEXT:    movl (%rsi), %eax
; X64-NEXT:    movl %eax, (%rax)
; X64-NEXT:    retq
entry:
  %q = load float, float* %ptr, align 4
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
  ; force a chain
  %j = load i32, i32* %k, align 4
  store i32 %j, i32* undef
  ret <4 x float> %vecinit6.i
}

; Splat written as insertelement into lane 0 followed by a shufflevector with
; an all-zero mask. %k is not used. Both runs expect vbroadcastss from memory.
define <4 x float> @_RR2(float* %ptr, i32* %k) nounwind uwtable readnone ssp {
; X32-LABEL: _RR2:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss (%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: _RR2:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastss (%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %q = load float, float* %ptr, align 4
  %v = insertelement <4 x float> undef, float %q, i32 0
  %t = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %t
}

; These tests check that a vbroadcast instruction is used when we have a splat
; formed from a concat_vectors (via the shufflevector) of two BUILD_VECTORs
; (via the insertelements).

; One <4 x float> splat, concatenated with itself by the shuffle mask
; <0,1,2,3,0,1,2,3>.
define <8 x float> @splat_concat1(float* %p) {
; X32-LABEL: splat_concat1:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: splat_concat1:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastss (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load float, float* %p, align 4
  %2 = insertelement <4 x float> undef, float %1, i32 0
  %3 = insertelement <4 x float> %2, float %1, i32 1
  %4 = insertelement <4 x float> %3, float %1, i32 2
  %5 = insertelement <4 x float> %4, float %1, i32 3
  %6 = shufflevector <4 x float> %5, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x float> %6
}

; Two independently built <4 x float> splats of the same scalar (%2..%5 and
; %6..%9), concatenated by an identity shuffle mask.
define <8 x float> @splat_concat2(float* %p) {
; X32-LABEL: splat_concat2:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: splat_concat2:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastss (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load float, float* %p, align 4
  %2 = insertelement <4 x float> undef, float %1, i32 0
  %3 = insertelement <4 x float> %2, float %1, i32 1
  %4 = insertelement <4 x float> %3, float %1, i32 2
  %5 = insertelement <4 x float> %4, float %1, i32 3
  %6 = insertelement <4 x float> undef, float %1, i32 0
  %7 = insertelement <4 x float> %6, float %1, i32 1
  %8 = insertelement <4 x float> %7, float %1, i32 2
  %9 = insertelement <4 x float> %8, float %1, i32 3
  %10 = shufflevector <4 x float> %5, <4 x float> %9, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x float> %10
}

; One <2 x double> splat, concatenated with itself by the shuffle mask
; <0,1,0,1>.
define <4 x double> @splat_concat3(double* %p) {
; X32-LABEL: splat_concat3:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastsd (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: splat_concat3:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastsd (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load double, double* %p, align 8
  %2 = insertelement <2 x double> undef, double %1, i32 0
  %3 = insertelement <2 x double> %2, double %1, i32 1
  %4 = shufflevector <2 x double> %3, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x double> %4
}

; Two independently built <2 x double> splats of the same scalar (%2..%3 and
; %4..%5), concatenated by an identity shuffle mask. This is the double
; counterpart of @splat_concat2.
define <4 x double> @splat_concat4(double* %p) {
; X32-LABEL: splat_concat4:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastsd (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: splat_concat4:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastsd (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load double, double* %p, align 8
  %2 = insertelement <2 x double> undef, double %1, i32 0
  %3 = insertelement <2 x double> %2, double %1, i32 1
  %4 = insertelement <2 x double> undef, double %1, i32 0
  ; The second vector continues its own chain from %4 (not from %2), so %4 is
  ; live and the shuffle really has two separately constructed operands.
  %5 = insertelement <2 x double> %4, double %1, i32 1
  %6 = shufflevector <2 x double> %3, <2 x double> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x double> %6
}

; PR34041
; Only lane 0 of the <2 x double> source is defined; the mask <1,0,0,0> takes
; result lane 0 from the undef lane 1. Both runs expect a plain vbroadcastsd.
define <4 x double> @broadcast_shuffle_1000(double* %p) {
; X32-LABEL: broadcast_shuffle_1000:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastsd (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: broadcast_shuffle_1000:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastsd (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load double, double* %p
  %2 = insertelement <2 x double> undef, double %1, i32 0
  %3 = shufflevector <2 x double> %2, <2 x double> undef, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
  ret <4 x double> %3
}

; Two partially defined sources: %2 has the scalar only in lane 1, %3 only in
; lane 0. The mask <1,0,3,2> mixes defined and undef lanes from both. Both runs
; expect a plain vbroadcastsd.
define <4 x double> @broadcast_shuffle1032(double* %p) {
; X32-LABEL: broadcast_shuffle1032:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastsd (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: broadcast_shuffle1032:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastsd (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load double, double* %p
  %2 = insertelement <2 x double> undef, double %1, i32 1
  %3 = insertelement <2 x double> undef, double %1, i32 0
  %4 = shufflevector <2 x double> %2, <2 x double> %3, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  ret <4 x double> %4
}

;
; When VBROADCAST replaces an existing load, ensure it still respects lifetime dependencies.
;
; Two 16-byte stack slots are each handed to @gfunc, loaded as <4 x float>, and
; have their lifetime ended before the next slot's lifetime starts. Element 1
; of each loaded vector is extracted afterwards and the two are subtracted.
; The expected output reads one float with vmovss right after each call and
; keeps the first value across the second call via a spill/reload; no broadcast
; instruction appears.
define float @broadcast_lifetime() nounwind {
; X32-LABEL: broadcast_lifetime:
; X32:       ## %bb.0:
; X32-NEXT:    pushl %esi
; X32-NEXT:    subl $40, %esp
; X32-NEXT:    leal {{[0-9]+}}(%esp), %esi
; X32-NEXT:    movl %esi, (%esp)
; X32-NEXT:    calll _gfunc
; X32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp) ## 4-byte Spill
; X32-NEXT:    movl %esi, (%esp)
; X32-NEXT:    calll _gfunc
; X32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT:    vsubss {{[0-9]+}}(%esp), %xmm0, %xmm0 ## 4-byte Folded Reload
; X32-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp)
; X32-NEXT:    flds {{[0-9]+}}(%esp)
; X32-NEXT:    addl $40, %esp
; X32-NEXT:    popl %esi
; X32-NEXT:    retl
;
; X64-LABEL: broadcast_lifetime:
; X64:       ## %bb.0:
; X64-NEXT:    subq $40, %rsp
; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdi
; X64-NEXT:    callq _gfunc
; X64-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT:    vmovss %xmm0, {{[0-9]+}}(%rsp) ## 4-byte Spill
; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdi
; X64-NEXT:    callq _gfunc
; X64-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT:    vsubss {{[0-9]+}}(%rsp), %xmm0, %xmm0 ## 4-byte Folded Reload
; X64-NEXT:    addq $40, %rsp
; X64-NEXT:    retq
  %1 = alloca <4 x float>, align 16
  %2 = alloca <4 x float>, align 16
  %3 = bitcast <4 x float>* %1 to i8*
  %4 = bitcast <4 x float>* %2 to i8*

  ; First slot: live only around the first call and the load of %5.
  call void @llvm.lifetime.start.p0i8(i64 16, i8* %3)
  call void @gfunc(<4 x float>* %1)
  %5 = load <4 x float>, <4 x float>* %1, align 16
  call void @llvm.lifetime.end.p0i8(i64 16, i8* %3)

  ; Second slot: its lifetime begins only after the first slot's has ended.
  call void @llvm.lifetime.start.p0i8(i64 16, i8* %4)
  call void @gfunc(<4 x float>* %2)
  %6 = load <4 x float>, <4 x float>* %2, align 16
  call void @llvm.lifetime.end.p0i8(i64 16, i8* %4)

  ; Both extracts happen after both lifetimes have ended.
  %7 = extractelement <4 x float> %5, i32 1
  %8 = extractelement <4 x float> %6, i32 1
  %9 = fsub float %8, %7
  ret float %9
}

; External callee that receives each stack slot, and the lifetime marker
; intrinsics used above.
declare void @gfunc(<4 x float>*)
declare void @llvm.lifetime.start.p0i8(i64, i8*)
declare void @llvm.lifetime.end.p0i8(i64, i8*)