; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X32 --check-prefix=X32-AVX2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX2
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefix=X32 --check-prefix=X32-AVX512VL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512VL

define <16 x i8> @BB16(i8* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: BB16:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpbroadcastb (%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: BB16:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vpbroadcastb (%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %q = load i8, i8* %ptr, align 4
  %q0 = insertelement <16 x i8> undef, i8 %q, i32 0
  %q1 = insertelement <16 x i8> %q0, i8 %q, i32 1
  %q2 = insertelement <16 x i8> %q1, i8 %q, i32 2
  %q3 = insertelement <16 x i8> %q2, i8 %q, i32 3
  %q4 = insertelement <16 x i8> %q3, i8 %q, i32 4
  %q5 = insertelement <16 x i8> %q4, i8 %q, i32 5
  %q6 = insertelement <16 x i8> %q5, i8 %q, i32 6
  %q7 = insertelement <16 x i8> %q6, i8 %q, i32 7
  %q8 = insertelement <16 x i8> %q7, i8 %q, i32 8
  %q9 = insertelement <16 x i8> %q8, i8 %q, i32 9
  %qa = insertelement <16 x i8> %q9, i8 %q, i32 10
  %qb = insertelement <16 x i8> %qa, i8 %q, i32 11
  %qc = insertelement <16 x i8> %qb, i8 %q, i32 12
  %qd = insertelement <16 x i8> %qc, i8 %q, i32 13
  %qe = insertelement <16 x i8> %qd, i8 %q, i32 14
  %qf = insertelement <16 x i8> %qe, i8 %q, i32 15
  ret <16 x i8> %qf
}

define <32 x i8> @BB32(i8* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: BB32:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpbroadcastb (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: BB32:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vpbroadcastb (%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %q = load i8, i8* %ptr, align 4
  %q0 = insertelement <32 x i8> undef, i8 %q, i32 0
  %q1 = insertelement <32 x i8> %q0, i8 %q, i32 1
  %q2 = insertelement <32 x i8> %q1, i8 %q, i32 2
  %q3 = insertelement <32 x i8> %q2, i8 %q, i32 3
  %q4 = insertelement <32 x i8> %q3, i8 %q, i32 4
  %q5 = insertelement <32 x i8> %q4, i8 %q, i32 5
  %q6 = insertelement <32 x i8> %q5, i8 %q, i32 6
  %q7 = insertelement <32 x i8> %q6, i8 %q, i32 7
  %q8 = insertelement <32 x i8> %q7, i8 %q, i32 8
  %q9 = insertelement <32 x i8> %q8, i8 %q, i32 9
  %qa = insertelement <32 x i8> %q9, i8 %q, i32 10
  %qb = insertelement <32 x i8> %qa, i8 %q, i32 11
  %qc = insertelement <32 x i8> %qb, i8 %q, i32 12
  %qd = insertelement <32 x i8> %qc, i8 %q, i32 13
  %qe = insertelement <32 x i8> %qd, i8 %q, i32 14
  %qf = insertelement <32 x i8> %qe, i8 %q, i32 15

  %q20 = insertelement <32 x i8> %qf, i8 %q, i32 16
  %q21 = insertelement <32 x i8> %q20, i8 %q, i32 17
  %q22 = insertelement <32 x i8> %q21, i8 %q, i32 18
  %q23 = insertelement <32 x i8> %q22, i8 %q, i32 19
  %q24 = insertelement <32 x i8> %q23, i8 %q, i32 20
  %q25 = insertelement <32 x i8> %q24, i8 %q, i32 21
  %q26 = insertelement <32 x i8> %q25, i8 %q, i32 22
  %q27 = insertelement <32 x i8> %q26, i8 %q, i32 23
  %q28 = insertelement <32 x i8> %q27, i8 %q, i32 24
  %q29 = insertelement <32 x i8> %q28, i8 %q, i32 25
  %q2a = insertelement <32 x i8> %q29, i8 %q, i32 26
  %q2b = insertelement <32 x i8> %q2a, i8 %q, i32 27
  %q2c = insertelement <32 x i8> %q2b, i8 %q, i32 28
  %q2d = insertelement <32 x i8> %q2c, i8 %q, i32 29
  %q2e = insertelement <32 x i8> %q2d, i8 %q, i32 30
  %q2f = insertelement <32 x i8> %q2e, i8 %q, i32 31
  ret <32 x i8> %q2f
}

define <8 x i16> @W16(i16* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: W16:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpbroadcastw (%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: W16:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vpbroadcastw (%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %q = load i16, i16* %ptr, align 4
  %q0 = insertelement <8 x i16> undef, i16 %q, i32 0
  %q1 = insertelement <8 x i16> %q0, i16 %q, i32 1
  %q2 = insertelement <8 x i16> %q1, i16 %q, i32 2
  %q3 = insertelement <8 x i16> %q2, i16 %q, i32 3
  %q4 = insertelement <8 x i16> %q3, i16 %q, i32 4
  %q5 = insertelement <8 x i16> %q4, i16 %q, i32 5
  %q6 = insertelement <8 x i16> %q5, i16 %q, i32 6
  %q7 = insertelement <8 x i16> %q6, i16 %q, i32 7
  ret <8 x i16> %q7
}

define <16 x i16> @WW16(i16* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: WW16:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpbroadcastw (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: WW16:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vpbroadcastw (%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %q = load i16, i16* %ptr, align 4
  %q0 = insertelement <16 x i16> undef, i16 %q, i32 0
  %q1 = insertelement <16 x i16> %q0, i16 %q, i32 1
  %q2 = insertelement <16 x i16> %q1, i16 %q, i32 2
  %q3 = insertelement <16 x i16> %q2, i16 %q, i32 3
  %q4 = insertelement <16 x i16> %q3, i16 %q, i32 4
  %q5 = insertelement <16 x i16> %q4, i16 %q, i32 5
  %q6 = insertelement <16 x i16> %q5, i16 %q, i32 6
  %q7 = insertelement <16 x i16> %q6, i16 %q, i32 7
  %q8 = insertelement <16 x i16> %q7, i16 %q, i32 8
  %q9 = insertelement <16 x i16> %q8, i16 %q, i32 9
  %qa = insertelement <16 x i16> %q9, i16 %q, i32 10
  %qb = insertelement <16 x i16> %qa, i16 %q, i32 11
  %qc = insertelement <16 x i16> %qb, i16 %q, i32 12
  %qd = insertelement <16 x i16> %qc, i16 %q, i32 13
  %qe = insertelement <16 x i16> %qd, i16 %q, i32 14
  %qf = insertelement <16 x i16> %qe, i16 %q, i32 15
  ret <16 x i16> %qf
}

define <4 x i32> @D32(i32* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: D32:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss (%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: D32:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastss (%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %q = load i32, i32* %ptr, align 4
  %q0 = insertelement <4 x i32> undef, i32 %q, i32 0
  %q1 = insertelement <4 x i32> %q0, i32 %q, i32 1
  %q2 = insertelement <4 x i32> %q1, i32 %q, i32 2
  %q3 = insertelement <4 x i32> %q2, i32 %q, i32 3
  ret <4 x i32> %q3
}

define <8 x i32> @DD32(i32* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: DD32:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: DD32:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastss (%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %q = load i32, i32* %ptr, align 4
  %q0 = insertelement <8 x i32> undef, i32 %q, i32 0
  %q1 = insertelement <8 x i32> %q0, i32 %q, i32 1
  %q2 = insertelement <8 x i32> %q1, i32 %q, i32 2
  %q3 = insertelement <8 x i32> %q2, i32 %q, i32 3
  %q4 = insertelement <8 x i32> %q3, i32 %q, i32 4
  %q5 = insertelement <8 x i32> %q4, i32 %q, i32 5
  %q6 = insertelement <8 x i32> %q5, i32 %q, i32 6
  %q7 = insertelement <8 x i32> %q6, i32 %q, i32 7
  ret <8 x i32> %q7
}

define <2 x i64> @Q64(i64* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: Q64:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpbroadcastq (%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: Q64:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vpbroadcastq (%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %q = load i64, i64* %ptr, align 4
  %q0 = insertelement <2 x i64> undef, i64 %q, i32 0
  %q1 = insertelement <2 x i64> %q0, i64 %q, i32 1
  ret <2 x i64> %q1
}

define <4 x i64> @QQ64(i64* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: QQ64:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT:    vbroadcastsd %xmm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: QQ64:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastsd (%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %q = load i64, i64* %ptr, align 4
  %q0 = insertelement <4 x i64> undef, i64 %q, i32 0
  %q1 = insertelement <4 x i64> %q0, i64 %q, i32 1
  %q2 = insertelement <4 x i64> %q1, i64 %q, i32 2
  %q3 = insertelement <4 x i64> %q2, i64 %q, i32 3
  ret <4 x i64> %q3
}

define <8 x i16> @broadcast_mem_v4i16_v8i16(<4 x i16>* %ptr) {
; X32-LABEL: broadcast_mem_v4i16_v8i16:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X32-NEXT:    retl
;
; X64-LABEL: broadcast_mem_v4i16_v8i16:
; X64:       ## %bb.0:
; X64-NEXT:    vpbroadcastq (%rdi), %xmm0
; X64-NEXT:    retq
  %load = load <4 x i16>, <4 x i16>* %ptr
  %shuf = shufflevector <4 x i16> %load, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i16> %shuf
}

define <16 x i16> @broadcast_mem_v4i16_v16i16(<4 x i16>* %ptr) {
; X32-LABEL: broadcast_mem_v4i16_v16i16:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT:    vbroadcastsd %xmm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: broadcast_mem_v4i16_v16i16:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastsd (%rdi), %ymm0
; X64-NEXT:    retq
  %load = load <4 x i16>, <4 x i16>* %ptr
  %shuf = shufflevector <4 x i16> %load, <4 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <16 x i16> %shuf
}

; FIXME: Pointer adjusted broadcasts

define <16 x i8> @load_splat_16i8_16i8_1111111111111111(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_16i8_16i8_1111111111111111:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpbroadcastb 1(%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_16i8_16i8_1111111111111111:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vpbroadcastb 1(%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %ld = load <16 x i8>, <16 x i8>* %ptr
  %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i8> %ret
}

define <32 x i8> @load_splat_32i8_16i8_11111111111111111111111111111111(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_32i8_16i8_11111111111111111111111111111111:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpbroadcastb 1(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_32i8_16i8_11111111111111111111111111111111:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vpbroadcastb 1(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <16 x i8>, <16 x i8>* %ptr
  %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <32 x i8> %ret
}

define <32 x i8> @load_splat_32i8_32i8_11111111111111111111111111111111(<32 x i8>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_32i8_32i8_11111111111111111111111111111111:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpbroadcastb 1(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_32i8_32i8_11111111111111111111111111111111:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vpbroadcastb 1(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <32 x i8>, <32 x i8>* %ptr
  %ret = shufflevector <32 x i8> %ld, <32 x i8> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <32 x i8> %ret
}

define <8 x i16> @load_splat_8i16_8i16_11111111(<8 x i16>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_8i16_8i16_11111111:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpbroadcastw 2(%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_8i16_8i16_11111111:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vpbroadcastw 2(%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %ld = load <8 x i16>, <8 x i16>* %ptr
  %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i16> %ret
}

define <16 x i16> @load_splat_16i16_8i16_1111111111111111(<8 x i16>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_16i16_8i16_1111111111111111:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpbroadcastw 2(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_16i16_8i16_1111111111111111:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vpbroadcastw 2(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <8 x i16>, <8 x i16>* %ptr
  %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i16> %ret
}

define <16 x i16> @load_splat_16i16_16i16_1111111111111111(<16 x i16>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_16i16_16i16_1111111111111111:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpbroadcastw 2(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_16i16_16i16_1111111111111111:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vpbroadcastw 2(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <16 x i16>, <16 x i16>* %ptr
  %ret = shufflevector <16 x i16> %ld, <16 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i16> %ret
}

define <4 x i32> @load_splat_4i32_4i32_1111(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4i32_4i32_1111:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss 4(%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_4i32_4i32_1111:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastss 4(%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %ld = load <4 x i32>, <4 x i32>* %ptr
  %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %ret
}

define <8 x i32> @load_splat_8i32_4i32_33333333(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_8i32_4i32_33333333:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss 12(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_8i32_4i32_33333333:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastss 12(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <4 x i32>, <4 x i32>* %ptr
  %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x i32> %ret
}

define <8 x i32> @load_splat_8i32_8i32_55555555(<8 x i32>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_8i32_8i32_55555555:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss 20(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_8i32_8i32_55555555:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastss 20(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <8 x i32>, <8 x i32>* %ptr
  %ret = shufflevector <8 x i32> %ld, <8 x i32> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x i32> %ret
}

define <4 x float> @load_splat_4f32_4f32_1111(<4 x float>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4f32_4f32_1111:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss 4(%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_4f32_4f32_1111:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastss 4(%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %ld = load <4 x float>, <4 x float>* %ptr
  %ret = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x float> %ret
}

define <8 x float> @load_splat_8f32_4f32_33333333(<4 x float>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_8f32_4f32_33333333:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss 12(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_8f32_4f32_33333333:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastss 12(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <4 x float>, <4 x float>* %ptr
  %ret = shufflevector <4 x float> %ld, <4 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x float> %ret
}

define <8 x float> @load_splat_8f32_8f32_55555555(<8 x float>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_8f32_8f32_55555555:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss 20(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_8f32_8f32_55555555:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastss 20(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <8 x float>, <8 x float>* %ptr
  %ret = shufflevector <8 x float> %ld, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x float> %ret
}

define <2 x i64> @load_splat_2i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_2i64_2i64_1111:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_2i64_2i64_1111:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vpbroadcastq 8(%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %ld = load <2 x i64>, <2 x i64>* %ptr
  %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i64> %ret
}

define <4 x i64> @load_splat_4i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4i64_2i64_1111:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastsd 8(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_4i64_2i64_1111:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastsd 8(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <2 x i64>, <2 x i64>* %ptr
  %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i64> %ret
}

define <4 x i64> @load_splat_4i64_4i64_2222(<4 x i64>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4i64_4i64_2222:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastsd 16(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_4i64_4i64_2222:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastsd 16(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <4 x i64>, <4 x i64>* %ptr
  %ret = shufflevector <4 x i64> %ld, <4 x i64> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i64> %ret
}

define <2 x double> @load_splat_2f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_2f64_2f64_1111:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_2f64_2f64_1111:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X64-NEXT:    retq
entry:
  %ld = load <2 x double>, <2 x double>* %ptr
  %ret = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x double> %ret
}

define <4 x double> @load_splat_4f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4f64_2f64_1111:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastsd 8(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_4f64_2f64_1111:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastsd 8(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <2 x double>, <2 x double>* %ptr
  %ret = shufflevector <2 x double> %ld, <2 x double> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x double> %ret
}

define <4 x double> @load_splat_4f64_4f64_2222(<4 x double>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4f64_4f64_2222:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastsd 16(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_4f64_4f64_2222:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastsd 16(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <4 x double>, <4 x double>* %ptr
  %ret = shufflevector <4 x double> %ld, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x double> %ret
}

; make sure that we still don't support broadcast double into 128-bit vector
; this used to crash
define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: I:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X32-NEXT:    retl
;
; X64-LABEL: I:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X64-NEXT:    retq
entry:
  %q = load double, double* %ptr, align 4
  %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
  %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
  ret <2 x double> %vecinit2.i
}

define <8 x i32> @V111(<8 x i32> %in) nounwind uwtable readnone ssp {
; X32-AVX2-LABEL: V111:
; X32-AVX2:       ## %bb.0: ## %entry
; X32-AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2]
; X32-AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; X32-AVX2-NEXT:    retl
;
; X64-AVX2-LABEL: V111:
; X64-AVX2:       ## %bb.0: ## %entry
; X64-AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2]
; X64-AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    retq
;
; X32-AVX512VL-LABEL: V111:
; X32-AVX512VL:       ## %bb.0: ## %entry
; X32-AVX512VL-NEXT:    vpaddd LCPI29_0{1to8}, %ymm0, %ymm0
; X32-AVX512VL-NEXT:    retl
;
; X64-AVX512VL-LABEL: V111:
; X64-AVX512VL:       ## %bb.0: ## %entry
; X64-AVX512VL-NEXT:    vpaddd {{.*}}(%rip){1to8}, %ymm0, %ymm0
; X64-AVX512VL-NEXT:    retq
entry:
  %g = add <8 x i32> %in, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %g
}

define <8 x float> @V113(<8 x float> %in) nounwind uwtable readnone ssp {
; X32-AVX2-LABEL: V113:
; X32-AVX2:       ## %bb.0: ## %entry
; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125]
; X32-AVX2-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; X32-AVX2-NEXT:    retl
;
; X64-AVX2-LABEL: V113:
; X64-AVX2:       ## %bb.0: ## %entry
; X64-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125]
; X64-AVX2-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    retq
;
; X32-AVX512VL-LABEL: V113:
; X32-AVX512VL:       ## %bb.0: ## %entry
; X32-AVX512VL-NEXT:    vaddps LCPI30_0{1to8}, %ymm0, %ymm0
; X32-AVX512VL-NEXT:    retl
;
; X64-AVX512VL-LABEL: V113:
; X64-AVX512VL:       ## %bb.0: ## %entry
; X64-AVX512VL-NEXT:    vaddps {{.*}}(%rip){1to8}, %ymm0, %ymm0
; X64-AVX512VL-NEXT:    retq
entry:
  %g = fadd <8 x float> %in, <float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000>
  ret <8 x float> %g
}

define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: _e2:
; X32:       ## %bb.0:
; X32-NEXT:    vbroadcastss {{.*#+}} xmm0 = [-0.0078125,-0.0078125,-0.0078125,-0.0078125]
; X32-NEXT:    retl
;
; X64-LABEL: _e2:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastss {{.*#+}} xmm0 = [-0.0078125,-0.0078125,-0.0078125,-0.0078125]
; X64-NEXT:    retq
  %vecinit.i = insertelement <4 x float> undef, float 0xbf80000000000000, i32 0
  %vecinit2.i = insertelement <4 x float> %vecinit.i, float 0xbf80000000000000, i32 1
  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float 0xbf80000000000000, i32 2
  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float 0xbf80000000000000, i32 3
  ret <4 x float> %vecinit6.i
}

define <8 x i8> @_e4(i8* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: _e4:
; X32:       ## %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [52,52,52,52,52,52,52,52]
; X32-NEXT:    retl
;
; X64-LABEL: _e4:
; X64:       ## %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [52,52,52,52,52,52,52,52]
; X64-NEXT:    retq
  %vecinit0.i = insertelement <8 x i8> undef, i8 52, i32 0
  %vecinit1.i = insertelement <8 x i8> %vecinit0.i, i8 52, i32 1
  %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 52, i32 2
  %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 52, i32 3
  %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 52, i32 4
  %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 52, i32 5
  %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 52, i32 6
  %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 52, i32 7
  ret <8 x i8> %vecinit7.i
}

define void @crash() nounwind alwaysinline {
; X32-LABEL: crash:
; X32:       ## %bb.0: ## %WGLoopsEntry
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    testb %al, %al
; X32-NEXT:    je LBB33_1
; X32-NEXT:  ## %bb.2: ## %ret
; X32-NEXT:    retl
; X32-NEXT:    .p2align 4, 0x90
; X32-NEXT:  LBB33_1: ## %footer329VF
; X32-NEXT:    ## =>This Inner Loop Header: Depth=1
; X32-NEXT:    jmp LBB33_1
;
; X64-LABEL: crash:
; X64:       ## %bb.0: ## %WGLoopsEntry
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    testb %al, %al
; X64-NEXT:    je LBB33_1
; X64-NEXT:  ## %bb.2: ## %ret
; X64-NEXT:    retq
; X64-NEXT:    .p2align 4, 0x90
; X64-NEXT:  LBB33_1: ## %footer329VF
; X64-NEXT:    ## =>This Inner Loop Header: Depth=1
; X64-NEXT:    jmp LBB33_1
WGLoopsEntry:
  br i1 undef, label %ret, label %footer329VF

footer329VF:
  %A.0.inVF = fmul float undef, 6.553600e+04
  %B.0.in407VF = fmul <8 x float> undef, <float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04>
  %A.0VF = fptosi float %A.0.inVF to i32
  %B.0408VF = fptosi <8 x float> %B.0.in407VF to <8 x i32>
  %0 = and <8 x i32> %B.0408VF, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %1 = and i32 %A.0VF, 65535
  %temp1098VF = insertelement <8 x i32> undef, i32 %1, i32 0
  %vector1099VF = shufflevector <8 x i32> %temp1098VF, <8 x i32> undef, <8 x i32> zeroinitializer
  br i1 undef, label %preload1201VF, label %footer349VF

preload1201VF:
  br label %footer349VF

footer349VF:
  %2 = mul nsw <8 x i32> undef, %0
  %3 = mul nsw <8 x i32> undef, %vector1099VF
  br label %footer329VF

ret:
  ret void
}

define <8 x i32> @_inreg0(i32 %scalar) nounwind uwtable readnone ssp {
; X32-LABEL: _inreg0:
; X32:       ## %bb.0:
; X32-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm0
; X32-NEXT:    retl
;
; X64-AVX2-LABEL: _inreg0:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vmovd %edi, %xmm0
; X64-AVX2-NEXT:    vpbroadcastd %xmm0, %ymm0
; X64-AVX2-NEXT:    retq
;
; X64-AVX512VL-LABEL: _inreg0:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpbroadcastd %edi, %ymm0
; X64-AVX512VL-NEXT:    retq
  %in = insertelement <8 x i32> undef, i32 %scalar, i32 0
  %wide = shufflevector <8 x i32> %in, <8 x i32> undef, <8 x i32> zeroinitializer
  ret <8 x i32> %wide
}

define <8 x float> @_inreg1(float %scalar) nounwind uwtable readnone ssp {
; X32-LABEL: _inreg1:
; X32:       ## %bb.0:
; X32-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: _inreg1:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastss %xmm0, %ymm0
; X64-NEXT:    retq
  %in = insertelement <8 x float> undef, float %scalar, i32 0
  %wide = shufflevector <8 x float> %in, <8 x float> undef, <8 x i32> zeroinitializer
  ret <8 x float> %wide
}

define <4 x float> @_inreg2(float %scalar) nounwind uwtable readnone ssp {
; X32-LABEL: _inreg2:
; X32:       ## %bb.0:
; X32-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: _inreg2:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastss %xmm0, %xmm0
; X64-NEXT:    retq
  %in = insertelement <4 x float> undef, float %scalar, i32 0
  %wide = shufflevector <4 x float> %in, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %wide
}

define <4 x double> @_inreg3(double %scalar) nounwind uwtable readnone ssp {
; X32-LABEL: _inreg3:
; X32:       ## %bb.0:
; X32-NEXT:    vbroadcastsd {{[0-9]+}}(%esp), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: _inreg3:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastsd %xmm0, %ymm0
; X64-NEXT:    retq
  %in = insertelement <4 x double> undef, double %scalar, i32 0
  %wide = shufflevector <4 x double> %in, <4 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %wide
}

define <8 x float> @_inreg8xfloat(<8 x float> %a) {
; X32-LABEL: _inreg8xfloat:
; X32:       ## %bb.0:
; X32-NEXT:    vbroadcastss %xmm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: _inreg8xfloat:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastss %xmm0, %ymm0
; X64-NEXT:    retq
  %b = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> zeroinitializer
  ret <8 x float> %b
}

define <4 x float> @_inreg4xfloat(<4 x float> %a) {
; X32-LABEL: _inreg4xfloat:
; X32:       ## %bb.0:
; X32-NEXT:    vbroadcastss %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: _inreg4xfloat:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastss %xmm0, %xmm0
; X64-NEXT:    retq
  %b = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %b
}

define <16 x i16> @_inreg16xi16(<16 x i16> %a) {
; X32-LABEL: _inreg16xi16:
; X32:       ## %bb.0:
; X32-NEXT:    vpbroadcastw %xmm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: _inreg16xi16:
; X64:       ## %bb.0:
; X64-NEXT:    vpbroadcastw %xmm0, %ymm0
; X64-NEXT:    retq
  %b = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> zeroinitializer
  ret <16 x i16> %b
}

define <8 x i16> @_inreg8xi16(<8 x i16> %a) {
; X32-LABEL: _inreg8xi16:
; X32:       ## %bb.0:
; X32-NEXT:    vpbroadcastw %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: _inreg8xi16:
; X64:       ## %bb.0:
; X64-NEXT:    vpbroadcastw %xmm0, %xmm0
; X64-NEXT:    retq
  %b = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %b
}

define <4 x i64> @_inreg4xi64(<4 x i64> %a) {
; X32-LABEL: _inreg4xi64:
; X32:       ## %bb.0:
; X32-NEXT:    vbroadcastsd %xmm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: _inreg4xi64:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastsd %xmm0, %ymm0
; X64-NEXT:    retq
  %b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> zeroinitializer
  ret <4 x i64> %b
}

define <2 x i64> @_inreg2xi64(<2 x i64> %a) {
; X32-LABEL: _inreg2xi64:
; X32:       ## %bb.0:
; X32-NEXT:    vpbroadcastq %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: _inreg2xi64:
; X64:       ## %bb.0:
; X64-NEXT:    vpbroadcastq %xmm0, %xmm0
; X64-NEXT:    retq
  %b = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %b
}

define <4 x double> @_inreg4xdouble(<4 x double> %a) {
; X32-LABEL: _inreg4xdouble:
; X32:       ## %bb.0:
; X32-NEXT:    vbroadcastsd %xmm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: _inreg4xdouble:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastsd %xmm0, %ymm0
; X64-NEXT:    retq
  %b = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %b
}

define <2
x double> @_inreg2xdouble(<2 x double> %a) { 895 ; X32-LABEL: _inreg2xdouble: 896 ; X32: ## %bb.0: 897 ; X32-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] 898 ; X32-NEXT: retl 899 ; 900 ; X64-LABEL: _inreg2xdouble: 901 ; X64: ## %bb.0: 902 ; X64-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] 903 ; X64-NEXT: retq 904 %b = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> zeroinitializer 905 ret <2 x double> %b 906 } 907 908 define <8 x i32> @_inreg8xi32(<8 x i32> %a) { 909 ; X32-LABEL: _inreg8xi32: 910 ; X32: ## %bb.0: 911 ; X32-NEXT: vbroadcastss %xmm0, %ymm0 912 ; X32-NEXT: retl 913 ; 914 ; X64-LABEL: _inreg8xi32: 915 ; X64: ## %bb.0: 916 ; X64-NEXT: vbroadcastss %xmm0, %ymm0 917 ; X64-NEXT: retq 918 %b = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> zeroinitializer 919 ret <8 x i32> %b 920 } 921 922 define <4 x i32> @_inreg4xi32(<4 x i32> %a) { 923 ; X32-LABEL: _inreg4xi32: 924 ; X32: ## %bb.0: 925 ; X32-NEXT: vbroadcastss %xmm0, %xmm0 926 ; X32-NEXT: retl 927 ; 928 ; X64-LABEL: _inreg4xi32: 929 ; X64: ## %bb.0: 930 ; X64-NEXT: vbroadcastss %xmm0, %xmm0 931 ; X64-NEXT: retq 932 %b = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> zeroinitializer 933 ret <4 x i32> %b 934 } 935 936 define <32 x i8> @_inreg32xi8(<32 x i8> %a) { 937 ; X32-LABEL: _inreg32xi8: 938 ; X32: ## %bb.0: 939 ; X32-NEXT: vpbroadcastb %xmm0, %ymm0 940 ; X32-NEXT: retl 941 ; 942 ; X64-LABEL: _inreg32xi8: 943 ; X64: ## %bb.0: 944 ; X64-NEXT: vpbroadcastb %xmm0, %ymm0 945 ; X64-NEXT: retq 946 %b = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> zeroinitializer 947 ret <32 x i8> %b 948 } 949 950 define <16 x i8> @_inreg16xi8(<16 x i8> %a) { 951 ; X32-LABEL: _inreg16xi8: 952 ; X32: ## %bb.0: 953 ; X32-NEXT: vpbroadcastb %xmm0, %xmm0 954 ; X32-NEXT: retl 955 ; 956 ; X64-LABEL: _inreg16xi8: 957 ; X64: ## %bb.0: 958 ; X64-NEXT: vpbroadcastb %xmm0, %xmm0 959 ; X64-NEXT: retq 960 %b = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> zeroinitializer 961 ret <16 x i8> %b 962 
} 963 964 ; These tests check that a vbroadcast instruction is used when we have a splat 965 ; formed from a concat_vectors (via the shufflevector) of two BUILD_VECTORs 966 ; (via the insertelements). 967 968 define <8 x float> @splat_concat1(float %f) { 969 ; X32-LABEL: splat_concat1: 970 ; X32: ## %bb.0: 971 ; X32-NEXT: vbroadcastss {{[0-9]+}}(%esp), %ymm0 972 ; X32-NEXT: retl 973 ; 974 ; X64-LABEL: splat_concat1: 975 ; X64: ## %bb.0: 976 ; X64-NEXT: vbroadcastss %xmm0, %ymm0 977 ; X64-NEXT: retq 978 %1 = insertelement <4 x float> undef, float %f, i32 0 979 %2 = insertelement <4 x float> %1, float %f, i32 1 980 %3 = insertelement <4 x float> %2, float %f, i32 2 981 %4 = insertelement <4 x float> %3, float %f, i32 3 982 %5 = shufflevector <4 x float> %4, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 983 ret <8 x float> %5 984 } 985 986 define <8 x float> @splat_concat2(float %f) { 987 ; X32-LABEL: splat_concat2: 988 ; X32: ## %bb.0: 989 ; X32-NEXT: vbroadcastss {{[0-9]+}}(%esp), %ymm0 990 ; X32-NEXT: retl 991 ; 992 ; X64-LABEL: splat_concat2: 993 ; X64: ## %bb.0: 994 ; X64-NEXT: vbroadcastss %xmm0, %ymm0 995 ; X64-NEXT: retq 996 %1 = insertelement <4 x float> undef, float %f, i32 0 997 %2 = insertelement <4 x float> %1, float %f, i32 1 998 %3 = insertelement <4 x float> %2, float %f, i32 2 999 %4 = insertelement <4 x float> %3, float %f, i32 3 1000 %5 = insertelement <4 x float> undef, float %f, i32 0 1001 %6 = insertelement <4 x float> %5, float %f, i32 1 1002 %7 = insertelement <4 x float> %6, float %f, i32 2 1003 %8 = insertelement <4 x float> %7, float %f, i32 3 1004 %9 = shufflevector <4 x float> %4, <4 x float> %8, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1005 ret <8 x float> %9 1006 } 1007 1008 define <4 x double> @splat_concat3(double %d) { 1009 ; X32-LABEL: splat_concat3: 1010 ; X32: ## %bb.0: 1011 ; X32-NEXT: vbroadcastsd {{[0-9]+}}(%esp), %ymm0 1012 ; X32-NEXT: retl 1013 ; 1014 ; 
X64-LABEL: splat_concat3: 1015 ; X64: ## %bb.0: 1016 ; X64-NEXT: vbroadcastsd %xmm0, %ymm0 1017 ; X64-NEXT: retq 1018 %1 = insertelement <2 x double> undef, double %d, i32 0 1019 %2 = insertelement <2 x double> %1, double %d, i32 1 1020 %3 = shufflevector <2 x double> %2, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> 1021 ret <4 x double> %3 1022 } 1023 1024 define <4 x double> @splat_concat4(double %d) { 1025 ; X32-LABEL: splat_concat4: 1026 ; X32: ## %bb.0: 1027 ; X32-NEXT: vbroadcastsd {{[0-9]+}}(%esp), %ymm0 1028 ; X32-NEXT: retl 1029 ; 1030 ; X64-LABEL: splat_concat4: 1031 ; X64: ## %bb.0: 1032 ; X64-NEXT: vbroadcastsd %xmm0, %ymm0 1033 ; X64-NEXT: retq 1034 %1 = insertelement <2 x double> undef, double %d, i32 0 1035 %2 = insertelement <2 x double> %1, double %d, i32 1 1036 %3 = insertelement <2 x double> undef, double %d, i32 0 1037 %4 = insertelement <2 x double> %3, double %d, i32 1 1038 %5 = shufflevector <2 x double> %2, <2 x double> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1039 ret <4 x double> %5 1040 } 1041 1042 ; Test cases for <rdar://problem/16074331>. 1043 ; Instruction selection for the broadcast instruction fails if 1044 ; the load cannot be folded into the broadcast. 1045 ; This happens if the load initially has one use but other uses are 1046 ; created later, or if selection DAG cannot prove that folding the 1047 ; load will not create a cycle in the DAG. 1048 ; Those test cases exercise the latter.
1049 1050 define void @isel_crash_16b(i8* %cV_R.addr) { 1051 ; X32-LABEL: isel_crash_16b: 1052 ; X32: ## %bb.0: ## %eintry 1053 ; X32-NEXT: subl $60, %esp 1054 ; X32-NEXT: .cfi_def_cfa_offset 64 1055 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 1056 ; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0 1057 ; X32-NEXT: vmovaps %xmm0, (%esp) 1058 ; X32-NEXT: vpbroadcastb (%eax), %xmm1 1059 ; X32-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp) 1060 ; X32-NEXT: vmovdqa %xmm1, {{[0-9]+}}(%esp) 1061 ; X32-NEXT: addl $60, %esp 1062 ; X32-NEXT: retl 1063 ; 1064 ; X64-LABEL: isel_crash_16b: 1065 ; X64: ## %bb.0: ## %eintry 1066 ; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0 1067 ; X64-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) 1068 ; X64-NEXT: vpbroadcastb (%rdi), %xmm1 1069 ; X64-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) 1070 ; X64-NEXT: vmovdqa %xmm1, -{{[0-9]+}}(%rsp) 1071 ; X64-NEXT: retq 1072 eintry: 1073 %__a.addr.i = alloca <2 x i64>, align 16 1074 %__b.addr.i = alloca <2 x i64>, align 16 1075 %vCr = alloca <2 x i64>, align 16 1076 store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16 1077 %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16 1078 %tmp2 = load i8, i8* %cV_R.addr, align 4 1079 %splat.splatinsert = insertelement <16 x i8> undef, i8 %tmp2, i32 0 1080 %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer 1081 %tmp3 = bitcast <16 x i8> %splat.splat to <2 x i64> 1082 store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16 1083 store <2 x i64> %tmp3, <2 x i64>* %__b.addr.i, align 16 1084 ret void 1085 } 1086 1087 define void @isel_crash_32b(i8* %cV_R.addr) { 1088 ; X32-LABEL: isel_crash_32b: 1089 ; X32: ## %bb.0: ## %eintry 1090 ; X32-NEXT: pushl %ebp 1091 ; X32-NEXT: .cfi_def_cfa_offset 8 1092 ; X32-NEXT: .cfi_offset %ebp, -8 1093 ; X32-NEXT: movl %esp, %ebp 1094 ; X32-NEXT: .cfi_def_cfa_register %ebp 1095 ; X32-NEXT: andl $-32, %esp 1096 ; X32-NEXT: subl $128, %esp 1097 ; X32-NEXT: movl 8(%ebp), %eax 1098 ; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0 1099 ; 
X32-NEXT: vmovaps %ymm0, (%esp) 1100 ; X32-NEXT: vpbroadcastb (%eax), %ymm1 1101 ; X32-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp) 1102 ; X32-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%esp) 1103 ; X32-NEXT: movl %ebp, %esp 1104 ; X32-NEXT: popl %ebp 1105 ; X32-NEXT: vzeroupper 1106 ; X32-NEXT: retl 1107 ; 1108 ; X64-LABEL: isel_crash_32b: 1109 ; X64: ## %bb.0: ## %eintry 1110 ; X64-NEXT: pushq %rbp 1111 ; X64-NEXT: .cfi_def_cfa_offset 16 1112 ; X64-NEXT: .cfi_offset %rbp, -16 1113 ; X64-NEXT: movq %rsp, %rbp 1114 ; X64-NEXT: .cfi_def_cfa_register %rbp 1115 ; X64-NEXT: andq $-32, %rsp 1116 ; X64-NEXT: subq $128, %rsp 1117 ; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0 1118 ; X64-NEXT: vmovaps %ymm0, (%rsp) 1119 ; X64-NEXT: vpbroadcastb (%rdi), %ymm1 1120 ; X64-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 1121 ; X64-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp) 1122 ; X64-NEXT: movq %rbp, %rsp 1123 ; X64-NEXT: popq %rbp 1124 ; X64-NEXT: vzeroupper 1125 ; X64-NEXT: retq 1126 eintry: 1127 %__a.addr.i = alloca <4 x i64>, align 16 1128 %__b.addr.i = alloca <4 x i64>, align 16 1129 %vCr = alloca <4 x i64>, align 16 1130 store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16 1131 %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16 1132 %tmp2 = load i8, i8* %cV_R.addr, align 4 1133 %splat.splatinsert = insertelement <32 x i8> undef, i8 %tmp2, i32 0 1134 %splat.splat = shufflevector <32 x i8> %splat.splatinsert, <32 x i8> undef, <32 x i32> zeroinitializer 1135 %tmp3 = bitcast <32 x i8> %splat.splat to <4 x i64> 1136 store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16 1137 store <4 x i64> %tmp3, <4 x i64>* %__b.addr.i, align 16 1138 ret void 1139 } 1140 1141 define void @isel_crash_8w(i16* %cV_R.addr) { 1142 ; X32-LABEL: isel_crash_8w: 1143 ; X32: ## %bb.0: ## %entry 1144 ; X32-NEXT: subl $60, %esp 1145 ; X32-NEXT: .cfi_def_cfa_offset 64 1146 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 1147 ; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0 1148 ; X32-NEXT: vmovaps %xmm0, (%esp) 1149 ; X32-NEXT: vpbroadcastw (%eax), %xmm1 
1150 ; X32-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp) 1151 ; X32-NEXT: vmovdqa %xmm1, {{[0-9]+}}(%esp) 1152 ; X32-NEXT: addl $60, %esp 1153 ; X32-NEXT: retl 1154 ; 1155 ; X64-LABEL: isel_crash_8w: 1156 ; X64: ## %bb.0: ## %entry 1157 ; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0 1158 ; X64-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) 1159 ; X64-NEXT: vpbroadcastw (%rdi), %xmm1 1160 ; X64-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) 1161 ; X64-NEXT: vmovdqa %xmm1, -{{[0-9]+}}(%rsp) 1162 ; X64-NEXT: retq 1163 entry: 1164 %__a.addr.i = alloca <2 x i64>, align 16 1165 %__b.addr.i = alloca <2 x i64>, align 16 1166 %vCr = alloca <2 x i64>, align 16 1167 store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16 1168 %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16 1169 %tmp2 = load i16, i16* %cV_R.addr, align 4 1170 %splat.splatinsert = insertelement <8 x i16> undef, i16 %tmp2, i32 0 1171 %splat.splat = shufflevector <8 x i16> %splat.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer 1172 %tmp3 = bitcast <8 x i16> %splat.splat to <2 x i64> 1173 store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16 1174 store <2 x i64> %tmp3, <2 x i64>* %__b.addr.i, align 16 1175 ret void 1176 } 1177 1178 define void @isel_crash_16w(i16* %cV_R.addr) { 1179 ; X32-LABEL: isel_crash_16w: 1180 ; X32: ## %bb.0: ## %eintry 1181 ; X32-NEXT: pushl %ebp 1182 ; X32-NEXT: .cfi_def_cfa_offset 8 1183 ; X32-NEXT: .cfi_offset %ebp, -8 1184 ; X32-NEXT: movl %esp, %ebp 1185 ; X32-NEXT: .cfi_def_cfa_register %ebp 1186 ; X32-NEXT: andl $-32, %esp 1187 ; X32-NEXT: subl $128, %esp 1188 ; X32-NEXT: movl 8(%ebp), %eax 1189 ; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0 1190 ; X32-NEXT: vmovaps %ymm0, (%esp) 1191 ; X32-NEXT: vpbroadcastw (%eax), %ymm1 1192 ; X32-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp) 1193 ; X32-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%esp) 1194 ; X32-NEXT: movl %ebp, %esp 1195 ; X32-NEXT: popl %ebp 1196 ; X32-NEXT: vzeroupper 1197 ; X32-NEXT: retl 1198 ; 1199 ; X64-LABEL: isel_crash_16w: 1200 ; X64: ## %bb.0: ## %eintry 1201 ; 
X64-NEXT: pushq %rbp 1202 ; X64-NEXT: .cfi_def_cfa_offset 16 1203 ; X64-NEXT: .cfi_offset %rbp, -16 1204 ; X64-NEXT: movq %rsp, %rbp 1205 ; X64-NEXT: .cfi_def_cfa_register %rbp 1206 ; X64-NEXT: andq $-32, %rsp 1207 ; X64-NEXT: subq $128, %rsp 1208 ; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0 1209 ; X64-NEXT: vmovaps %ymm0, (%rsp) 1210 ; X64-NEXT: vpbroadcastw (%rdi), %ymm1 1211 ; X64-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 1212 ; X64-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp) 1213 ; X64-NEXT: movq %rbp, %rsp 1214 ; X64-NEXT: popq %rbp 1215 ; X64-NEXT: vzeroupper 1216 ; X64-NEXT: retq 1217 eintry: 1218 %__a.addr.i = alloca <4 x i64>, align 16 1219 %__b.addr.i = alloca <4 x i64>, align 16 1220 %vCr = alloca <4 x i64>, align 16 1221 store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16 1222 %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16 1223 %tmp2 = load i16, i16* %cV_R.addr, align 4 1224 %splat.splatinsert = insertelement <16 x i16> undef, i16 %tmp2, i32 0 1225 %splat.splat = shufflevector <16 x i16> %splat.splatinsert, <16 x i16> undef, <16 x i32> zeroinitializer 1226 %tmp3 = bitcast <16 x i16> %splat.splat to <4 x i64> 1227 store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16 1228 store <4 x i64> %tmp3, <4 x i64>* %__b.addr.i, align 16 1229 ret void 1230 } 1231 1232 define void @isel_crash_4d(i32* %cV_R.addr) { 1233 ; X32-LABEL: isel_crash_4d: 1234 ; X32: ## %bb.0: ## %entry 1235 ; X32-NEXT: subl $60, %esp 1236 ; X32-NEXT: .cfi_def_cfa_offset 64 1237 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 1238 ; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0 1239 ; X32-NEXT: vmovaps %xmm0, (%esp) 1240 ; X32-NEXT: vbroadcastss (%eax), %xmm1 1241 ; X32-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp) 1242 ; X32-NEXT: vmovaps %xmm1, {{[0-9]+}}(%esp) 1243 ; X32-NEXT: addl $60, %esp 1244 ; X32-NEXT: retl 1245 ; 1246 ; X64-LABEL: isel_crash_4d: 1247 ; X64: ## %bb.0: ## %entry 1248 ; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0 1249 ; X64-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) 1250 ; X64-NEXT: vbroadcastss (%rdi), %xmm1 
1251 ; X64-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) 1252 ; X64-NEXT: vmovaps %xmm1, -{{[0-9]+}}(%rsp) 1253 ; X64-NEXT: retq 1254 entry: 1255 %__a.addr.i = alloca <2 x i64>, align 16 1256 %__b.addr.i = alloca <2 x i64>, align 16 1257 %vCr = alloca <2 x i64>, align 16 1258 store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16 1259 %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16 1260 %tmp2 = load i32, i32* %cV_R.addr, align 4 1261 %splat.splatinsert = insertelement <4 x i32> undef, i32 %tmp2, i32 0 1262 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer 1263 %tmp3 = bitcast <4 x i32> %splat.splat to <2 x i64> 1264 store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16 1265 store <2 x i64> %tmp3, <2 x i64>* %__b.addr.i, align 16 1266 ret void 1267 } 1268 1269 define void @isel_crash_8d(i32* %cV_R.addr) { 1270 ; X32-LABEL: isel_crash_8d: 1271 ; X32: ## %bb.0: ## %eintry 1272 ; X32-NEXT: pushl %ebp 1273 ; X32-NEXT: .cfi_def_cfa_offset 8 1274 ; X32-NEXT: .cfi_offset %ebp, -8 1275 ; X32-NEXT: movl %esp, %ebp 1276 ; X32-NEXT: .cfi_def_cfa_register %ebp 1277 ; X32-NEXT: andl $-32, %esp 1278 ; X32-NEXT: subl $128, %esp 1279 ; X32-NEXT: movl 8(%ebp), %eax 1280 ; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0 1281 ; X32-NEXT: vmovaps %ymm0, (%esp) 1282 ; X32-NEXT: vbroadcastss (%eax), %ymm1 1283 ; X32-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp) 1284 ; X32-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp) 1285 ; X32-NEXT: movl %ebp, %esp 1286 ; X32-NEXT: popl %ebp 1287 ; X32-NEXT: vzeroupper 1288 ; X32-NEXT: retl 1289 ; 1290 ; X64-LABEL: isel_crash_8d: 1291 ; X64: ## %bb.0: ## %eintry 1292 ; X64-NEXT: pushq %rbp 1293 ; X64-NEXT: .cfi_def_cfa_offset 16 1294 ; X64-NEXT: .cfi_offset %rbp, -16 1295 ; X64-NEXT: movq %rsp, %rbp 1296 ; X64-NEXT: .cfi_def_cfa_register %rbp 1297 ; X64-NEXT: andq $-32, %rsp 1298 ; X64-NEXT: subq $128, %rsp 1299 ; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0 1300 ; X64-NEXT: vmovaps %ymm0, (%rsp) 1301 ; X64-NEXT: vbroadcastss (%rdi), 
%ymm1 1302 ; X64-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 1303 ; X64-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 1304 ; X64-NEXT: movq %rbp, %rsp 1305 ; X64-NEXT: popq %rbp 1306 ; X64-NEXT: vzeroupper 1307 ; X64-NEXT: retq 1308 eintry: 1309 %__a.addr.i = alloca <4 x i64>, align 16 1310 %__b.addr.i = alloca <4 x i64>, align 16 1311 %vCr = alloca <4 x i64>, align 16 1312 store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16 1313 %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16 1314 %tmp2 = load i32, i32* %cV_R.addr, align 4 1315 %splat.splatinsert = insertelement <8 x i32> undef, i32 %tmp2, i32 0 1316 %splat.splat = shufflevector <8 x i32> %splat.splatinsert, <8 x i32> undef, <8 x i32> zeroinitializer 1317 %tmp3 = bitcast <8 x i32> %splat.splat to <4 x i64> 1318 store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16 1319 store <4 x i64> %tmp3, <4 x i64>* %__b.addr.i, align 16 1320 ret void 1321 } 1322 1323 define void @isel_crash_2q(i64* %cV_R.addr) { 1324 ; X32-LABEL: isel_crash_2q: 1325 ; X32: ## %bb.0: ## %entry 1326 ; X32-NEXT: subl $60, %esp 1327 ; X32-NEXT: .cfi_def_cfa_offset 64 1328 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 1329 ; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0 1330 ; X32-NEXT: vmovaps %xmm0, (%esp) 1331 ; X32-NEXT: vpbroadcastq (%eax), %xmm1 1332 ; X32-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp) 1333 ; X32-NEXT: vmovdqa %xmm1, {{[0-9]+}}(%esp) 1334 ; X32-NEXT: addl $60, %esp 1335 ; X32-NEXT: retl 1336 ; 1337 ; X64-LABEL: isel_crash_2q: 1338 ; X64: ## %bb.0: ## %entry 1339 ; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0 1340 ; X64-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) 1341 ; X64-NEXT: vpbroadcastq (%rdi), %xmm1 1342 ; X64-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) 1343 ; X64-NEXT: vmovdqa %xmm1, -{{[0-9]+}}(%rsp) 1344 ; X64-NEXT: retq 1345 entry: 1346 %__a.addr.i = alloca <2 x i64>, align 16 1347 %__b.addr.i = alloca <2 x i64>, align 16 1348 %vCr = alloca <2 x i64>, align 16 1349 store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16 1350 %tmp = load <2 x i64>, <2 x i64>* 
%vCr, align 16 1351 %tmp2 = load i64, i64* %cV_R.addr, align 4 1352 %splat.splatinsert = insertelement <2 x i64> undef, i64 %tmp2, i32 0 1353 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer 1354 store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16 1355 store <2 x i64> %splat.splat, <2 x i64>* %__b.addr.i, align 16 1356 ret void 1357 } 1358 1359 define void @isel_crash_4q(i64* %cV_R.addr) { 1360 ; X32-LABEL: isel_crash_4q: 1361 ; X32: ## %bb.0: ## %eintry 1362 ; X32-NEXT: pushl %ebp 1363 ; X32-NEXT: .cfi_def_cfa_offset 8 1364 ; X32-NEXT: .cfi_offset %ebp, -8 1365 ; X32-NEXT: movl %esp, %ebp 1366 ; X32-NEXT: .cfi_def_cfa_register %ebp 1367 ; X32-NEXT: andl $-32, %esp 1368 ; X32-NEXT: subl $128, %esp 1369 ; X32-NEXT: movl 8(%ebp), %eax 1370 ; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0 1371 ; X32-NEXT: vmovaps %ymm0, (%esp) 1372 ; X32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 1373 ; X32-NEXT: vbroadcastsd %xmm1, %ymm1 1374 ; X32-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp) 1375 ; X32-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp) 1376 ; X32-NEXT: movl %ebp, %esp 1377 ; X32-NEXT: popl %ebp 1378 ; X32-NEXT: vzeroupper 1379 ; X32-NEXT: retl 1380 ; 1381 ; X64-LABEL: isel_crash_4q: 1382 ; X64: ## %bb.0: ## %eintry 1383 ; X64-NEXT: pushq %rbp 1384 ; X64-NEXT: .cfi_def_cfa_offset 16 1385 ; X64-NEXT: .cfi_offset %rbp, -16 1386 ; X64-NEXT: movq %rsp, %rbp 1387 ; X64-NEXT: .cfi_def_cfa_register %rbp 1388 ; X64-NEXT: andq $-32, %rsp 1389 ; X64-NEXT: subq $128, %rsp 1390 ; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0 1391 ; X64-NEXT: vmovaps %ymm0, (%rsp) 1392 ; X64-NEXT: vbroadcastsd (%rdi), %ymm1 1393 ; X64-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 1394 ; X64-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 1395 ; X64-NEXT: movq %rbp, %rsp 1396 ; X64-NEXT: popq %rbp 1397 ; X64-NEXT: vzeroupper 1398 ; X64-NEXT: retq 1399 eintry: 1400 %__a.addr.i = alloca <4 x i64>, align 16 1401 %__b.addr.i = alloca <4 x i64>, align 16 1402 %vCr = alloca <4 x i64>, align 16 1403 store 
<4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16 1404 %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16 1405 %tmp2 = load i64, i64* %cV_R.addr, align 4 1406 %splat.splatinsert = insertelement <4 x i64> undef, i64 %tmp2, i32 0 1407 %splat.splat = shufflevector <4 x i64> %splat.splatinsert, <4 x i64> undef, <4 x i32> zeroinitializer 1408 store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16 1409 store <4 x i64> %splat.splat, <4 x i64>* %__b.addr.i, align 16 1410 ret void 1411 } 1412