; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE,SSE2,X86-SSE2
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE,SSE41,X86-SSE41
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE,SSE2,X64-SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE,SSE41,X64-SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512

; Ensure that the backend no longer emits unnecessary vector insert
; instructions immediately after SSE scalar fp instructions
; like addss or mulss.
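;
; Each test below extracts element 0 of both operands, applies the scalar
; op, and inserts the result back into element 0 of the first source:
;   %add = fadd float %2, %1
;   %3 = insertelement <4 x float> %a, float %add, i32 0
; This should select to a single addss/vaddss with no trailing insert.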

define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_add_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_add_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %add = fadd float %2, %1
  %3 = insertelement <4 x float> %a, float %add, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_sub_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    subss %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_sub_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %sub = fsub float %2, %1
  %3 = insertelement <4 x float> %a, float %sub, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_mul_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    mulss %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mul_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %mul = fmul float %2, %1
  %3 = insertelement <4 x float> %a, float %mul, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_div_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_div_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %div = fdiv float %2, %1
  %3 = insertelement <4 x float> %a, float %div, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_sqrt_ss(<4 x float> %a) {
; SSE-LABEL: test_sqrt_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    sqrtss %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_sqrt_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = extractelement <4 x float> %a, i32 0
  %2 = call float @llvm.sqrt.f32(float %1)
  %3 = insertelement <4 x float> %a, float %2, i32 0
  ret <4 x float> %3
}
declare float @llvm.sqrt.f32(float)

define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_add_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    addsd %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_add_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %add = fadd double %2, %1
  %3 = insertelement <2 x double> %a, double %add, i32 0
  ret <2 x double> %3
}

define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_sub_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    subsd %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_sub_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %sub = fsub double %2, %1
  %3 = insertelement <2 x double> %a, double %sub, i32 0
  ret <2 x double> %3
}

define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_mul_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    mulsd %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_mul_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %mul = fmul double %2, %1
  %3 = insertelement <2 x double> %a, double %mul, i32 0
  ret <2 x double> %3
}

define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_div_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_div_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %div = fdiv double %2, %1
  %3 = insertelement <2 x double> %a, double %div, i32 0
  ret <2 x double> %3
}

define <2 x double> @test_sqrt_sd(<2 x double> %a) {
; SSE-LABEL: test_sqrt_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    sqrtsd %xmm0, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_sqrt_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = extractelement <2 x double> %a, i32 0
  %2 = call double @llvm.sqrt.f64(double %1)
  %3 = insertelement <2 x double> %a, double %2, i32 0
  ret <2 x double> %3
}
declare double @llvm.sqrt.f64(double)
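
; The test2 variants commute the pattern: the scalar result is inserted
; into %b rather than %a, so on SSE an extra movaps/movapd is expected to
; move the result into %xmm0, while the three-operand AVX forms need no
; extra move.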

define <4 x float> @test2_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_add_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    addss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test2_add_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %add = fadd float %1, %2
  %3 = insertelement <4 x float> %b, float %add, i32 0
  ret <4 x float> %3
}

define <4 x float> @test2_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_sub_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    subss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test2_sub_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %sub = fsub float %2, %1
  %3 = insertelement <4 x float> %b, float %sub, i32 0
  ret <4 x float> %3
}

define <4 x float> @test2_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_mul_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    mulss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test2_mul_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %mul = fmul float %1, %2
  %3 = insertelement <4 x float> %b, float %mul, i32 0
  ret <4 x float> %3
}

define <4 x float> @test2_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_div_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    divss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test2_div_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %div = fdiv float %2, %1
  %3 = insertelement <4 x float> %b, float %div, i32 0
  ret <4 x float> %3
}

define <2 x double> @test2_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_add_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    addsd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test2_add_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %add = fadd double %1, %2
  %3 = insertelement <2 x double> %b, double %add, i32 0
  ret <2 x double> %3
}

define <2 x double> @test2_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_sub_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    subsd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test2_sub_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %sub = fsub double %2, %1
  %3 = insertelement <2 x double> %b, double %sub, i32 0
  ret <2 x double> %3
}

define <2 x double> @test2_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_mul_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    mulsd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test2_mul_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %mul = fmul double %1, %2
  %3 = insertelement <2 x double> %b, double %mul, i32 0
  ret <2 x double> %3
}

define <2 x double> @test2_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_div_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    divsd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test2_div_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %div = fdiv double %2, %1
  %3 = insertelement <2 x double> %b, double %div, i32 0
  ret <2 x double> %3
}
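
; The test_multiple variants chain two dependent scalar ops on element 0;
; both should still select to back-to-back scalar instructions with no
; intervening insert/extract.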

define <4 x float> @test_multiple_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_add_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    addss %xmm0, %xmm1
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_multiple_add_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %add = fadd float %2, %1
  %add2 = fadd float %2, %add
  %3 = insertelement <4 x float> %a, float %add2, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_multiple_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_sub_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    subss %xmm1, %xmm2
; SSE-NEXT:    subss %xmm2, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_multiple_sub_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %sub = fsub float %2, %1
  %sub2 = fsub float %2, %sub
  %3 = insertelement <4 x float> %a, float %sub2, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_multiple_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_mul_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    mulss %xmm0, %xmm1
; SSE-NEXT:    mulss %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_multiple_mul_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %mul = fmul float %2, %1
  %mul2 = fmul float %2, %mul
  %3 = insertelement <4 x float> %a, float %mul2, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_multiple_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_div_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    divss %xmm1, %xmm2
; SSE-NEXT:    divss %xmm2, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: test_multiple_div_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %div = fdiv float %2, %1
  %div2 = fdiv float %2, %div
  %3 = insertelement <4 x float> %a, float %div2, i32 0
  ret <4 x float> %3
}

; With SSE4.1 or greater, the shuffles in the following tests may
; be lowered to X86Blendi nodes.
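; Either way, the blend should fold away when only lane 0 carries the
; scalar result, leaving a lone scalar instruction.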

define <4 x float> @blend_add_ss(<4 x float> %a, float %b) {
; X86-SSE-LABEL: blend_add_ss:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    addss %xmm1, %xmm0
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: blend_add_ss:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: blend_add_ss:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    addss %xmm1, %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: blend_add_ss:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    retq

  %ext = extractelement <4 x float> %a, i32 0
  %op = fadd float %b, %ext
  %ins = insertelement <4 x float> undef, float %op, i32 0
  %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %shuf
}

define <4 x float> @blend_sub_ss(<4 x float> %a, float %b) {
; X86-SSE-LABEL: blend_sub_ss:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    subss %xmm1, %xmm0
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: blend_sub_ss:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: blend_sub_ss:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    subss %xmm1, %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: blend_sub_ss:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    retq

  %ext = extractelement <4 x float> %a, i32 0
  %op = fsub float %ext, %b
  %ins = insertelement <4 x float> undef, float %op, i32 0
  %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %shuf
}

define <4 x float> @blend_mul_ss(<4 x float> %a, float %b) {
; X86-SSE-LABEL: blend_mul_ss:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    mulss %xmm1, %xmm0
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: blend_mul_ss:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: blend_mul_ss:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    mulss %xmm1, %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: blend_mul_ss:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    retq

  %ext = extractelement <4 x float> %a, i32 0
  %op = fmul float %b, %ext
  %ins = insertelement <4 x float> undef, float %op, i32 0
  %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %shuf
}

define <4 x float> @blend_div_ss(<4 x float> %a, float %b) {
; X86-SSE-LABEL: blend_div_ss:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    divss %xmm1, %xmm0
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: blend_div_ss:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: blend_div_ss:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    divss %xmm1, %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: blend_div_ss:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    retq

  %ext = extractelement <4 x float> %a, i32 0
  %op = fdiv float %ext, %b
  %ins = insertelement <4 x float> undef, float %op, i32 0
  %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %shuf
}

define <2 x double> @blend_add_sd(<2 x double> %a, double %b) {
; X86-SSE-LABEL: blend_add_sd:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; X86-SSE-NEXT:    addsd %xmm1, %xmm0
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: blend_add_sd:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; X86-AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: blend_add_sd:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    addsd %xmm1, %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: blend_add_sd:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    retq

  %ext = extractelement <2 x double> %a, i32 0
  %op = fadd double %b, %ext
  %ins = insertelement <2 x double> undef, double %op, i32 0
  %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuf
}

define <2 x double> @blend_sub_sd(<2 x double> %a, double %b) {
; X86-SSE-LABEL: blend_sub_sd:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; X86-SSE-NEXT:    subsd %xmm1, %xmm0
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: blend_sub_sd:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; X86-AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: blend_sub_sd:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    subsd %xmm1, %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: blend_sub_sd:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    retq

  %ext = extractelement <2 x double> %a, i32 0
  %op = fsub double %ext, %b
  %ins = insertelement <2 x double> undef, double %op, i32 0
  %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuf
}

define <2 x double> @blend_mul_sd(<2 x double> %a, double %b) {
; X86-SSE-LABEL: blend_mul_sd:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; X86-SSE-NEXT:    mulsd %xmm1, %xmm0
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: blend_mul_sd:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; X86-AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: blend_mul_sd:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    mulsd %xmm1, %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: blend_mul_sd:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    retq

  %ext = extractelement <2 x double> %a, i32 0
  %op = fmul double %b, %ext
  %ins = insertelement <2 x double> undef, double %op, i32 0
  %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuf
}

define <2 x double> @blend_div_sd(<2 x double> %a, double %b) {
; X86-SSE-LABEL: blend_div_sd:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; X86-SSE-NEXT:    divsd %xmm1, %xmm0
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: blend_div_sd:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; X86-AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: blend_div_sd:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    divsd %xmm1, %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: blend_div_sd:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    retq

  %ext = extractelement <2 x double> %a, i32 0
  %op = fdiv double %ext, %b
  %ins = insertelement <2 x double> undef, double %op, i32 0
  %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuf
}

; Ensure that the backend selects SSE/AVX scalar fp instructions
; from a packed fp instruction plus a vector insert.
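;
; For example, in insert_test_add_ss below only lane 0 of the packed fadd
; survives the shuffle:
;   %1 = fadd <4 x float> %a, %b
;   %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; so the whole sequence should select to a single addss/vaddss.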

define <4 x float> @insert_test_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_add_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: insert_test_add_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = fadd <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_sub_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    subss %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: insert_test_sub_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = fsub <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_mul_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    mulss %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: insert_test_mul_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = fmul <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_div_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: insert_test_div_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = fdiv <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <2 x double> @insert_test_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_add_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    addsd %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: insert_test_add_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = fadd <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_sub_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    subsd %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: insert_test_sub_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = fsub <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_mul_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    mulsd %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: insert_test_mul_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = fmul <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_div_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: insert_test_div_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = fdiv <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <4 x float> @insert_test2_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_add_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    addss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: insert_test2_add_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = fadd <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test2_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_sub_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    subss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: insert_test2_sub_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = fsub <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test2_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_mul_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    mulss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: insert_test2_mul_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = fmul <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test2_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_div_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    divss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: insert_test2_div_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = fdiv <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <2 x double> @insert_test2_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_add_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    addsd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: insert_test2_add_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = fadd <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test2_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_sub_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    subsd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: insert_test2_sub_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = fsub <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test2_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_mul_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    mulsd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: insert_test2_mul_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = fmul <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test2_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_div_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    divsd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: insert_test2_div_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = fdiv <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}
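
; The insert_test3/insert_test4 variants express the same lane-0 insertion
; as a vector select with a constant <i1 false, i1 true, ...> mask instead
; of a shufflevector; the expected lowering is unchanged.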

define <4 x float> @insert_test3_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_add_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: insert_test3_add_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = fadd <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test3_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_sub_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    subss %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: insert_test3_sub_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = fsub <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test3_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_mul_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    mulss %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: insert_test3_mul_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = fmul <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test3_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_div_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: insert_test3_div_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = fdiv <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <2 x double> @insert_test3_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_add_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    addsd %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: insert_test3_add_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = fadd <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test3_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_sub_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    subsd %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: insert_test3_sub_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = fsub <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test3_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_mul_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    mulsd %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: insert_test3_mul_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = fmul <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test3_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_div_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: insert_test3_div_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = fdiv <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

define <4 x float> @insert_test4_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_add_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    addss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: insert_test4_add_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = fadd <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test4_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_sub_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    subss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: insert_test4_sub_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = fsub <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test4_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_mul_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    mulss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: insert_test4_mul_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = fmul <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test4_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_div_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    divss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: insert_test4_div_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = fdiv <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <2 x double> @insert_test4_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_add_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    addsd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: insert_test4_add_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = fadd <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test4_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_sub_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    subsd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: insert_test4_sub_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = fsub <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test4_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_mul_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    mulsd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: insert_test4_mul_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = fmul <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test4_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_div_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    divsd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: insert_test4_div_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = fdiv <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}
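
; The insert_test5 variants commute the packed op. fadd/fmul are
; commutable, so they should still shrink to a single scalar instruction;
; for fsub/fdiv the packed op is kept and the low lane is blended back
; instead.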

define <4 x float> @insert_test5_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test5_add_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: insert_test5_add_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = fadd <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test5_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: insert_test5_sub_ss:
; SSE2:       # %bb.0:
; SSE2-NEXT:    subps %xmm0, %xmm1
; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE2-NEXT:    ret{{[l|q]}}
;
; SSE41-LABEL: insert_test5_sub_ss:
; SSE41:       # %bb.0:
; SSE41-NEXT:    subps %xmm0, %xmm1
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE41-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: insert_test5_sub_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vsubps %xmm0, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT:    ret{{[l|q]}}
  %1 = fsub <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test5_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test5_mul_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    mulss %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: insert_test5_mul_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = fmul <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test5_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: insert_test5_div_ss:
; SSE2:       # %bb.0:
; SSE2-NEXT:    divps %xmm0, %xmm1
; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE2-NEXT:    ret{{[l|q]}}
;
; SSE41-LABEL: insert_test5_div_ss:
; SSE41:       # %bb.0:
; SSE41-NEXT:    divps %xmm0, %xmm1
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE41-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: insert_test5_div_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivps %xmm0, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT:    ret{{[l|q]}}
  %1 = fdiv <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <2 x double> @insert_test5_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test5_add_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    addsd %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: insert_test5_add_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = fadd <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test5_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: insert_test5_sub_sd:
; SSE2:       # %bb.0:
; SSE2-NEXT:    subpd %xmm0, %xmm1
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    ret{{[l|q]}}
;
; SSE41-LABEL: insert_test5_sub_sd:
; SSE41:       # %bb.0:
; SSE41-NEXT:    subpd %xmm0, %xmm1
; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE41-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: insert_test5_sub_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vsubpd %xmm0, %xmm1, %xmm1
; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT:    ret{{[l|q]}}
  %1 = fsub <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test5_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test5_mul_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    mulsd %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: insert_test5_mul_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = fmul <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test5_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: insert_test5_div_sd:
; SSE2:       # %bb.0:
; SSE2-NEXT:    divpd %xmm0, %xmm1
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    ret{{[l|q]}}
;
; SSE41-LABEL: insert_test5_div_sd:
; SSE41:       # %bb.0:
; SSE41-NEXT:    divpd %xmm0, %xmm1
; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE41-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: insert_test5_div_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivpd %xmm0, %xmm1, %xmm1
; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT:    ret{{[l|q]}}
  %1 = fdiv <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}
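
; Masked variants: with AVX512 the scalar select should fold into a masked
; vaddss/vaddsd; without AVX512 a test-and-branch around the add plus a
; movss/blend of the low lane is expected.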

define <4 x float> @add_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
; X86-SSE2-LABEL: add_ss_mask:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-SSE2-NEXT:    jne .LBB70_1
; X86-SSE2-NEXT:  # %bb.2:
; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; X86-SSE2-NEXT:    retl
; X86-SSE2-NEXT:  .LBB70_1:
; X86-SSE2-NEXT:    addss %xmm0, %xmm1
; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X86-SSE2-NEXT:    retl
;
; X86-SSE41-LABEL: add_ss_mask:
; X86-SSE41:       # %bb.0:
; X86-SSE41-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-SSE41-NEXT:    jne .LBB70_1
; X86-SSE41-NEXT:  # %bb.2:
; X86-SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; X86-SSE41-NEXT:    retl
; X86-SSE41-NEXT:  .LBB70_1:
; X86-SSE41-NEXT:    addss %xmm0, %xmm1
; X86-SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X86-SSE41-NEXT:    retl
;
; X86-AVX1-LABEL: add_ss_mask:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-AVX1-NEXT:    je .LBB70_2
; X86-AVX1-NEXT:  # %bb.1:
; X86-AVX1-NEXT:    vaddss %xmm1, %xmm0, %xmm2
; X86-AVX1-NEXT:  .LBB70_2:
; X86-AVX1-NEXT:    vblendps {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; X86-AVX1-NEXT:    retl
;
; X86-AVX512-LABEL: add_ss_mask:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-AVX512-NEXT:    kmovw %eax, %k1
; X86-AVX512-NEXT:    vaddss %xmm1, %xmm0, %xmm2 {%k1}
; X86-AVX512-NEXT:    vmovaps %xmm2, %xmm0
; X86-AVX512-NEXT:    retl
;
; X64-SSE2-LABEL: add_ss_mask:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    testb $1, %dil
; X64-SSE2-NEXT:    jne .LBB70_1
; X64-SSE2-NEXT:  # %bb.2:
; X64-SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; X64-SSE2-NEXT:    retq
; X64-SSE2-NEXT:  .LBB70_1:
; X64-SSE2-NEXT:    addss %xmm0, %xmm1
; X64-SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X64-SSE2-NEXT:    retq
;
; X64-SSE41-LABEL: add_ss_mask:
; X64-SSE41:       # %bb.0:
; X64-SSE41-NEXT:    testb $1, %dil
; X64-SSE41-NEXT:    jne .LBB70_1
; X64-SSE41-NEXT:  # %bb.2:
; X64-SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; X64-SSE41-NEXT:    retq
; X64-SSE41-NEXT:  .LBB70_1:
; X64-SSE41-NEXT:    addss %xmm0, %xmm1
; X64-SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X64-SSE41-NEXT:    retq
;
; X64-AVX1-LABEL: add_ss_mask:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    testb $1, %dil
; X64-AVX1-NEXT:    je .LBB70_2
; X64-AVX1-NEXT:  # %bb.1:
; X64-AVX1-NEXT:    vaddss %xmm1, %xmm0, %xmm2
; X64-AVX1-NEXT:  .LBB70_2:
; X64-AVX1-NEXT:    vblendps {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; X64-AVX1-NEXT:    retq
;
; X64-AVX512-LABEL: add_ss_mask:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    kmovw %edi, %k1
; X64-AVX512-NEXT:    vaddss %xmm1, %xmm0, %xmm2 {%k1}
; X64-AVX512-NEXT:    vmovaps %xmm2, %xmm0
; X64-AVX512-NEXT:    retq
  %1 = extractelement <4 x float> %a, i64 0
  %2 = extractelement <4 x float> %b, i64 0
  %3 = fadd float %1, %2
  %4 = extractelement <4 x float> %c, i32 0
  %5 = bitcast i8 %mask to <8 x i1>
  %6 = extractelement <8 x i1> %5, i64 0
  %7 = select i1 %6, float %3, float %4
  %8 = insertelement <4 x float> %a, float %7, i64 0
  ret <4 x float> %8
}

define <2 x double> @add_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
; X86-SSE2-LABEL: add_sd_mask:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-SSE2-NEXT:    jne .LBB71_1
; X86-SSE2-NEXT:  # %bb.2:
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; X86-SSE2-NEXT:    retl
; X86-SSE2-NEXT:  .LBB71_1:
; X86-SSE2-NEXT:    addsd %xmm0, %xmm1
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X86-SSE2-NEXT:    retl
;
; X86-SSE41-LABEL: add_sd_mask:
; X86-SSE41:       # %bb.0:
; X86-SSE41-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-SSE41-NEXT:    jne .LBB71_1
; X86-SSE41-NEXT:  # %bb.2:
; X86-SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3]
; X86-SSE41-NEXT:    retl
; X86-SSE41-NEXT:  .LBB71_1:
; X86-SSE41-NEXT:    addsd %xmm0, %xmm1
; X86-SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X86-SSE41-NEXT:    retl
;
; X86-AVX1-LABEL: add_sd_mask:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-AVX1-NEXT:    je .LBB71_2
; X86-AVX1-NEXT:  # %bb.1:
; X86-AVX1-NEXT:    vaddsd %xmm1, %xmm0, %xmm2
; X86-AVX1-NEXT:  .LBB71_2:
; X86-AVX1-NEXT:    vblendpd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; X86-AVX1-NEXT:    retl
;
; X86-AVX512-LABEL: add_sd_mask:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-AVX512-NEXT:    kmovw %eax, %k1
; X86-AVX512-NEXT:    vaddsd %xmm1, %xmm0, %xmm2 {%k1}
; X86-AVX512-NEXT:    vmovapd %xmm2, %xmm0
; X86-AVX512-NEXT:    retl
;
; X64-SSE2-LABEL: add_sd_mask:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    testb $1, %dil
; X64-SSE2-NEXT:    jne .LBB71_1
; X64-SSE2-NEXT:  # %bb.2:
; X64-SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; X64-SSE2-NEXT:    retq
; X64-SSE2-NEXT:  .LBB71_1:
; X64-SSE2-NEXT:    addsd %xmm0, %xmm1
; X64-SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X64-SSE2-NEXT:    retq
;
; X64-SSE41-LABEL: add_sd_mask:
; X64-SSE41:       # %bb.0:
; X64-SSE41-NEXT:    testb $1, %dil
; X64-SSE41-NEXT:    jne .LBB71_1
; X64-SSE41-NEXT:  # %bb.2:
; X64-SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3]
; X64-SSE41-NEXT:    retq
; X64-SSE41-NEXT:  .LBB71_1:
; X64-SSE41-NEXT:    addsd %xmm0, %xmm1
; X64-SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X64-SSE41-NEXT:    retq
;
; X64-AVX1-LABEL: add_sd_mask:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    testb $1, %dil
; X64-AVX1-NEXT:    je .LBB71_2
; X64-AVX1-NEXT:  # %bb.1:
; X64-AVX1-NEXT:    vaddsd %xmm1, %xmm0, %xmm2
; X64-AVX1-NEXT:  .LBB71_2:
; X64-AVX1-NEXT:    vblendpd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; X64-AVX1-NEXT:    retq
;
; X64-AVX512-LABEL: add_sd_mask:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    kmovw %edi, %k1
; X64-AVX512-NEXT:    vaddsd %xmm1, %xmm0, %xmm2 {%k1}
; X64-AVX512-NEXT:    vmovapd %xmm2, %xmm0
; X64-AVX512-NEXT:    retq
  %1 = extractelement <2 x double> %a, i64 0
  %2 = extractelement <2 x double> %b, i64 0
  %3 = fadd double %1, %2
  %4 = extractelement <2 x double> %c, i32 0
  %5 = bitcast i8 %mask to <8 x i1>
  %6 = extractelement <8 x i1> %5, i64 0
  %7 = select i1 %6, double %3, double %4
  %8 = insertelement <2 x double> %a, double %7, i64 0
  ret <2 x double> %8
}