; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix=AVX

; Verify that we're folding the load into the math instruction.
; This pattern is generated out of the simplest intrinsics usage:
; _mm_add_ss(a, _mm_load_ss(b));
;
; Each test below extracts lane 0, performs a scalar FP binop with a loaded
; value, and reinserts the result into lane 0. The CHECK lines verify that
; the load is folded as a memory operand of the scalar SSE/AVX instruction
; (e.g. "addss (%rdi), %xmm0") instead of requiring a separate movss/movsd.

; fadd float: load should fold into addss/vaddss.
define <4 x float> @addss(<4 x float> %va, float* %pb) {
; SSE-LABEL: addss:
; SSE:       # BB#0:
; SSE-NEXT:    addss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: addss:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <4 x float> %va, i32 0
  %b = load float, float* %pb
  %r = fadd float %a, %b
  %vr = insertelement <4 x float> %va, float %r, i32 0
  ret <4 x float> %vr
}

; fadd double: load should fold into addsd/vaddsd.
define <2 x double> @addsd(<2 x double> %va, double* %pb) {
; SSE-LABEL: addsd:
; SSE:       # BB#0:
; SSE-NEXT:    addsd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: addsd:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <2 x double> %va, i32 0
  %b = load double, double* %pb
  %r = fadd double %a, %b
  %vr = insertelement <2 x double> %va, double %r, i32 0
  ret <2 x double> %vr
}

; fsub float: load should fold into subss/vsubss.
define <4 x float> @subss(<4 x float> %va, float* %pb) {
; SSE-LABEL: subss:
; SSE:       # BB#0:
; SSE-NEXT:    subss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: subss:
; AVX:       # BB#0:
; AVX-NEXT:    vsubss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <4 x float> %va, i32 0
  %b = load float, float* %pb
  %r = fsub float %a, %b
  %vr = insertelement <4 x float> %va, float %r, i32 0
  ret <4 x float> %vr
}

; fsub double: load should fold into subsd/vsubsd.
define <2 x double> @subsd(<2 x double> %va, double* %pb) {
; SSE-LABEL: subsd:
; SSE:       # BB#0:
; SSE-NEXT:    subsd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: subsd:
; AVX:       # BB#0:
; AVX-NEXT:    vsubsd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <2 x double> %va, i32 0
  %b = load double, double* %pb
  %r = fsub double %a, %b
  %vr = insertelement <2 x double> %va, double %r, i32 0
  ret <2 x double> %vr
}

; fmul float: load should fold into mulss/vmulss.
define <4 x float> @mulss(<4 x float> %va, float* %pb) {
; SSE-LABEL: mulss:
; SSE:       # BB#0:
; SSE-NEXT:    mulss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: mulss:
; AVX:       # BB#0:
; AVX-NEXT:    vmulss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <4 x float> %va, i32 0
  %b = load float, float* %pb
  %r = fmul float %a, %b
  %vr = insertelement <4 x float> %va, float %r, i32 0
  ret <4 x float> %vr
}

; fmul double: load should fold into mulsd/vmulsd.
define <2 x double> @mulsd(<2 x double> %va, double* %pb) {
; SSE-LABEL: mulsd:
; SSE:       # BB#0:
; SSE-NEXT:    mulsd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: mulsd:
; AVX:       # BB#0:
; AVX-NEXT:    vmulsd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <2 x double> %va, i32 0
  %b = load double, double* %pb
  %r = fmul double %a, %b
  %vr = insertelement <2 x double> %va, double %r, i32 0
  ret <2 x double> %vr
}

; fdiv float: load should fold into divss/vdivss.
define <4 x float> @divss(<4 x float> %va, float* %pb) {
; SSE-LABEL: divss:
; SSE:       # BB#0:
; SSE-NEXT:    divss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: divss:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <4 x float> %va, i32 0
  %b = load float, float* %pb
  %r = fdiv float %a, %b
  %vr = insertelement <4 x float> %va, float %r, i32 0
  ret <4 x float> %vr
}

; fdiv double: load should fold into divsd/vdivsd.
define <2 x double> @divsd(<2 x double> %va, double* %pb) {
; SSE-LABEL: divsd:
; SSE:       # BB#0:
; SSE-NEXT:    divsd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: divsd:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <2 x double> %va, i32 0
  %b = load double, double* %pb
  %r = fdiv double %a, %b
  %vr = insertelement <2 x double> %va, double %r, i32 0
  ret <2 x double> %vr
}