; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx512f < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX512

; Verify that we're folding the load into the math instruction.
; This pattern is generated out of the simplest intrinsics usage:
;  _mm_add_ss(a, _mm_load_ss(b));

define <4 x float> @addss(<4 x float> %va, float* %pb) {
; SSE-LABEL: addss:
; SSE:       # %bb.0:
; SSE-NEXT:    addss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: addss:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <4 x float> %va, i32 0
  %b = load float, float* %pb
  %r = fadd float %a, %b
  %vr = insertelement <4 x float> %va, float %r, i32 0
  ret <4 x float> %vr
}

define <2 x double> @addsd(<2 x double> %va, double* %pb) {
; SSE-LABEL: addsd:
; SSE:       # %bb.0:
; SSE-NEXT:    addsd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: addsd:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <2 x double> %va, i32 0
  %b = load double, double* %pb
  %r = fadd double %a, %b
  %vr = insertelement <2 x double> %va, double %r, i32 0
  ret <2 x double> %vr
}

define <4 x float> @subss(<4 x float> %va, float* %pb) {
; SSE-LABEL: subss:
; SSE:       # %bb.0:
; SSE-NEXT:    subss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: subss:
; AVX:       # %bb.0:
; AVX-NEXT:    vsubss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <4 x float> %va, i32 0
  %b = load float, float* %pb
  %r = fsub float %a, %b
  %vr = insertelement <4 x float> %va, float %r, i32 0
  ret <4 x float> %vr
}

define <2 x double> @subsd(<2 x double> %va, double* %pb) {
; SSE-LABEL: subsd:
; SSE:       # %bb.0:
; SSE-NEXT:    subsd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: subsd:
; AVX:       # %bb.0:
; AVX-NEXT:    vsubsd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <2 x double> %va, i32 0
  %b = load double, double* %pb
  %r = fsub double %a, %b
  %vr = insertelement <2 x double> %va, double %r, i32 0
  ret <2 x double> %vr
}

define <4 x float> @mulss(<4 x float> %va, float* %pb) {
; SSE-LABEL: mulss:
; SSE:       # %bb.0:
; SSE-NEXT:    mulss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: mulss:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <4 x float> %va, i32 0
  %b = load float, float* %pb
  %r = fmul float %a, %b
  %vr = insertelement <4 x float> %va, float %r, i32 0
  ret <4 x float> %vr
}

define <2 x double> @mulsd(<2 x double> %va, double* %pb) {
; SSE-LABEL: mulsd:
; SSE:       # %bb.0:
; SSE-NEXT:    mulsd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: mulsd:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulsd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <2 x double> %va, i32 0
  %b = load double, double* %pb
  %r = fmul double %a, %b
  %vr = insertelement <2 x double> %va, double %r, i32 0
  ret <2 x double> %vr
}

define <4 x float> @divss(<4 x float> %va, float* %pb) {
; SSE-LABEL: divss:
; SSE:       # %bb.0:
; SSE-NEXT:    divss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: divss:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <4 x float> %va, i32 0
  %b = load float, float* %pb
  %r = fdiv float %a, %b
  %vr = insertelement <4 x float> %va, float %r, i32 0
  ret <4 x float> %vr
}

define <2 x double> @divsd(<2 x double> %va, double* %pb) {
; SSE-LABEL: divsd:
; SSE:       # %bb.0:
; SSE-NEXT:    divsd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: divsd:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivsd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <2 x double> %va, i32 0
  %b = load double, double* %pb
  %r = fdiv double %a, %b
  %vr = insertelement <2 x double> %va, double %r, i32 0
  ret <2 x double> %vr
}
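
; A C-level sketch of the kind of source that produces the pattern tested
; above (the wrapper function below is an assumption for illustration, not
; taken from this file), shown for @addss; the remaining tests correspond to
; the analogous _mm_sub_ss/_mm_mul_ss/_mm_div_ss and _mm_add_sd/_mm_sub_sd/
; _mm_mul_sd/_mm_div_sd intrinsics:
;
;   #include <xmmintrin.h>
;
;   __m128 addss(__m128 a, const float *b) {
;     // Load the scalar, do scalar math in the low lane, keep a's upper lanes.
;     return _mm_add_ss(a, _mm_load_ss(b));
;   }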