; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix=AVX

; Verify we fold loads into unary SSE intrinsics only when optimizing for size.
;
; Every function below has the same shape: load a scalar through the pointer
; argument, insert it into lane 0 of an undef vector, call a scalar unary
; intrinsic (rcp.ss, rsqrt.ss, sqrt.ss or sqrt.sd), and return lane 0 of the
; result.
;
; The first four functions carry no size attribute; the checks expect a
; separate scalar load followed by the register-register form of the
; instruction. The "_size" variants are marked optsize; the checks expect the
; load to be folded into the instruction's memory operand.

; --- Not optimizing for size: the load stays a separate instruction. ---

define float @rcpss(float* %a) {
; SSE-LABEL: rcpss:
; SSE:       # BB#0:
; SSE-NEXT:    movss (%rdi), %xmm0
; SSE-NEXT:    rcpss %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: rcpss:
; AVX:       # BB#0:
; AVX-NEXT:    vmovss (%rdi), %xmm0
; AVX-NEXT:    vrcpss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %ld = load float, float* %a
  %ins = insertelement <4 x float> undef, float %ld, i32 0
  %res = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %ins)
  %ext = extractelement <4 x float> %res, i32 0
  ret float %ext
}

define float @rsqrtss(float* %a) {
; SSE-LABEL: rsqrtss:
; SSE:       # BB#0:
; SSE-NEXT:    movss (%rdi), %xmm0
; SSE-NEXT:    rsqrtss %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: rsqrtss:
; AVX:       # BB#0:
; AVX-NEXT:    vmovss (%rdi), %xmm0
; AVX-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %ld = load float, float* %a
  %ins = insertelement <4 x float> undef, float %ld, i32 0
  %res = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %ins)
  %ext = extractelement <4 x float> %res, i32 0
  ret float %ext
}

define float @sqrtss(float* %a) {
; SSE-LABEL: sqrtss:
; SSE:       # BB#0:
; SSE-NEXT:    movss (%rdi), %xmm0
; SSE-NEXT:    sqrtss %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sqrtss:
; AVX:       # BB#0:
; AVX-NEXT:    vmovss (%rdi), %xmm0
; AVX-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %ld = load float, float* %a
  %ins = insertelement <4 x float> undef, float %ld, i32 0
  %res = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %ins)
  %ext = extractelement <4 x float> %res, i32 0
  ret float %ext
}

define double @sqrtsd(double* %a) {
; SSE-LABEL: sqrtsd:
; SSE:       # BB#0:
; SSE-NEXT:    movsd (%rdi), %xmm0
; SSE-NEXT:    sqrtsd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sqrtsd:
; AVX:       # BB#0:
; AVX-NEXT:    vmovsd (%rdi), %xmm0
; AVX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %ld = load double, double* %a
  %ins = insertelement <2 x double> undef, double %ld, i32 0
  %res = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %ins)
  %ext = extractelement <2 x double> %res, i32 0
  ret double %ext
}

; --- optsize: the load is folded into the instruction's memory operand. ---

define float @rcpss_size(float* %a) optsize {
; SSE-LABEL: rcpss_size:
; SSE:       # BB#0:
; SSE-NEXT:    rcpss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: rcpss_size:
; AVX:       # BB#0:
; AVX-NEXT:    vrcpss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %ld = load float, float* %a
  %ins = insertelement <4 x float> undef, float %ld, i32 0
  %res = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %ins)
  %ext = extractelement <4 x float> %res, i32 0
  ret float %ext
}

define float @rsqrtss_size(float* %a) optsize {
; SSE-LABEL: rsqrtss_size:
; SSE:       # BB#0:
; SSE-NEXT:    rsqrtss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: rsqrtss_size:
; AVX:       # BB#0:
; AVX-NEXT:    vrsqrtss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %ld = load float, float* %a
  %ins = insertelement <4 x float> undef, float %ld, i32 0
  %res = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %ins)
  %ext = extractelement <4 x float> %res, i32 0
  ret float %ext
}

define float @sqrtss_size(float* %a) optsize {
; SSE-LABEL: sqrtss_size:
; SSE:       # BB#0:
; SSE-NEXT:    sqrtss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sqrtss_size:
; AVX:       # BB#0:
; AVX-NEXT:    vsqrtss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %ld = load float, float* %a
  %ins = insertelement <4 x float> undef, float %ld, i32 0
  %res = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %ins)
  %ext = extractelement <4 x float> %res, i32 0
  ret float %ext
}

define double @sqrtsd_size(double* %a) optsize {
; SSE-LABEL: sqrtsd_size:
; SSE:       # BB#0:
; SSE-NEXT:    sqrtsd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sqrtsd_size:
; AVX:       # BB#0:
; AVX-NEXT:    vsqrtsd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %ld = load double, double* %a
  %ins = insertelement <2 x double> undef, double %ld, i32 0
  %res = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %ins)
  %ext = extractelement <2 x double> %res, i32 0
  ret double %ext
}

; Scalar unary intrinsics exercised above; each takes and returns a full vector.
declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone