; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=sse4.1 | FileCheck %s --check-prefixes=SSE
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx | FileCheck %s --check-prefixes=AVX
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx512f,avx512vl | FileCheck %s --check-prefixes=AVX

; PR37751 - https://bugs.llvm.org/show_bug.cgi?id=37751
; We can't combine into 'round' instructions because the behavior is different for out-of-range values.
;
; Every test below converts FP -> integer with a truncating x86 conversion
; intrinsic and then converts the integer back to FP with a generic 'sitofp'.
; The expected output keeps both conversion instructions in each case.
; The "mem" variants load the source vector through a pointer argument; the
; "reg" variants take the source vector directly as an argument.
; Both AVX runs (avx and avx512f+avx512vl) share one set of assertions, so
; only a single common prefix is passed to FileCheck for them.

declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>)
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>)
declare i32 @llvm.x86.sse.cvttss2si(<4 x float>)
declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>)
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>)
declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>)

; Scalar float -> i32 -> float, source loaded from memory.
define float @float_to_int_to_float_mem_f32_i32(<4 x float>* %p) #0 {
; SSE-LABEL: float_to_int_to_float_mem_f32_i32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttss2si (%rdi), %eax
; SSE-NEXT:    cvtsi2ssl %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_mem_f32_i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttss2si (%rdi), %eax
; AVX-NEXT:    vcvtsi2ssl %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load <4 x float>, <4 x float>* %p, align 16
  %fptosi = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> %x)
  %sitofp = sitofp i32 %fptosi to float
  ret float %sitofp
}

; Scalar float -> i32 -> float, source passed in a register.
define float @float_to_int_to_float_reg_f32_i32(<4 x float> %x) #0 {
; SSE-LABEL: float_to_int_to_float_reg_f32_i32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttss2si %xmm0, %eax
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    cvtsi2ssl %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_reg_f32_i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttss2si %xmm0, %eax
; AVX-NEXT:    vcvtsi2ssl %eax, %xmm1, %xmm0
; AVX-NEXT:    retq
  %fptosi = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> %x)
  %sitofp = sitofp i32 %fptosi to float
  ret float %sitofp
}

; Scalar float -> i64 -> float, source loaded from memory.
define float @float_to_int_to_float_mem_f32_i64(<4 x float>* %p) #0 {
; SSE-LABEL: float_to_int_to_float_mem_f32_i64:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttss2si (%rdi), %rax
; SSE-NEXT:    cvtsi2ssq %rax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_mem_f32_i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttss2si (%rdi), %rax
; AVX-NEXT:    vcvtsi2ssq %rax, %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load <4 x float>, <4 x float>* %p, align 16
  %fptosi = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %x)
  %sitofp = sitofp i64 %fptosi to float
  ret float %sitofp
}

; Scalar float -> i64 -> float, source passed in a register.
define float @float_to_int_to_float_reg_f32_i64(<4 x float> %x) #0 {
; SSE-LABEL: float_to_int_to_float_reg_f32_i64:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    cvtsi2ssq %rax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_reg_f32_i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vcvtsi2ssq %rax, %xmm1, %xmm0
; AVX-NEXT:    retq
  %fptosi = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %x)
  %sitofp = sitofp i64 %fptosi to float
  ret float %sitofp
}

; Scalar double -> i32 -> double, source loaded from memory.
define double @float_to_int_to_float_mem_f64_i32(<2 x double>* %p) #0 {
; SSE-LABEL: float_to_int_to_float_mem_f64_i32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttsd2si (%rdi), %eax
; SSE-NEXT:    cvtsi2sdl %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_mem_f64_i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttsd2si (%rdi), %eax
; AVX-NEXT:    vcvtsi2sdl %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load <2 x double>, <2 x double>* %p, align 16
  %fptosi = tail call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %x)
  %sitofp = sitofp i32 %fptosi to double
  ret double %sitofp
}

; Scalar double -> i32 -> double, source passed in a register.
define double @float_to_int_to_float_reg_f64_i32(<2 x double> %x) #0 {
; SSE-LABEL: float_to_int_to_float_reg_f64_i32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttsd2si %xmm0, %eax
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    cvtsi2sdl %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_reg_f64_i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttsd2si %xmm0, %eax
; AVX-NEXT:    vcvtsi2sdl %eax, %xmm1, %xmm0
; AVX-NEXT:    retq
  %fptosi = tail call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %x)
  %sitofp = sitofp i32 %fptosi to double
  ret double %sitofp
}

; Scalar double -> i64 -> double, source loaded from memory.
define double @float_to_int_to_float_mem_f64_i64(<2 x double>* %p) #0 {
; SSE-LABEL: float_to_int_to_float_mem_f64_i64:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttsd2si (%rdi), %rax
; SSE-NEXT:    cvtsi2sdq %rax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_mem_f64_i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttsd2si (%rdi), %rax
; AVX-NEXT:    vcvtsi2sdq %rax, %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load <2 x double>, <2 x double>* %p, align 16
  %fptosi = tail call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %x)
  %sitofp = sitofp i64 %fptosi to double
  ret double %sitofp
}

; Scalar double -> i64 -> double, source passed in a register.
define double @float_to_int_to_float_reg_f64_i64(<2 x double> %x) #0 {
; SSE-LABEL: float_to_int_to_float_reg_f64_i64:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    cvtsi2sdq %rax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_reg_f64_i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vcvtsi2sdq %rax, %xmm1, %xmm0
; AVX-NEXT:    retq
  %fptosi = tail call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %x)
  %sitofp = sitofp i64 %fptosi to double
  ret double %sitofp
}

; Packed <4 x float> -> <4 x i32> -> <4 x float>, source loaded from memory.
define <4 x float> @float_to_int_to_float_mem_v4f32(<4 x float>* %p) #0 {
; SSE-LABEL: float_to_int_to_float_mem_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttps2dq (%rdi), %xmm0
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_mem_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttps2dq (%rdi), %xmm0
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load <4 x float>, <4 x float>* %p, align 16
  %fptosi = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %x)
  %sitofp = sitofp <4 x i32> %fptosi to <4 x float>
  ret <4 x float> %sitofp
}

; Packed <4 x float> -> <4 x i32> -> <4 x float>, source passed in a register.
define <4 x float> @float_to_int_to_float_reg_v4f32(<4 x float> %x) #0 {
; SSE-LABEL: float_to_int_to_float_reg_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_reg_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    retq
  %fptosi = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %x)
  %sitofp = sitofp <4 x i32> %fptosi to <4 x float>
  ret <4 x float> %sitofp
}

; Packed <2 x double> -> i32 lanes -> <2 x double>, source loaded from memory.
; The intrinsic returns <4 x i32>; despite its name, %concat is a shuffle that
; keeps only lanes 0 and 1 of that result before the conversion back to FP.
define <2 x double> @float_to_int_to_float_mem_v2f64(<2 x double>* %p) #0 {
; SSE-LABEL: float_to_int_to_float_mem_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttpd2dq (%rdi), %xmm0
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_mem_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttpd2dqx (%rdi), %xmm0
; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load <2 x double>, <2 x double>* %p, align 16
  %fptosi = tail call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %x)
  %concat = shufflevector <4 x i32> %fptosi, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %sitofp = sitofp <2 x i32> %concat to <2 x double>
  ret <2 x double> %sitofp
}

; Packed <2 x double> -> i32 lanes -> <2 x double>, source passed in a register.
; As in the memory variant, %concat keeps only lanes 0 and 1 of the <4 x i32>
; intrinsic result.
define <2 x double> @float_to_int_to_float_reg_v2f64(<2 x double> %x) #0 {
; SSE-LABEL: float_to_int_to_float_reg_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: float_to_int_to_float_reg_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttpd2dq %xmm0, %xmm0
; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT:    retq
  %fptosi = tail call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %x)
  %concat = shufflevector <4 x i32> %fptosi, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %sitofp = sitofp <2 x i32> %concat to <2 x double>
  ret <2 x double> %sitofp
}

; Attribute group applied to every test function above.
; NOTE(review): presumably set so that signed-zero handling is not what blocks
; a fold to a 'round' instruction - confirm against PR37751.
attributes #0 = { "no-signed-zeros-fp-math"="true" }