; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx512f,avx512vl,avx512dq | FileCheck %s --check-prefixes=CHECK

; PR37751 - https://bugs.llvm.org/show_bug.cgi?id=37751
; We can't combine into 'round' instructions because the behavior is different for out-of-range values.
;
; Layout of this file:
;   * Every test calls one of the x86 AVX-512 "cvtt*" intrinsics declared below
;     and feeds the integer result straight into a generic sitofp/uitofp.
;   * Each intrinsic call passes an undef second operand and an all-ones (-1)
;     i8/i16 third operand; the 512-bit forms take an extra trailing `i32 4`
;     (presumably the "current rounding direction" selector - confirm against
;     the intrinsic definitions).
;   * Each shape is tested twice: "_mem_" loads the source vector through %p,
;     "_reg_" takes it directly as an argument.
;   * The CHECK lines expect the two conversions to stay as two separate
;     instructions (vcvtt* followed by vcvt*), i.e. no fold into a rounding op.

declare <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float>, <16 x i32>, i16, i32)
declare <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float>, <4 x i32>, i8)
declare <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float>, <8 x i32>, i8)
declare <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float>, <16 x i32>, i16, i32)
declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double>, <4 x i32>, i8)
declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double>, <8 x i32>, i8, i32)
declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double>, <8 x i32>, i8, i32)
declare <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float>, <4 x i64>, i8)
declare <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float>, <8 x i64>, i8, i32)
declare <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float>, <4 x i64>, i8)
declare <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float>, <8 x i64>, i8, i32)
declare <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double>, <2 x i64>, i8)
declare <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double>, <4 x i64>, i8)
declare <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double>, <8 x i64>, i8, i32)
declare <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double>, <2 x i64>, i8)
declare <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double>, <4 x i64>, i8)
declare <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double>, <8 x i64>, i8, i32)

; <16 x float> -> <16 x i32> (cvttps2dq.512) -> sitofp -> <16 x float>

define <16 x float> @float_to_sint_to_float_mem_v16f32(<16 x float>* %p) {
; CHECK-LABEL: float_to_sint_to_float_mem_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2dq (%rdi), %zmm0
; CHECK-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = load <16 x float>, <16 x float>* %p
  %fptosi = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x, <16 x i32> undef, i16 -1, i32 4)
  %sitofp = sitofp <16 x i32> %fptosi to <16 x float>
  ret <16 x float> %sitofp
}

define <16 x float> @float_to_sint_to_float_reg_v16f32(<16 x float> %x) {
; CHECK-LABEL: float_to_sint_to_float_reg_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2dq %zmm0, %zmm0
; CHECK-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-NEXT:    retq
  %fptosi = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x, <16 x i32> undef, i16 -1, i32 4)
  %sitofp = sitofp <16 x i32> %fptosi to <16 x float>
  ret <16 x float> %sitofp
}

; <16 x float> -> <16 x i32> (cvttps2udq.512) -> uitofp -> <16 x float>

define <16 x float> @float_to_uint_to_float_mem_v16f32(<16 x float>* %p) {
; CHECK-LABEL: float_to_uint_to_float_mem_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2udq (%rdi), %zmm0
; CHECK-NEXT:    vcvtudq2ps %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = load <16 x float>, <16 x float>* %p
  %fptoui = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x, <16 x i32> undef, i16 -1, i32 4)
  %uitofp = uitofp <16 x i32> %fptoui to <16 x float>
  ret <16 x float> %uitofp
}

define <16 x float> @float_to_uint_to_float_reg_v16f32(<16 x float> %x) {
; CHECK-LABEL: float_to_uint_to_float_reg_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2udq %zmm0, %zmm0
; CHECK-NEXT:    vcvtudq2ps %zmm0, %zmm0
; CHECK-NEXT:    retq
  %fptoui = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x, <16 x i32> undef, i16 -1, i32 4)
  %uitofp = uitofp <16 x i32> %fptoui to <16 x float>
  ret <16 x float> %uitofp
}

; <4 x float> -> <4 x i32> (cvttps2udq.128) -> uitofp -> <4 x float>

define <4 x float> @float_to_uint_to_float_mem_v4f32(<4 x float>* %p) {
; CHECK-LABEL: float_to_uint_to_float_mem_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2udq (%rdi), %xmm0
; CHECK-NEXT:    vcvtudq2ps %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x = load <4 x float>, <4 x float>* %p
  %fptoui = tail call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %x, <4 x i32> undef, i8 -1)
  %uitofp = uitofp <4 x i32> %fptoui to <4 x float>
  ret <4 x float> %uitofp
}

define <4 x float> @float_to_uint_to_float_reg_v4f32(<4 x float> %x) {
; CHECK-LABEL: float_to_uint_to_float_reg_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2udq %xmm0, %xmm0
; CHECK-NEXT:    vcvtudq2ps %xmm0, %xmm0
; CHECK-NEXT:    retq
  %fptoui = tail call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %x, <4 x i32> undef, i8 -1)
  %uitofp = uitofp <4 x i32> %fptoui to <4 x float>
  ret <4 x float> %uitofp
}

; <8 x float> -> <8 x i32> (cvttps2udq.256) -> uitofp -> <8 x float>

define <8 x float> @float_to_uint_to_float_mem_v8f32(<8 x float>* %p) {
; CHECK-LABEL: float_to_uint_to_float_mem_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2udq (%rdi), %ymm0
; CHECK-NEXT:    vcvtudq2ps %ymm0, %ymm0
; CHECK-NEXT:    retq
  %x = load <8 x float>, <8 x float>* %p
  %fptoui = tail call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %x, <8 x i32> undef, i8 -1)
  %uitofp = uitofp <8 x i32> %fptoui to <8 x float>
  ret <8 x float> %uitofp
}

define <8 x float> @float_to_uint_to_float_reg_v8f32(<8 x float> %x) {
; CHECK-LABEL: float_to_uint_to_float_reg_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2udq %ymm0, %ymm0
; CHECK-NEXT:    vcvtudq2ps %ymm0, %ymm0
; CHECK-NEXT:    retq
  %fptoui = tail call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %x, <8 x i32> undef, i8 -1)
  %uitofp = uitofp <8 x i32> %fptoui to <8 x float>
  ret <8 x float> %uitofp
}

; <4 x double> -> <4 x i32> (cvttpd2udq.256) -> uitofp -> <4 x double>

define <4 x double> @double_to_uint_to_double_mem_v4f64(<4 x double>* %p) {
; CHECK-LABEL: double_to_uint_to_double_mem_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2udqy (%rdi), %xmm0
; CHECK-NEXT:    vcvtudq2pd %xmm0, %ymm0
; CHECK-NEXT:    retq
  %x = load <4 x double>, <4 x double>* %p
  %fptoui = tail call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double> %x, <4 x i32> undef, i8 -1)
  %uitofp = uitofp <4 x i32> %fptoui to <4 x double>
  ret <4 x double> %uitofp
}

define <4 x double> @double_to_uint_to_double_reg_v4f64(<4 x double> %x) {
; CHECK-LABEL: double_to_uint_to_double_reg_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2udq %ymm0, %xmm0
; CHECK-NEXT:    vcvtudq2pd %xmm0, %ymm0
; CHECK-NEXT:    retq
  %fptoui = tail call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double> %x, <4 x i32> undef, i8 -1)
  %uitofp = uitofp <4 x i32> %fptoui to <4 x double>
  ret <4 x double> %uitofp
}

; <8 x double> -> <8 x i32> (cvttpd2dq.512) -> sitofp -> <8 x double>

define <8 x double> @double_to_sint_to_double_mem_v8f64(<8 x double>* %p) {
; CHECK-LABEL: double_to_sint_to_double_mem_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2dq (%rdi), %ymm0
; CHECK-NEXT:    vcvtdq2pd %ymm0, %zmm0
; CHECK-NEXT:    retq
  %x = load <8 x double>, <8 x double>* %p
  %fptosi = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x, <8 x i32> undef, i8 -1, i32 4)
  %sitofp = sitofp <8 x i32> %fptosi to <8 x double>
  ret <8 x double> %sitofp
}

define <8 x double> @double_to_sint_to_double_reg_v8f64(<8 x double> %x) {
; CHECK-LABEL: double_to_sint_to_double_reg_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2dq %zmm0, %ymm0
; CHECK-NEXT:    vcvtdq2pd %ymm0, %zmm0
; CHECK-NEXT:    retq
  %fptosi = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x, <8 x i32> undef, i8 -1, i32 4)
  %sitofp = sitofp <8 x i32> %fptosi to <8 x double>
  ret <8 x double> %sitofp
}

; <8 x double> -> <8 x i32> (cvttpd2udq.512) -> uitofp -> <8 x double>

define <8 x double> @double_to_uint_to_double_mem_v8f64(<8 x double>* %p) {
; CHECK-LABEL: double_to_uint_to_double_mem_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2udq (%rdi), %ymm0
; CHECK-NEXT:    vcvtudq2pd %ymm0, %zmm0
; CHECK-NEXT:    retq
  %x = load <8 x double>, <8 x double>* %p
  %fptoui = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x, <8 x i32> undef, i8 -1, i32 4)
  %uitofp = uitofp <8 x i32> %fptoui to <8 x double>
  ret <8 x double> %uitofp
}

define <8 x double> @double_to_uint_to_double_reg_v8f64(<8 x double> %x) {
; CHECK-LABEL: double_to_uint_to_double_reg_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2udq %zmm0, %ymm0
; CHECK-NEXT:    vcvtudq2pd %ymm0, %zmm0
; CHECK-NEXT:    retq
  %fptoui = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x, <8 x i32> undef, i8 -1, i32 4)
  %uitofp = uitofp <8 x i32> %fptoui to <8 x double>
  ret <8 x double> %uitofp
}

; <4 x float> -> <4 x i64> (cvttps2qq.256) -> sitofp -> <4 x float>

define <4 x float> @float_to_sint64_to_float_mem_v4f32(<4 x float>* %p) {
; CHECK-LABEL: float_to_sint64_to_float_mem_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2qq (%rdi), %ymm0
; CHECK-NEXT:    vcvtqq2ps %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %x = load <4 x float>, <4 x float>* %p
  %fptosi = tail call <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float> %x, <4 x i64> undef, i8 -1)
  %sitofp = sitofp <4 x i64> %fptosi to <4 x float>
  ret <4 x float> %sitofp
}

define <4 x float> @float_to_sint64_to_float_reg_v4f32(<4 x float> %x) {
; CHECK-LABEL: float_to_sint64_to_float_reg_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2qq %xmm0, %ymm0
; CHECK-NEXT:    vcvtqq2ps %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %fptosi = tail call <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float> %x, <4 x i64> undef, i8 -1)
  %sitofp = sitofp <4 x i64> %fptosi to <4 x float>
  ret <4 x float> %sitofp
}

; <4 x float> -> <4 x i64> (cvttps2uqq.256) -> uitofp -> <4 x float>

define <4 x float> @float_to_uint64_to_float_mem_v4f32(<4 x float>* %p) {
; CHECK-LABEL: float_to_uint64_to_float_mem_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2uqq (%rdi), %ymm0
; CHECK-NEXT:    vcvtuqq2ps %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %x = load <4 x float>, <4 x float>* %p
  %fptoui = tail call <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float> %x, <4 x i64> undef, i8 -1)
  %uitofp = uitofp <4 x i64> %fptoui to <4 x float>
  ret <4 x float> %uitofp
}

define <4 x float> @float_to_uint64_to_float_reg_v4f32(<4 x float> %x) {
; CHECK-LABEL: float_to_uint64_to_float_reg_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2uqq %xmm0, %ymm0
; CHECK-NEXT:    vcvtuqq2ps %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %fptoui = tail call <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float> %x, <4 x i64> undef, i8 -1)
  %uitofp = uitofp <4 x i64> %fptoui to <4 x float>
  ret <4 x float> %uitofp
}

; <8 x float> -> <8 x i64> (cvttps2qq.512) -> sitofp -> <8 x float>

define <8 x float> @float_to_sint64_to_float_mem_v8f32(<8 x float>* %p) {
; CHECK-LABEL: float_to_sint64_to_float_mem_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2qq (%rdi), %zmm0
; CHECK-NEXT:    vcvtqq2ps %zmm0, %ymm0
; CHECK-NEXT:    retq
  %x = load <8 x float>, <8 x float>* %p
  %fptosi = tail call <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float> %x, <8 x i64> undef, i8 -1, i32 4)
  %sitofp = sitofp <8 x i64> %fptosi to <8 x float>
  ret <8 x float> %sitofp
}

define <8 x float> @float_to_sint64_to_float_reg_v8f32(<8 x float> %x) {
; CHECK-LABEL: float_to_sint64_to_float_reg_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2qq %ymm0, %zmm0
; CHECK-NEXT:    vcvtqq2ps %zmm0, %ymm0
; CHECK-NEXT:    retq
  %fptosi = tail call <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float> %x, <8 x i64> undef, i8 -1, i32 4)
  %sitofp = sitofp <8 x i64> %fptosi to <8 x float>
  ret <8 x float> %sitofp
}

; <8 x float> -> <8 x i64> (cvttps2uqq.512) -> uitofp -> <8 x float>

define <8 x float> @float_to_uint64_to_float_mem_v8f32(<8 x float>* %p) {
; CHECK-LABEL: float_to_uint64_to_float_mem_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2uqq (%rdi), %zmm0
; CHECK-NEXT:    vcvtuqq2ps %zmm0, %ymm0
; CHECK-NEXT:    retq
  %x = load <8 x float>, <8 x float>* %p
  %fptoui = tail call <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float> %x, <8 x i64> undef, i8 -1, i32 4)
  %uitofp = uitofp <8 x i64> %fptoui to <8 x float>
  ret <8 x float> %uitofp
}

define <8 x float> @float_to_uint64_to_float_reg_v8f32(<8 x float> %x) {
; CHECK-LABEL: float_to_uint64_to_float_reg_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2uqq %ymm0, %zmm0
; CHECK-NEXT:    vcvtuqq2ps %zmm0, %ymm0
; CHECK-NEXT:    retq
  %fptoui = tail call <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float> %x, <8 x i64> undef, i8 -1, i32 4)
  %uitofp = uitofp <8 x i64> %fptoui to <8 x float>
  ret <8 x float> %uitofp
}

; <2 x double> -> <2 x i64> (cvttpd2qq.128) -> sitofp -> <2 x double>

define <2 x double> @double_to_sint64_to_double_mem_v2f64(<2 x double>* %p) {
; CHECK-LABEL: double_to_sint64_to_double_mem_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2qq (%rdi), %xmm0
; CHECK-NEXT:    vcvtqq2pd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x = load <2 x double>, <2 x double>* %p
  %fptosi = tail call <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double> %x, <2 x i64> undef, i8 -1)
  %sitofp = sitofp <2 x i64> %fptosi to <2 x double>
  ret <2 x double> %sitofp
}

define <2 x double> @double_to_sint64_to_double_reg_v2f64(<2 x double> %x) {
; CHECK-LABEL: double_to_sint64_to_double_reg_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2qq %xmm0, %xmm0
; CHECK-NEXT:    vcvtqq2pd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %fptosi = tail call <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double> %x, <2 x i64> undef, i8 -1)
  %sitofp = sitofp <2 x i64> %fptosi to <2 x double>
  ret <2 x double> %sitofp
}

; <2 x double> -> <2 x i64> (cvttpd2uqq.128) -> uitofp -> <2 x double>

define <2 x double> @double_to_uint64_to_double_mem_v2f64(<2 x double>* %p) {
; CHECK-LABEL: double_to_uint64_to_double_mem_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2uqq (%rdi), %xmm0
; CHECK-NEXT:    vcvtuqq2pd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x = load <2 x double>, <2 x double>* %p
  %fptoui = tail call <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double> %x, <2 x i64> undef, i8 -1)
  %uitofp = uitofp <2 x i64> %fptoui to <2 x double>
  ret <2 x double> %uitofp
}

define <2 x double> @double_to_uint64_to_double_reg_v2f64(<2 x double> %x) {
; CHECK-LABEL: double_to_uint64_to_double_reg_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2uqq %xmm0, %xmm0
; CHECK-NEXT:    vcvtuqq2pd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %fptoui = tail call <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double> %x, <2 x i64> undef, i8 -1)
  %uitofp = uitofp <2 x i64> %fptoui to <2 x double>
  ret <2 x double> %uitofp
}

; <4 x double> -> <4 x i64> (cvttpd2qq.256) -> sitofp -> <4 x double>

define <4 x double> @double_to_sint64_to_double_mem_v4f64(<4 x double>* %p) {
; CHECK-LABEL: double_to_sint64_to_double_mem_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2qq (%rdi), %ymm0
; CHECK-NEXT:    vcvtqq2pd %ymm0, %ymm0
; CHECK-NEXT:    retq
  %x = load <4 x double>, <4 x double>* %p
  %fptosi = tail call <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double> %x, <4 x i64> undef, i8 -1)
  %sitofp = sitofp <4 x i64> %fptosi to <4 x double>
  ret <4 x double> %sitofp
}

define <4 x double> @double_to_sint64_to_double_reg_v4f64(<4 x double> %x) {
; CHECK-LABEL: double_to_sint64_to_double_reg_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2qq %ymm0, %ymm0
; CHECK-NEXT:    vcvtqq2pd %ymm0, %ymm0
; CHECK-NEXT:    retq
  %fptosi = tail call <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double> %x, <4 x i64> undef, i8 -1)
  %sitofp = sitofp <4 x i64> %fptosi to <4 x double>
  ret <4 x double> %sitofp
}

; <4 x double> -> <4 x i64> (cvttpd2uqq.256) -> uitofp -> <4 x double>

define <4 x double> @double_to_uint64_to_double_mem_v4f64(<4 x double>* %p) {
; CHECK-LABEL: double_to_uint64_to_double_mem_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2uqq (%rdi), %ymm0
; CHECK-NEXT:    vcvtuqq2pd %ymm0, %ymm0
; CHECK-NEXT:    retq
  %x = load <4 x double>, <4 x double>* %p
  %fptoui = tail call <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double> %x, <4 x i64> undef, i8 -1)
  %uitofp = uitofp <4 x i64> %fptoui to <4 x double>
  ret <4 x double> %uitofp
}

define <4 x double> @double_to_uint64_to_double_reg_v4f64(<4 x double> %x) {
; CHECK-LABEL: double_to_uint64_to_double_reg_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2uqq %ymm0, %ymm0
; CHECK-NEXT:    vcvtuqq2pd %ymm0, %ymm0
; CHECK-NEXT:    retq
  %fptoui = tail call <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double> %x, <4 x i64> undef, i8 -1)
  %uitofp = uitofp <4 x i64> %fptoui to <4 x double>
  ret <4 x double> %uitofp
}

; <8 x double> -> <8 x i64> (cvttpd2qq.512) -> sitofp -> <8 x double>

define <8 x double> @double_to_sint64_to_double_mem_v8f64(<8 x double>* %p) {
; CHECK-LABEL: double_to_sint64_to_double_mem_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2qq (%rdi), %zmm0
; CHECK-NEXT:    vcvtqq2pd %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = load <8 x double>, <8 x double>* %p
  %fptosi = tail call <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double> %x, <8 x i64> undef, i8 -1, i32 4)
  %sitofp = sitofp <8 x i64> %fptosi to <8 x double>
  ret <8 x double> %sitofp
}

define <8 x double> @double_to_sint64_to_double_reg_v8f64(<8 x double> %x) {
; CHECK-LABEL: double_to_sint64_to_double_reg_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2qq %zmm0, %zmm0
; CHECK-NEXT:    vcvtqq2pd %zmm0, %zmm0
; CHECK-NEXT:    retq
  %fptosi = tail call <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double> %x, <8 x i64> undef, i8 -1, i32 4)
  %sitofp = sitofp <8 x i64> %fptosi to <8 x double>
  ret <8 x double> %sitofp
}

; <8 x double> -> <8 x i64> (cvttpd2uqq.512) -> uitofp -> <8 x double>

define <8 x double> @double_to_uint64_to_double_mem_v8f64(<8 x double>* %p) {
; CHECK-LABEL: double_to_uint64_to_double_mem_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2uqq (%rdi), %zmm0
; CHECK-NEXT:    vcvtuqq2pd %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = load <8 x double>, <8 x double>* %p
  %fptoui = tail call <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double> %x, <8 x i64> undef, i8 -1, i32 4)
  %uitofp = uitofp <8 x i64> %fptoui to <8 x double>
  ret <8 x double> %uitofp
}

define <8 x double> @double_to_uint64_to_double_reg_v8f64(<8 x double> %x) {
; CHECK-LABEL: double_to_uint64_to_double_reg_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2uqq %zmm0, %zmm0
; CHECK-NEXT:    vcvtuqq2pd %zmm0, %zmm0
; CHECK-NEXT:    retq
  %fptoui = tail call <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double> %x, <8 x i64> undef, i8 -1, i32 4)
  %uitofp = uitofp <8 x i64> %fptoui to <8 x double>
  ret <8 x double> %uitofp
}