Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-- -mattr=avx512f,avx512vl,avx512dq | FileCheck %s --check-prefixes=CHECK
      3 
      4 ; PR37751 - https://bugs.llvm.org/show_bug.cgi?id=37751
      5 ; We can't combine into 'round' instructions because the behavior is different for out-of-range values.
      6 
      7 declare <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float>, <16 x i32>, i16, i32)
      8 declare <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float>, <4 x i32>, i8)
      9 declare <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float>, <8 x i32>, i8)
     10 declare <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float>, <16 x i32>, i16, i32)
     11 declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double>, <4 x i32>, i8)
     12 declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double>, <8 x i32>, i8, i32)
     13 declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double>, <8 x i32>, i8, i32)
     14 declare <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float>, <4 x i64>, i8)
     15 declare <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float>, <8 x i64>, i8, i32)
     16 declare <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float>, <4 x i64>, i8)
     17 declare <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float>, <8 x i64>, i8, i32)
     18 declare <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double>, <2 x i64>, i8)
     19 declare <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double>, <4 x i64>, i8)
     20 declare <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double>, <8 x i64>, i8, i32)
     21 declare <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double>, <2 x i64>, i8)
     22 declare <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double>, <4 x i64>, i8)
     23 declare <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double>, <8 x i64>, i8, i32)
     24 
     25 define <16 x float> @float_to_sint_to_float_mem_v16f32(<16 x float>* %p) {
     26 ; CHECK-LABEL: float_to_sint_to_float_mem_v16f32:
     27 ; CHECK:       # %bb.0:
     28 ; CHECK-NEXT:    vcvttps2dq (%rdi), %zmm0
     29 ; CHECK-NEXT:    vcvtdq2ps %zmm0, %zmm0
     30 ; CHECK-NEXT:    retq
          ; Round trip <16 x float> -> <16 x i32> -> <16 x float> on a value loaded
          ; from %p. The CHECK lines require the two conversions to stay separate
          ; instructions, with the load folded into vcvttps2dq as a memory operand.
     31   %x = load <16 x float>, <16 x float>* %p
          ; Passthrough is undef and the i16 mask is -1 (all bits set).
          ; NOTE(review): the trailing i32 4 is presumably the "current rounding
          ; direction" operand -- confirm against the intrinsic definition.
     32   %fptosi = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x, <16 x i32> undef, i16 -1, i32 4)
          ; Signed integer -> float; expected to select vcvtdq2ps.
     33   %sitofp = sitofp <16 x i32> %fptosi to <16 x float>
     34   ret <16 x float> %sitofp
     35 }
     36 
     37 define <16 x float> @float_to_sint_to_float_reg_v16f32(<16 x float> %x) {
     38 ; CHECK-LABEL: float_to_sint_to_float_reg_v16f32:
     39 ; CHECK:       # %bb.0:
     40 ; CHECK-NEXT:    vcvttps2dq %zmm0, %zmm0
     41 ; CHECK-NEXT:    vcvtdq2ps %zmm0, %zmm0
     42 ; CHECK-NEXT:    retq
          ; Register-operand variant of the <16 x float> signed round trip: %x is
          ; passed by value, so the CHECK lines expect a reg-to-reg vcvttps2dq
          ; followed by a separate vcvtdq2ps.
          ; Passthrough is undef and the i16 mask is -1 (all bits set).
          ; NOTE(review): the trailing i32 4 is presumably the "current rounding
          ; direction" operand -- confirm against the intrinsic definition.
     43   %fptosi = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x, <16 x i32> undef, i16 -1, i32 4)
     44   %sitofp = sitofp <16 x i32> %fptosi to <16 x float>
     45   ret <16 x float> %sitofp
     46 }
     47 
     48 define <16 x float> @float_to_uint_to_float_mem_v16f32(<16 x float>* %p) {
     49 ; CHECK-LABEL: float_to_uint_to_float_mem_v16f32:
     50 ; CHECK:       # %bb.0:
     51 ; CHECK-NEXT:    vcvttps2udq (%rdi), %zmm0
     52 ; CHECK-NEXT:    vcvtudq2ps %zmm0, %zmm0
     53 ; CHECK-NEXT:    retq
          ; Unsigned round trip <16 x float> -> <16 x i32> -> <16 x float> on a value
          ; loaded from %p. The CHECK lines require two separate conversion
          ; instructions, with the load folded into vcvttps2udq.
     54   %x = load <16 x float>, <16 x float>* %p
          ; Passthrough is undef and the i16 mask is -1 (all bits set).
          ; NOTE(review): the trailing i32 4 is presumably the "current rounding
          ; direction" operand -- confirm against the intrinsic definition.
     55   %fptoui = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x, <16 x i32> undef, i16 -1, i32 4)
          ; Unsigned integer -> float; expected to select vcvtudq2ps.
     56   %uitofp = uitofp <16 x i32> %fptoui to <16 x float>
     57   ret <16 x float> %uitofp
     58 }
     59 
     60 define <16 x float> @float_to_uint_to_float_reg_v16f32(<16 x float> %x) {
     61 ; CHECK-LABEL: float_to_uint_to_float_reg_v16f32:
     62 ; CHECK:       # %bb.0:
     63 ; CHECK-NEXT:    vcvttps2udq %zmm0, %zmm0
     64 ; CHECK-NEXT:    vcvtudq2ps %zmm0, %zmm0
     65 ; CHECK-NEXT:    retq
          ; Register-operand variant of the <16 x float> unsigned round trip: the
          ; CHECK lines expect a reg-to-reg vcvttps2udq followed by vcvtudq2ps.
          ; Passthrough is undef and the i16 mask is -1 (all bits set).
          ; NOTE(review): the trailing i32 4 is presumably the "current rounding
          ; direction" operand -- confirm against the intrinsic definition.
     66   %fptoui = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x, <16 x i32> undef, i16 -1, i32 4)
     67   %uitofp = uitofp <16 x i32> %fptoui to <16 x float>
     68   ret <16 x float> %uitofp
     69 }
     70 
     71 define <4 x float> @float_to_uint_to_float_mem_v4f32(<4 x float>* %p) {
     72 ; CHECK-LABEL: float_to_uint_to_float_mem_v4f32:
     73 ; CHECK:       # %bb.0:
     74 ; CHECK-NEXT:    vcvttps2udq (%rdi), %xmm0
     75 ; CHECK-NEXT:    vcvtudq2ps %xmm0, %xmm0
     76 ; CHECK-NEXT:    retq
          ; 128-bit unsigned round trip <4 x float> -> <4 x i32> -> <4 x float> on a
          ; value loaded from %p. The CHECK lines require two separate conversion
          ; instructions on xmm registers, with the load folded into vcvttps2udq.
     77   %x = load <4 x float>, <4 x float>* %p
          ; Passthrough is undef and the i8 mask is -1 (all bits set); this 128-bit
          ; intrinsic takes no extra i32 operand.
     78   %fptoui = tail call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %x, <4 x i32> undef, i8 -1)
     79   %uitofp = uitofp <4 x i32> %fptoui to <4 x float>
     80   ret <4 x float> %uitofp
     81 }
     82 
     83 define <4 x float> @float_to_uint_to_float_reg_v4f32(<4 x float> %x) {
     84 ; CHECK-LABEL: float_to_uint_to_float_reg_v4f32:
     85 ; CHECK:       # %bb.0:
     86 ; CHECK-NEXT:    vcvttps2udq %xmm0, %xmm0
     87 ; CHECK-NEXT:    vcvtudq2ps %xmm0, %xmm0
     88 ; CHECK-NEXT:    retq
          ; Register-operand variant of the 128-bit unsigned float round trip: the
          ; CHECK lines expect a reg-to-reg vcvttps2udq followed by vcvtudq2ps.
          ; Passthrough is undef and the i8 mask is -1 (all bits set).
     89   %fptoui = tail call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %x, <4 x i32> undef, i8 -1)
     90   %uitofp = uitofp <4 x i32> %fptoui to <4 x float>
     91   ret <4 x float> %uitofp
     92 }
     93 
     94 define <8 x float> @float_to_uint_to_float_mem_v8f32(<8 x float>* %p) {
     95 ; CHECK-LABEL: float_to_uint_to_float_mem_v8f32:
     96 ; CHECK:       # %bb.0:
     97 ; CHECK-NEXT:    vcvttps2udq (%rdi), %ymm0
     98 ; CHECK-NEXT:    vcvtudq2ps %ymm0, %ymm0
     99 ; CHECK-NEXT:    retq
          ; 256-bit unsigned round trip <8 x float> -> <8 x i32> -> <8 x float> on a
          ; value loaded from %p. The CHECK lines require two separate conversion
          ; instructions on ymm registers, with the load folded into vcvttps2udq.
    100   %x = load <8 x float>, <8 x float>* %p
          ; Passthrough is undef and the i8 mask is -1 (all bits set).
    101   %fptoui = tail call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %x, <8 x i32> undef, i8 -1)
    102   %uitofp = uitofp <8 x i32> %fptoui to <8 x float>
    103   ret <8 x float> %uitofp
    104 }
    105 
    106 define <8 x float> @float_to_uint_to_float_reg_v8f32(<8 x float> %x) {
    107 ; CHECK-LABEL: float_to_uint_to_float_reg_v8f32:
    108 ; CHECK:       # %bb.0:
    109 ; CHECK-NEXT:    vcvttps2udq %ymm0, %ymm0
    110 ; CHECK-NEXT:    vcvtudq2ps %ymm0, %ymm0
    111 ; CHECK-NEXT:    retq
          ; Register-operand variant of the 256-bit unsigned float round trip: the
          ; CHECK lines expect a reg-to-reg vcvttps2udq followed by vcvtudq2ps.
          ; Passthrough is undef and the i8 mask is -1 (all bits set).
    112   %fptoui = tail call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %x, <8 x i32> undef, i8 -1)
    113   %uitofp = uitofp <8 x i32> %fptoui to <8 x float>
    114   ret <8 x float> %uitofp
    115 }
    116 
    117 define <4 x double> @double_to_uint_to_double_mem_v4f64(<4 x double>* %p) {
    118 ; CHECK-LABEL: double_to_uint_to_double_mem_v4f64:
    119 ; CHECK:       # %bb.0:
    120 ; CHECK-NEXT:    vcvttpd2udqy (%rdi), %xmm0
    121 ; CHECK-NEXT:    vcvtudq2pd %xmm0, %ymm0
    122 ; CHECK-NEXT:    retq
          ; Unsigned round trip <4 x double> -> <4 x i32> -> <4 x double> on a value
          ; loaded from %p. The CHECK lines require two separate conversions: the
          ; narrowing one writes xmm0 and the widening one writes ymm0.
          ; NOTE(review): the memory form is spelled vcvttpd2udqy in the expected
          ; output, presumably because the xmm destination alone does not reveal
          ; the 256-bit source width -- confirm.
    123   %x = load <4 x double>, <4 x double>* %p
          ; Passthrough is undef and the i8 mask is -1 (all bits set).
    124   %fptoui = tail call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double> %x, <4 x i32> undef, i8 -1)
    125   %uitofp = uitofp <4 x i32> %fptoui to <4 x double>
    126   ret <4 x double> %uitofp
    127 }
    128 
    129 define <4 x double> @double_to_uint_to_double_reg_v4f64(<4 x double> %x) {
    130 ; CHECK-LABEL: double_to_uint_to_double_reg_v4f64:
    131 ; CHECK:       # %bb.0:
    132 ; CHECK-NEXT:    vcvttpd2udq %ymm0, %xmm0
    133 ; CHECK-NEXT:    vcvtudq2pd %xmm0, %ymm0
    134 ; CHECK-NEXT:    retq
          ; Register-operand variant of the <4 x double> unsigned round trip through
          ; <4 x i32>: the CHECK lines expect vcvttpd2udq (ymm -> xmm) followed by
          ; vcvtudq2pd (xmm -> ymm).
          ; Passthrough is undef and the i8 mask is -1 (all bits set).
    135   %fptoui = tail call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double> %x, <4 x i32> undef, i8 -1)
    136   %uitofp = uitofp <4 x i32> %fptoui to <4 x double>
    137   ret <4 x double> %uitofp
    138 }
    139 
    140 define <8 x double> @double_to_sint_to_double_mem_v8f64(<8 x double>* %p) {
    141 ; CHECK-LABEL: double_to_sint_to_double_mem_v8f64:
    142 ; CHECK:       # %bb.0:
    143 ; CHECK-NEXT:    vcvttpd2dq (%rdi), %ymm0
    144 ; CHECK-NEXT:    vcvtdq2pd %ymm0, %zmm0
    145 ; CHECK-NEXT:    retq
          ; Signed round trip <8 x double> -> <8 x i32> -> <8 x double> on a value
          ; loaded from %p. The CHECK lines require two separate conversions
          ; (memory -> ymm0, then ymm0 -> zmm0), with the load folded into
          ; vcvttpd2dq.
    146   %x = load <8 x double>, <8 x double>* %p
          ; Passthrough is undef and the i8 mask is -1 (all bits set).
          ; NOTE(review): the trailing i32 4 is presumably the "current rounding
          ; direction" operand -- confirm against the intrinsic definition.
    147   %fptosi = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x, <8 x i32> undef, i8 -1, i32 4)
    148   %sitofp = sitofp <8 x i32> %fptosi to <8 x double>
    149   ret <8 x double> %sitofp
    150 }
    151 
    152 define <8 x double> @double_to_sint_to_double_reg_v8f64(<8 x double> %x) {
    153 ; CHECK-LABEL: double_to_sint_to_double_reg_v8f64:
    154 ; CHECK:       # %bb.0:
    155 ; CHECK-NEXT:    vcvttpd2dq %zmm0, %ymm0
    156 ; CHECK-NEXT:    vcvtdq2pd %ymm0, %zmm0
    157 ; CHECK-NEXT:    retq
          ; Register-operand variant of the <8 x double> signed round trip through
          ; <8 x i32>: the CHECK lines expect vcvttpd2dq (zmm -> ymm) followed by
          ; vcvtdq2pd (ymm -> zmm).
          ; Passthrough is undef and the i8 mask is -1 (all bits set).
          ; NOTE(review): the trailing i32 4 is presumably the "current rounding
          ; direction" operand -- confirm against the intrinsic definition.
    158   %fptosi = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x, <8 x i32> undef, i8 -1, i32 4)
    159   %sitofp = sitofp <8 x i32> %fptosi to <8 x double>
    160   ret <8 x double> %sitofp
    161 }
    162 
    163 define <8 x double> @double_to_uint_to_double_mem_v8f64(<8 x double>* %p) {
    164 ; CHECK-LABEL: double_to_uint_to_double_mem_v8f64:
    165 ; CHECK:       # %bb.0:
    166 ; CHECK-NEXT:    vcvttpd2udq (%rdi), %ymm0
    167 ; CHECK-NEXT:    vcvtudq2pd %ymm0, %zmm0
    168 ; CHECK-NEXT:    retq
          ; Unsigned round trip <8 x double> -> <8 x i32> -> <8 x double> on a value
          ; loaded from %p. The CHECK lines require two separate conversions
          ; (memory -> ymm0, then ymm0 -> zmm0), with the load folded into
          ; vcvttpd2udq.
    169   %x = load <8 x double>, <8 x double>* %p
          ; Passthrough is undef and the i8 mask is -1 (all bits set).
          ; NOTE(review): the trailing i32 4 is presumably the "current rounding
          ; direction" operand -- confirm against the intrinsic definition.
    170   %fptoui = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x, <8 x i32> undef, i8 -1, i32 4)
    171   %uitofp = uitofp <8 x i32> %fptoui to <8 x double>
    172   ret <8 x double> %uitofp
    173 }
    174 
    175 define <8 x double> @double_to_uint_to_double_reg_v8f64(<8 x double> %x) {
    176 ; CHECK-LABEL: double_to_uint_to_double_reg_v8f64:
    177 ; CHECK:       # %bb.0:
    178 ; CHECK-NEXT:    vcvttpd2udq %zmm0, %ymm0
    179 ; CHECK-NEXT:    vcvtudq2pd %ymm0, %zmm0
    180 ; CHECK-NEXT:    retq
          ; Register-operand variant of the <8 x double> unsigned round trip through
          ; <8 x i32>: the CHECK lines expect vcvttpd2udq (zmm -> ymm) followed by
          ; vcvtudq2pd (ymm -> zmm).
          ; Passthrough is undef and the i8 mask is -1 (all bits set).
          ; NOTE(review): the trailing i32 4 is presumably the "current rounding
          ; direction" operand -- confirm against the intrinsic definition.
    181   %fptoui = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x, <8 x i32> undef, i8 -1, i32 4)
    182   %uitofp = uitofp <8 x i32> %fptoui to <8 x double>
    183   ret <8 x double> %uitofp
    184 }
    185 
    186 define <4 x float> @float_to_sint64_to_float_mem_v4f32(<4 x float>* %p) {
    187 ; CHECK-LABEL: float_to_sint64_to_float_mem_v4f32:
    188 ; CHECK:       # %bb.0:
    189 ; CHECK-NEXT:    vcvttps2qq (%rdi), %ymm0
    190 ; CHECK-NEXT:    vcvtqq2ps %ymm0, %xmm0
    191 ; CHECK-NEXT:    vzeroupper
    192 ; CHECK-NEXT:    retq
          ; Signed 64-bit round trip <4 x float> -> <4 x i64> -> <4 x float> on a
          ; value loaded from %p. The CHECK lines require two separate conversions
          ; with the load folded into vcvttps2qq. The <4 x i64> intermediate lives
          ; in ymm0, and the expected output includes a vzeroupper before retq.
    193   %x = load <4 x float>, <4 x float>* %p
          ; Passthrough is undef and the i8 mask is -1 (all bits set).
    194   %fptosi = tail call <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float> %x, <4 x i64> undef, i8 -1)
    195   %sitofp = sitofp <4 x i64> %fptosi to <4 x float>
    196   ret <4 x float> %sitofp
    197 }
    198 
    199 define <4 x float> @float_to_sint64_to_float_reg_v4f32(<4 x float> %x) {
    200 ; CHECK-LABEL: float_to_sint64_to_float_reg_v4f32:
    201 ; CHECK:       # %bb.0:
    202 ; CHECK-NEXT:    vcvttps2qq %xmm0, %ymm0
    203 ; CHECK-NEXT:    vcvtqq2ps %ymm0, %xmm0
    204 ; CHECK-NEXT:    vzeroupper
    205 ; CHECK-NEXT:    retq
          ; Register-operand variant of the <4 x float> signed round trip through
          ; <4 x i64>: the CHECK lines expect vcvttps2qq (xmm -> ymm), then
          ; vcvtqq2ps (ymm -> xmm), then a vzeroupper before retq.
          ; Passthrough is undef and the i8 mask is -1 (all bits set).
    206   %fptosi = tail call <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float> %x, <4 x i64> undef, i8 -1)
    207   %sitofp = sitofp <4 x i64> %fptosi to <4 x float>
    208   ret <4 x float> %sitofp
    209 }
    210 
    211 define <4 x float> @float_to_uint64_to_float_mem_v4f32(<4 x float>* %p) {
    212 ; CHECK-LABEL: float_to_uint64_to_float_mem_v4f32:
    213 ; CHECK:       # %bb.0:
    214 ; CHECK-NEXT:    vcvttps2uqq (%rdi), %ymm0
    215 ; CHECK-NEXT:    vcvtuqq2ps %ymm0, %xmm0
    216 ; CHECK-NEXT:    vzeroupper
    217 ; CHECK-NEXT:    retq
          ; Unsigned 64-bit round trip <4 x float> -> <4 x i64> -> <4 x float> on a
          ; value loaded from %p. The CHECK lines require two separate conversions
          ; with the load folded into vcvttps2uqq. The <4 x i64> intermediate lives
          ; in ymm0, and the expected output includes a vzeroupper before retq.
    218   %x = load <4 x float>, <4 x float>* %p
          ; Passthrough is undef and the i8 mask is -1 (all bits set).
    219   %fptoui = tail call <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float> %x, <4 x i64> undef, i8 -1)
    220   %uitofp = uitofp <4 x i64> %fptoui to <4 x float>
    221   ret <4 x float> %uitofp
    222 }
    223 
    224 define <4 x float> @float_to_uint64_to_float_reg_v4f32(<4 x float> %x) {
    225 ; CHECK-LABEL: float_to_uint64_to_float_reg_v4f32:
    226 ; CHECK:       # %bb.0:
    227 ; CHECK-NEXT:    vcvttps2uqq %xmm0, %ymm0
    228 ; CHECK-NEXT:    vcvtuqq2ps %ymm0, %xmm0
    229 ; CHECK-NEXT:    vzeroupper
    230 ; CHECK-NEXT:    retq
          ; Register-operand variant of the <4 x float> unsigned round trip through
          ; <4 x i64>: the CHECK lines expect vcvttps2uqq (xmm -> ymm), then
          ; vcvtuqq2ps (ymm -> xmm), then a vzeroupper before retq.
          ; Passthrough is undef and the i8 mask is -1 (all bits set).
    231   %fptoui = tail call <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float> %x, <4 x i64> undef, i8 -1)
    232   %uitofp = uitofp <4 x i64> %fptoui to <4 x float>
    233   ret <4 x float> %uitofp
    234 }
    235 
    236 define <8 x float> @float_to_sint64_to_float_mem_v8f32(<8 x float>* %p) {
    237 ; CHECK-LABEL: float_to_sint64_to_float_mem_v8f32:
    238 ; CHECK:       # %bb.0:
    239 ; CHECK-NEXT:    vcvttps2qq (%rdi), %zmm0
    240 ; CHECK-NEXT:    vcvtqq2ps %zmm0, %ymm0
    241 ; CHECK-NEXT:    retq
          ; Signed 64-bit round trip <8 x float> -> <8 x i64> -> <8 x float> on a
          ; value loaded from %p. The CHECK lines require two separate conversions
          ; (memory -> zmm0, then zmm0 -> ymm0), with the load folded into
          ; vcvttps2qq.
    242   %x = load <8 x float>, <8 x float>* %p
          ; Passthrough is undef and the i8 mask is -1 (all bits set).
          ; NOTE(review): the trailing i32 4 is presumably the "current rounding
          ; direction" operand -- confirm against the intrinsic definition.
    243   %fptosi = tail call <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float> %x, <8 x i64> undef, i8 -1, i32 4)
    244   %sitofp = sitofp <8 x i64> %fptosi to <8 x float>
    245   ret <8 x float> %sitofp
    246 }
    247 
    248 define <8 x float> @float_to_sint64_to_float_reg_v8f32(<8 x float> %x) {
    249 ; CHECK-LABEL: float_to_sint64_to_float_reg_v8f32:
    250 ; CHECK:       # %bb.0:
    251 ; CHECK-NEXT:    vcvttps2qq %ymm0, %zmm0
    252 ; CHECK-NEXT:    vcvtqq2ps %zmm0, %ymm0
    253 ; CHECK-NEXT:    retq
          ; Register-operand variant of the <8 x float> signed round trip through
          ; <8 x i64>: the CHECK lines expect vcvttps2qq (ymm -> zmm) followed by
          ; vcvtqq2ps (zmm -> ymm).
          ; Passthrough is undef and the i8 mask is -1 (all bits set).
          ; NOTE(review): the trailing i32 4 is presumably the "current rounding
          ; direction" operand -- confirm against the intrinsic definition.
    254   %fptosi = tail call <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float> %x, <8 x i64> undef, i8 -1, i32 4)
    255   %sitofp = sitofp <8 x i64> %fptosi to <8 x float>
    256   ret <8 x float> %sitofp
    257 }
    258 
    259 define <8 x float> @float_to_uint64_to_float_mem_v8f32(<8 x float>* %p) {
    260 ; CHECK-LABEL: float_to_uint64_to_float_mem_v8f32:
    261 ; CHECK:       # %bb.0:
    262 ; CHECK-NEXT:    vcvttps2uqq (%rdi), %zmm0
    263 ; CHECK-NEXT:    vcvtuqq2ps %zmm0, %ymm0
    264 ; CHECK-NEXT:    retq
          ; Unsigned 64-bit round trip <8 x float> -> <8 x i64> -> <8 x float> on a
          ; value loaded from %p. The CHECK lines require two separate conversions
          ; (memory -> zmm0, then zmm0 -> ymm0), with the load folded into
          ; vcvttps2uqq.
    265   %x = load <8 x float>, <8 x float>* %p
          ; Passthrough is undef and the i8 mask is -1 (all bits set).
          ; NOTE(review): the trailing i32 4 is presumably the "current rounding
          ; direction" operand -- confirm against the intrinsic definition.
    266   %fptoui = tail call <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float> %x, <8 x i64> undef, i8 -1, i32 4)
    267   %uitofp = uitofp <8 x i64> %fptoui to <8 x float>
    268   ret <8 x float> %uitofp
    269 }
    270 
    271 define <8 x float> @float_to_uint64_to_float_reg_v8f32(<8 x float> %x) {
    272 ; CHECK-LABEL: float_to_uint64_to_float_reg_v8f32:
    273 ; CHECK:       # %bb.0:
    274 ; CHECK-NEXT:    vcvttps2uqq %ymm0, %zmm0
    275 ; CHECK-NEXT:    vcvtuqq2ps %zmm0, %ymm0
    276 ; CHECK-NEXT:    retq
          ; Register-operand variant of the <8 x float> unsigned round trip through
          ; <8 x i64>: the CHECK lines expect vcvttps2uqq (ymm -> zmm) followed by
          ; vcvtuqq2ps (zmm -> ymm).
          ; Passthrough is undef and the i8 mask is -1 (all bits set).
          ; NOTE(review): the trailing i32 4 is presumably the "current rounding
          ; direction" operand -- confirm against the intrinsic definition.
    277   %fptoui = tail call <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float> %x, <8 x i64> undef, i8 -1, i32 4)
    278   %uitofp = uitofp <8 x i64> %fptoui to <8 x float>
    279   ret <8 x float> %uitofp
    280 }
    281 
    282 define <2 x double> @double_to_sint64_to_double_mem_v2f64(<2 x double>* %p) {
    283 ; CHECK-LABEL: double_to_sint64_to_double_mem_v2f64:
    284 ; CHECK:       # %bb.0:
    285 ; CHECK-NEXT:    vcvttpd2qq (%rdi), %xmm0
    286 ; CHECK-NEXT:    vcvtqq2pd %xmm0, %xmm0
    287 ; CHECK-NEXT:    retq
          ; 128-bit signed round trip <2 x double> -> <2 x i64> -> <2 x double> on a
          ; value loaded from %p. The CHECK lines require two separate conversion
          ; instructions on xmm registers, with the load folded into vcvttpd2qq.
    288   %x = load <2 x double>, <2 x double>* %p
          ; Passthrough is undef and the i8 mask is -1 (all bits set).
    289   %fptosi = tail call <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double> %x, <2 x i64> undef, i8 -1)
    290   %sitofp = sitofp <2 x i64> %fptosi to <2 x double>
    291   ret <2 x double> %sitofp
    292 }
    293 
    294 define <2 x double> @double_to_sint64_to_double_reg_v2f64(<2 x double> %x) {
    295 ; CHECK-LABEL: double_to_sint64_to_double_reg_v2f64:
    296 ; CHECK:       # %bb.0:
    297 ; CHECK-NEXT:    vcvttpd2qq %xmm0, %xmm0
    298 ; CHECK-NEXT:    vcvtqq2pd %xmm0, %xmm0
    299 ; CHECK-NEXT:    retq
          ; Register-operand variant of the 128-bit signed double round trip through
          ; <2 x i64>: the CHECK lines expect a reg-to-reg vcvttpd2qq followed by
          ; vcvtqq2pd.
          ; Passthrough is undef and the i8 mask is -1 (all bits set).
    300   %fptosi = tail call <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double> %x, <2 x i64> undef, i8 -1)
    301   %sitofp = sitofp <2 x i64> %fptosi to <2 x double>
    302   ret <2 x double> %sitofp
    303 }
    304 
    305 define <2 x double> @double_to_uint64_to_double_mem_v2f64(<2 x double>* %p) {
    306 ; CHECK-LABEL: double_to_uint64_to_double_mem_v2f64:
    307 ; CHECK:       # %bb.0:
    308 ; CHECK-NEXT:    vcvttpd2uqq (%rdi), %xmm0
    309 ; CHECK-NEXT:    vcvtuqq2pd %xmm0, %xmm0
    310 ; CHECK-NEXT:    retq
          ; 128-bit unsigned round trip <2 x double> -> <2 x i64> -> <2 x double> on
          ; a value loaded from %p. The CHECK lines require two separate conversion
          ; instructions on xmm registers, with the load folded into vcvttpd2uqq.
    311   %x = load <2 x double>, <2 x double>* %p
          ; Passthrough is undef and the i8 mask is -1 (all bits set).
    312   %fptoui = tail call <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double> %x, <2 x i64> undef, i8 -1)
    313   %uitofp = uitofp <2 x i64> %fptoui to <2 x double>
    314   ret <2 x double> %uitofp
    315 }
    316 
    317 define <2 x double> @double_to_uint64_to_double_reg_v2f64(<2 x double> %x) {
    318 ; CHECK-LABEL: double_to_uint64_to_double_reg_v2f64:
    319 ; CHECK:       # %bb.0:
    320 ; CHECK-NEXT:    vcvttpd2uqq %xmm0, %xmm0
    321 ; CHECK-NEXT:    vcvtuqq2pd %xmm0, %xmm0
    322 ; CHECK-NEXT:    retq
          ; Register-operand variant of the 128-bit unsigned double round trip
          ; through <2 x i64>: the CHECK lines expect a reg-to-reg vcvttpd2uqq
          ; followed by vcvtuqq2pd.
          ; Passthrough is undef and the i8 mask is -1 (all bits set).
    323   %fptoui = tail call <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double> %x, <2 x i64> undef, i8 -1)
    324   %uitofp = uitofp <2 x i64> %fptoui to <2 x double>
    325   ret <2 x double> %uitofp
    326 }
    327 
    328 define <4 x double> @double_to_sint64_to_double_mem_v4f64(<4 x double>* %p) {
    329 ; CHECK-LABEL: double_to_sint64_to_double_mem_v4f64:
    330 ; CHECK:       # %bb.0:
    331 ; CHECK-NEXT:    vcvttpd2qq (%rdi), %ymm0
    332 ; CHECK-NEXT:    vcvtqq2pd %ymm0, %ymm0
    333 ; CHECK-NEXT:    retq
          ; 256-bit signed round trip <4 x double> -> <4 x i64> -> <4 x double> on a
          ; value loaded from %p. The CHECK lines require two separate conversion
          ; instructions on ymm registers, with the load folded into vcvttpd2qq.
    334   %x = load <4 x double>, <4 x double>* %p
          ; Passthrough is undef and the i8 mask is -1 (all bits set).
    335   %fptosi = tail call <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double> %x, <4 x i64> undef, i8 -1)
    336   %sitofp = sitofp <4 x i64> %fptosi to <4 x double>
    337   ret <4 x double> %sitofp
    338 }
    339 
    340 define <4 x double> @double_to_sint64_to_double_reg_v4f64(<4 x double> %x) {
    341 ; CHECK-LABEL: double_to_sint64_to_double_reg_v4f64:
    342 ; CHECK:       # %bb.0:
    343 ; CHECK-NEXT:    vcvttpd2qq %ymm0, %ymm0
    344 ; CHECK-NEXT:    vcvtqq2pd %ymm0, %ymm0
    345 ; CHECK-NEXT:    retq
          ; Register-operand variant of the 256-bit signed double round trip through
          ; <4 x i64>: the CHECK lines expect a reg-to-reg vcvttpd2qq followed by
          ; vcvtqq2pd.
          ; Passthrough is undef and the i8 mask is -1 (all bits set).
    346   %fptosi = tail call <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double> %x, <4 x i64> undef, i8 -1)
    347   %sitofp = sitofp <4 x i64> %fptosi to <4 x double>
    348   ret <4 x double> %sitofp
    349 }
    350 
    351 define <4 x double> @double_to_uint64_to_double_mem_v4f64(<4 x double>* %p) {
    352 ; CHECK-LABEL: double_to_uint64_to_double_mem_v4f64:
    353 ; CHECK:       # %bb.0:
    354 ; CHECK-NEXT:    vcvttpd2uqq (%rdi), %ymm0
    355 ; CHECK-NEXT:    vcvtuqq2pd %ymm0, %ymm0
    356 ; CHECK-NEXT:    retq
          ; 256-bit unsigned round trip <4 x double> -> <4 x i64> -> <4 x double> on
          ; a value loaded from %p. The CHECK lines require two separate conversion
          ; instructions on ymm registers, with the load folded into vcvttpd2uqq.
    357   %x = load <4 x double>, <4 x double>* %p
          ; Passthrough is undef and the i8 mask is -1 (all bits set).
    358   %fptoui = tail call <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double> %x, <4 x i64> undef, i8 -1)
    359   %uitofp = uitofp <4 x i64> %fptoui to <4 x double>
    360   ret <4 x double> %uitofp
    361 }
    362 
    363 define <4 x double> @double_to_uint64_to_double_reg_v4f64(<4 x double> %x) {
    364 ; CHECK-LABEL: double_to_uint64_to_double_reg_v4f64:
    365 ; CHECK:       # %bb.0:
    366 ; CHECK-NEXT:    vcvttpd2uqq %ymm0, %ymm0
    367 ; CHECK-NEXT:    vcvtuqq2pd %ymm0, %ymm0
    368 ; CHECK-NEXT:    retq
          ; Register-operand variant of the 256-bit unsigned double round trip
          ; through <4 x i64>: the CHECK lines expect a reg-to-reg vcvttpd2uqq
          ; followed by vcvtuqq2pd.
          ; Passthrough is undef and the i8 mask is -1 (all bits set).
    369   %fptoui = tail call <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double> %x, <4 x i64> undef, i8 -1)
    370   %uitofp = uitofp <4 x i64> %fptoui to <4 x double>
    371   ret <4 x double> %uitofp
    372 }
    373 
    374 define <8 x double> @double_to_sint64_to_double_mem_v8f64(<8 x double>* %p) {
    375 ; CHECK-LABEL: double_to_sint64_to_double_mem_v8f64:
    376 ; CHECK:       # %bb.0:
    377 ; CHECK-NEXT:    vcvttpd2qq (%rdi), %zmm0
    378 ; CHECK-NEXT:    vcvtqq2pd %zmm0, %zmm0
    379 ; CHECK-NEXT:    retq
          ; 512-bit signed round trip <8 x double> -> <8 x i64> -> <8 x double> on a
          ; value loaded from %p. The CHECK lines require two separate conversion
          ; instructions on zmm registers, with the load folded into vcvttpd2qq.
    380   %x = load <8 x double>, <8 x double>* %p
          ; Passthrough is undef and the i8 mask is -1 (all bits set).
          ; NOTE(review): the trailing i32 4 is presumably the "current rounding
          ; direction" operand -- confirm against the intrinsic definition.
    381   %fptosi = tail call <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double> %x, <8 x i64> undef, i8 -1, i32 4)
    382   %sitofp = sitofp <8 x i64> %fptosi to <8 x double>
    383   ret <8 x double> %sitofp
    384 }
    385 
    386 define <8 x double> @double_to_sint64_to_double_reg_v8f64(<8 x double> %x) {
    387 ; CHECK-LABEL: double_to_sint64_to_double_reg_v8f64:
    388 ; CHECK:       # %bb.0:
    389 ; CHECK-NEXT:    vcvttpd2qq %zmm0, %zmm0
    390 ; CHECK-NEXT:    vcvtqq2pd %zmm0, %zmm0
    391 ; CHECK-NEXT:    retq
          ; Register-operand variant of the 512-bit signed double round trip through
          ; <8 x i64>: the CHECK lines expect a reg-to-reg vcvttpd2qq followed by
          ; vcvtqq2pd.
          ; Passthrough is undef and the i8 mask is -1 (all bits set).
          ; NOTE(review): the trailing i32 4 is presumably the "current rounding
          ; direction" operand -- confirm against the intrinsic definition.
    392   %fptosi = tail call <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double> %x, <8 x i64> undef, i8 -1, i32 4)
    393   %sitofp = sitofp <8 x i64> %fptosi to <8 x double>
    394   ret <8 x double> %sitofp
    395 }
    396 
    397 define <8 x double> @double_to_uint64_to_double_mem_v8f64(<8 x double>* %p) {
    398 ; CHECK-LABEL: double_to_uint64_to_double_mem_v8f64:
    399 ; CHECK:       # %bb.0:
    400 ; CHECK-NEXT:    vcvttpd2uqq (%rdi), %zmm0
    401 ; CHECK-NEXT:    vcvtuqq2pd %zmm0, %zmm0
    402 ; CHECK-NEXT:    retq
          ; 512-bit unsigned round trip <8 x double> -> <8 x i64> -> <8 x double> on
          ; a value loaded from %p. The CHECK lines require two separate conversion
          ; instructions on zmm registers, with the load folded into vcvttpd2uqq.
    403   %x = load <8 x double>, <8 x double>* %p
          ; Passthrough is undef and the i8 mask is -1 (all bits set).
          ; NOTE(review): the trailing i32 4 is presumably the "current rounding
          ; direction" operand -- confirm against the intrinsic definition.
    404   %fptoui = tail call <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double> %x, <8 x i64> undef, i8 -1, i32 4)
    405   %uitofp = uitofp <8 x i64> %fptoui to <8 x double>
    406   ret <8 x double> %uitofp
    407 }
    408 
    409 define <8 x double> @double_to_uint64_to_double_reg_v8f64(<8 x double> %x) {
    410 ; CHECK-LABEL: double_to_uint64_to_double_reg_v8f64:
    411 ; CHECK:       # %bb.0:
    412 ; CHECK-NEXT:    vcvttpd2uqq %zmm0, %zmm0
    413 ; CHECK-NEXT:    vcvtuqq2pd %zmm0, %zmm0
    414 ; CHECK-NEXT:    retq
          ; Register-operand variant of the 512-bit unsigned double round trip
          ; through <8 x i64>: the CHECK lines expect a reg-to-reg vcvttpd2uqq
          ; followed by vcvtuqq2pd.
          ; Passthrough is undef and the i8 mask is -1 (all bits set).
          ; NOTE(review): the trailing i32 4 is presumably the "current rounding
          ; direction" operand -- confirm against the intrinsic definition.
    415   %fptoui = tail call <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double> %x, <8 x i64> undef, i8 -1, i32 4)
    416   %uitofp = uitofp <8 x i64> %fptoui to <8 x double>
    417   ret <8 x double> %uitofp
    418 }
    419