; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512vldq-builtins.c

; Layout of this file:
; The two llc invocations above compile it once for a 32-bit (i686) triple and
; once for a 64-bit (x86_64) triple, both with avx512dq and avx512vl enabled.
; Assertions under the shared CHECK prefix apply to both runs; assertions
; under the X86 or X64 prefix apply to the 32-bit or 64-bit run only.
;
; Masked tests take the mask as an i8 (%__U), bitcast it to <8 x i1>, and when
; the operation has fewer than 8 lanes, keep only the low lanes with a
; shufflevector before using it.

; Unmasked signed <2 x i64> to <2 x double> conversion (sitofp). Both runs
; expect a single vcvtqq2pd on xmm registers.
define <2 x double> @test_mm_cvtepi64_pd(<2 x i64> %__A) {
; CHECK-LABEL: test_mm_cvtepi64_pd:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vcvtqq2pd %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
  %conv.i = sitofp <2 x i64> %__A to <2 x double>
  ret <2 x double> %conv.i
}

; Masked signed 128-bit conversion with pass-through: lanes 0-1 of the mask
; choose per lane between the converted value and %__W. Expected asm moves the
; mask into %k1 (kmovb from the stack in the 32-bit run, kmovw from %edi in
; the 64-bit run) and emits vcvtqq2pd with a {%k1} write-mask into %xmm0.
define <2 x double> @test_mm_mask_cvtepi64_pd(<2 x double> %__W, i8 zeroext %__U, <2 x i64> %__A) {
; X86-LABEL: test_mm_mask_cvtepi64_pd:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT: vcvtqq2pd %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_cvtepi64_pd:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vcvtqq2pd %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
entry:
  %conv.i.i = sitofp <2 x i64> %__A to <2 x double>
  %0 = bitcast i8 %__U to <8 x i1>
  ; Keep only mask lanes 0 and 1 (one per result element).
  %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %1 = select <2 x i1> %extract.i, <2 x double> %conv.i.i, <2 x double> %__W
  ret <2 x double> %1
}

; Masked signed 128-bit conversion with zeroing: lanes whose mask bit is clear
; take the zeroinitializer operand of the select. Expected asm is vcvtqq2pd
; with {%k1} {z}.
define <2 x double> @test_mm_maskz_cvtepi64_pd(i8 zeroext %__U, <2 x i64> %__A) {
; X86-LABEL: test_mm_maskz_cvtepi64_pd:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT: vcvtqq2pd %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_maskz_cvtepi64_pd:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vcvtqq2pd %xmm0, %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %conv.i.i = sitofp <2 x i64> %__A to <2 x double>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %1 = select <2 x i1> %extract.i, <2 x double> %conv.i.i, <2 x double> zeroinitializer
  ret <2 x double> %1
}

; Unmasked signed <4 x i64> to <4 x double> conversion (sitofp). Both runs
; expect a single vcvtqq2pd on ymm registers.
define <4 x double> @test_mm256_cvtepi64_pd(<4 x i64> %__A) {
; CHECK-LABEL: test_mm256_cvtepi64_pd:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vcvtqq2pd %ymm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
  %conv.i = sitofp <4 x i64> %__A to <4 x double>
  ret <4 x double> %conv.i
}

; Masked signed 256-bit conversion with pass-through %__W; mask lanes 0-3 are
; used. Expected asm is vcvtqq2pd on ymm registers with a {%k1} write-mask.
define <4 x double> @test_mm256_mask_cvtepi64_pd(<4 x double> %__W, i8 zeroext %__U, <4 x i64> %__A) {
; X86-LABEL: test_mm256_mask_cvtepi64_pd:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT: vcvtqq2pd %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_cvtepi64_pd:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vcvtqq2pd %ymm1, %ymm0 {%k1}
; X64-NEXT: retq
entry:
  %conv.i.i = sitofp <4 x i64> %__A to <4 x double>
  %0 = bitcast i8 %__U to <8 x i1>
  ; Keep only mask lanes 0 through 3 (one per result element).
  %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = select <4 x i1> %extract.i, <4 x double> %conv.i.i, <4 x double> %__W
  ret <4 x double> %1
}

; Masked signed 256-bit conversion with zeroing. Expected asm is vcvtqq2pd on
; ymm registers with {%k1} {z}.
define <4 x double> @test_mm256_maskz_cvtepi64_pd(i8 zeroext %__U, <4 x i64> %__A) {
; X86-LABEL: test_mm256_maskz_cvtepi64_pd:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT: vcvtqq2pd %ymm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_cvtepi64_pd:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vcvtqq2pd %ymm0, %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %conv.i.i = sitofp <4 x i64> %__A to <4 x double>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = select <4 x i1> %extract.i, <4 x double> %conv.i.i, <4 x double> zeroinitializer
  ret <4 x double> %1
}

; Unmasked unsigned <2 x i64> to <2 x double> conversion (uitofp). Both runs
; expect a single vcvtuqq2pd on xmm registers.
define <2 x double> @test_mm_cvtepu64_pd(<2 x i64> %__A) {
; CHECK-LABEL: test_mm_cvtepu64_pd:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vcvtuqq2pd %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
  %conv.i = uitofp <2 x i64> %__A to <2 x double>
  ret <2 x double> %conv.i
}

; Masked unsigned 128-bit conversion with pass-through %__W; mask lanes 0-1
; are used. Expected asm is vcvtuqq2pd with a {%k1} write-mask.
define <2 x double> @test_mm_mask_cvtepu64_pd(<2 x double> %__W, i8 zeroext %__U, <2 x i64> %__A) {
; X86-LABEL: test_mm_mask_cvtepu64_pd:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT: vcvtuqq2pd %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_cvtepu64_pd:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vcvtuqq2pd %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
entry:
  %conv.i.i = uitofp <2 x i64> %__A to <2 x double>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %1 = select <2 x i1> %extract.i, <2 x double> %conv.i.i, <2 x double> %__W
  ret <2 x double> %1
}

; Masked unsigned 128-bit conversion with zeroing. Expected asm is vcvtuqq2pd
; with {%k1} {z}.
define <2 x double> @test_mm_maskz_cvtepu64_pd(i8 zeroext %__U, <2 x i64> %__A) {
; X86-LABEL: test_mm_maskz_cvtepu64_pd:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT: vcvtuqq2pd %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_maskz_cvtepu64_pd:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vcvtuqq2pd %xmm0, %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %conv.i.i = uitofp <2 x i64> %__A to <2 x double>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %1 = select <2 x i1> %extract.i, <2 x double> %conv.i.i, <2 x double> zeroinitializer
  ret <2 x double> %1
}

; Unmasked unsigned <4 x i64> to <4 x double> conversion (uitofp). Both runs
; expect a single vcvtuqq2pd on ymm registers.
define <4 x double> @test_mm256_cvtepu64_pd(<4 x i64> %__A) {
; CHECK-LABEL: test_mm256_cvtepu64_pd:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vcvtuqq2pd %ymm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
  %conv.i = uitofp <4 x i64> %__A to <4 x double>
  ret <4 x double> %conv.i
}

; Masked unsigned 256-bit conversion with pass-through %__W; mask lanes 0-3
; are used. Expected asm is vcvtuqq2pd on ymm registers with a {%k1}
; write-mask.
define <4 x double> @test_mm256_mask_cvtepu64_pd(<4 x double> %__W, i8 zeroext %__U, <4 x i64> %__A) {
; X86-LABEL: test_mm256_mask_cvtepu64_pd:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT: vcvtuqq2pd %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_cvtepu64_pd:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vcvtuqq2pd %ymm1, %ymm0 {%k1}
; X64-NEXT: retq
entry:
  %conv.i.i = uitofp <4 x i64> %__A to <4 x double>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = select <4 x i1> %extract.i, <4 x double> %conv.i.i, <4 x double> %__W
  ret <4 x double> %1
}

; Masked unsigned 256-bit conversion with zeroing. Expected asm is vcvtuqq2pd
; on ymm registers with {%k1} {z}.
define <4 x double> @test_mm256_maskz_cvtepu64_pd(i8 zeroext %__U, <4 x i64> %__A) {
; X86-LABEL: test_mm256_maskz_cvtepu64_pd:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT: vcvtuqq2pd %ymm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_cvtepu64_pd:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vcvtuqq2pd %ymm0, %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %conv.i.i = uitofp <4 x i64> %__A to <4 x double>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = select <4 x i1> %extract.i, <4 x double> %conv.i.i, <4 x double> zeroinitializer
  ret <4 x double> %1
}

; Masked 128-bit double fpclass with immediate 2. The 2-lane intrinsic result
; is ANDed with mask lanes 0-1, widened to 8 lanes and returned as an i8.
; Expected asm folds the AND into a {%k1} write-mask on vfpclasspd and copies
; %k0 to %eax with kmovw.
define zeroext i8 @test_mm_mask_fpclass_pd_mask(i8 zeroext %__U, <2 x double> %__A) {
; X86-LABEL: test_mm_mask_fpclass_pd_mask:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT: vfpclasspd $2, %xmm0, %k0 {%k1}
; X86-NEXT: kmovw %k0, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_fpclass_pd_mask:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vfpclasspd $2, %xmm0, %k0 {%k1}
; X64-NEXT: kmovw %k0, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
entry:
  %0 = tail call <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double> %__A, i32 2)
  %1 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %2 = and <2 x i1> %0, %extract
  ; Widen to 8 lanes: indices 0-1 pick the lanes of %2, indices 2-3 pick lanes
  ; of the zeroinitializer operand, so result lanes 2-7 are zero.
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

; 128-bit double fpclass intrinsic: takes a <2 x double> and an i32, returns
; one i1 per lane.
declare <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double>, i32)

; Unmasked 128-bit double fpclass with immediate 2; the 2-lane result is
; zero-extended to 8 lanes and returned as an i8. Both runs expect an unmasked
; vfpclasspd on %xmm0 followed by kmovw to %eax.
define zeroext i8 @test_mm_fpclass_pd_mask(<2 x double> %__A) {
; CHECK-LABEL: test_mm_fpclass_pd_mask:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vfpclasspd $2, %xmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: ret{{[l|q]}}
entry:
  %0 = tail call <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double> %__A, i32 2)
  ; Indices 2-3 refer to the zeroinitializer operand, so lanes 2-7 are zero.
  %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %2 = bitcast <8 x i1> %1 to i8
  ret i8 %2
}

; Masked 256-bit double fpclass with immediate 2. The 4-lane intrinsic result
; is ANDed with mask lanes 0-3, widened to 8 lanes and returned as an i8.
; Expected asm uses a {%k1} write-mask on vfpclasspd with a ymm source and
; issues vzeroupper before returning.
define zeroext i8 @test_mm256_mask_fpclass_pd_mask(i8 zeroext %__U, <4 x double> %__A) {
; X86-LABEL: test_mm256_mask_fpclass_pd_mask:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT: vfpclasspd $2, %ymm0, %k0 {%k1}
; X86-NEXT: kmovw %k0, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: vzeroupper
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_fpclass_pd_mask:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vfpclasspd $2, %ymm0, %k0 {%k1}
; X64-NEXT: kmovw %k0, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
  %0 = tail call <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double> %__A, i32 2)
  %1 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = and <4 x i1> %0, %extract
  ; Widen to 8 lanes: indices 4-7 pick lanes of the zeroinitializer operand,
  ; so result lanes 4-7 are zero.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

; 256-bit double fpclass intrinsic: takes a <4 x double> and an i32, returns
; one i1 per lane.
declare <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double>, i32)

; Unmasked 256-bit double fpclass with immediate 2; the 4-lane result is
; zero-extended to 8 lanes and returned as an i8. Both runs expect an unmasked
; vfpclasspd on %ymm0, kmovw to %eax, and vzeroupper before the return.
define zeroext i8 @test_mm256_fpclass_pd_mask(<4 x double> %__A) {
; CHECK-LABEL: test_mm256_fpclass_pd_mask:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vfpclasspd $2, %ymm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: ret{{[l|q]}}
entry:
  %0 = tail call <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double> %__A, i32 2)
  ; Indices 4-7 refer to the zeroinitializer operand, so lanes 4-7 are zero.
  %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %2 = bitcast <8 x i1> %1 to i8
  ret i8 %2
}

; Masked 128-bit float fpclass with immediate 2. The 4-lane intrinsic result
; is ANDed with mask lanes 0-3, widened to 8 lanes and returned as an i8.
; Expected asm uses a {%k1} write-mask on vfpclassps with an xmm source.
define zeroext i8 @test_mm_mask_fpclass_ps_mask(i8 zeroext %__U, <4 x float> %__A) {
; X86-LABEL: test_mm_mask_fpclass_ps_mask:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT: vfpclassps $2, %xmm0, %k0 {%k1}
; X86-NEXT: kmovw %k0, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_fpclass_ps_mask:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vfpclassps $2, %xmm0, %k0 {%k1}
; X64-NEXT: kmovw %k0, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
entry:
  %0 = tail call <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float> %__A, i32 2)
  %1 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = and <4 x i1> %0, %extract
  ; Widen to 8 lanes: indices 4-7 pick lanes of the zeroinitializer operand,
  ; so result lanes 4-7 are zero.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

; 128-bit float fpclass intrinsic: takes a <4 x float> and an i32, returns one
; i1 per lane.
declare <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float>, i32)

; Unmasked 128-bit float fpclass with immediate 2; the 4-lane result is
; zero-extended to 8 lanes and returned as an i8. Both runs expect an unmasked
; vfpclassps on %xmm0 followed by kmovw to %eax.
define zeroext i8 @test_mm_fpclass_ps_mask(<4 x float> %__A) {
; CHECK-LABEL: test_mm_fpclass_ps_mask:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vfpclassps $2, %xmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: ret{{[l|q]}}
entry:
  %0 = tail call <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float> %__A, i32 2)
  ; Indices 4-7 refer to the zeroinitializer operand, so lanes 4-7 are zero.
  %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %2 = bitcast <8 x i1> %1 to i8
  ret i8 %2
}

; Masked 256-bit float fpclass with immediate 2. The intrinsic already yields
; 8 lanes, so the full <8 x i1> mask is ANDed in directly with no lane
; extraction or widening. Unlike the narrower masked tests, the expected asm
; here is an unmasked vfpclassps followed by a scalar andb of the mask into
; %al (from the stack in the 32-bit run, from %dil in the 64-bit run).
define zeroext i8 @test_mm256_mask_fpclass_ps_mask(i8 zeroext %__U, <8 x float> %__A) {
; X86-LABEL: test_mm256_mask_fpclass_ps_mask:
; X86: # %bb.0: # %entry
; X86-NEXT: vfpclassps $2, %ymm0, %k0
; X86-NEXT: kmovw %k0, %eax
; X86-NEXT: andb {{[0-9]+}}(%esp), %al
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: vzeroupper
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_fpclass_ps_mask:
; X64: # %bb.0: # %entry
; X64-NEXT: vfpclassps $2, %ymm0, %k0
; X64-NEXT: kmovw %k0, %eax
; X64-NEXT: andb %dil, %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
  %0 = tail call <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float> %__A, i32 2)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = and <8 x i1> %0, %1
  %3 = bitcast <8 x i1> %2 to i8
  ret i8 %3
}

; 256-bit float fpclass intrinsic: takes an <8 x float> and an i32, returns
; one i1 per lane.
declare <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float>, i32)

; Unmasked 256-bit float fpclass with immediate 2; the 8-lane result is
; bitcast straight to an i8. Both runs expect an unmasked vfpclassps on %ymm0,
; kmovw to %eax, and vzeroupper before the return.
define zeroext i8 @test_mm256_fpclass_ps_mask(<8 x float> %__A) {
; CHECK-LABEL: test_mm256_fpclass_ps_mask:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vfpclassps $2, %ymm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: ret{{[l|q]}}
entry:
  %0 = tail call <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float> %__A, i32 2)
  %1 = bitcast <8 x i1> %0 to i8
  ret i8 %1
}