; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i686-unknown-unknown -mattr=+avx512ifma,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512ifma,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512ifmavl-builtins.c

define <2 x i64> @test_mm_madd52hi_epu64(<2 x i64> %__X, <2 x i64> %__Y, <2 x i64> %__Z) {
; CHECK-LABEL: test_mm_madd52hi_epu64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %__X, <2 x i64> %__Y, <2 x i64> %__Z)
  ret <2 x i64> %0
}

define <2 x i64> @test_mm_mask_madd52hi_epu64(<2 x i64> %__W, i8 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y) {
; X86-LABEL: test_mm_mask_madd52hi_epu64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_madd52hi_epu64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %__W, <2 x i64> %__X, <2 x i64> %__Y)
  %1 = bitcast i8 %__M to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> %__W
  ret <2 x i64> %2
}

define <2 x i64> @test_mm_maskz_madd52hi_epu64(i8 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y, <2 x i64> %__Z) {
; X86-LABEL: test_mm_maskz_madd52hi_epu64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_madd52hi_epu64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %__X, <2 x i64> %__Y, <2 x i64> %__Z)
  %1 = bitcast i8 %__M to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> zeroinitializer
  ret <2 x i64> %2
}

define <4 x i64> @test_mm256_madd52hi_epu64(<4 x i64> %__X, <4 x i64> %__Y, <4 x i64> %__Z) {
; CHECK-LABEL: test_mm256_madd52hi_epu64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %__X, <4 x i64> %__Y, <4 x i64> %__Z)
  ret <4 x i64> %0
}

define <4 x i64> @test_mm256_mask_madd52hi_epu64(<4 x i64> %__W, i8 zeroext %__M, <4 x i64> %__X, <4 x i64> %__Y) {
; X86-LABEL: test_mm256_mask_madd52hi_epu64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_madd52hi_epu64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %__W, <4 x i64> %__X, <4 x i64> %__Y)
  %1 = bitcast i8 %__M to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> %__W
  ret <4 x i64> %2
}

define <4 x i64> @test_mm256_maskz_madd52hi_epu64(i8 zeroext %__M, <4 x i64> %__X, <4 x i64> %__Y, <4 x i64> %__Z) {
; X86-LABEL: test_mm256_maskz_madd52hi_epu64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_madd52hi_epu64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %__X, <4 x i64> %__Y, <4 x i64> %__Z)
  %1 = bitcast i8 %__M to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> zeroinitializer
  ret <4 x i64> %2
}

define <2 x i64> @test_mm_madd52lo_epu64(<2 x i64> %__X, <2 x i64> %__Y, <2 x i64> %__Z) {
; CHECK-LABEL: test_mm_madd52lo_epu64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %__X, <2 x i64> %__Y, <2 x i64> %__Z)
  ret <2 x i64> %0
}

define <2 x i64> @test_mm_mask_madd52lo_epu64(<2 x i64> %__W, i8 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y) {
; X86-LABEL: test_mm_mask_madd52lo_epu64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_madd52lo_epu64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %__W, <2 x i64> %__X, <2 x i64> %__Y)
  %1 = bitcast i8 %__M to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> %__W
  ret <2 x i64> %2
}

define <2 x i64> @test_mm_maskz_madd52lo_epu64(i8 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y, <2 x i64> %__Z) {
; X86-LABEL: test_mm_maskz_madd52lo_epu64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_madd52lo_epu64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %__X, <2 x i64> %__Y, <2 x i64> %__Z)
  %1 = bitcast i8 %__M to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> zeroinitializer
  ret <2 x i64> %2
}

define <4 x i64> @test_mm256_madd52lo_epu64(<4 x i64> %__X, <4 x i64> %__Y, <4 x i64> %__Z) {
; CHECK-LABEL: test_mm256_madd52lo_epu64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %__X, <4 x i64> %__Y, <4 x i64> %__Z)
  ret <4 x i64> %0
}

define <4 x i64> @test_mm256_mask_madd52lo_epu64(<4 x i64> %__W, i8 zeroext %__M, <4 x i64> %__X, <4 x i64> %__Y) {
; X86-LABEL: test_mm256_mask_madd52lo_epu64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_madd52lo_epu64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %__W, <4 x i64> %__X, <4 x i64> %__Y)
  %1 = bitcast i8 %__M to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> %__W
  ret <4 x i64> %2
}

define <4 x i64> @test_mm256_maskz_madd52lo_epu64(i8 zeroext %__M, <4 x i64> %__X, <4 x i64> %__Y, <4 x i64> %__Z) {
; X86-LABEL: test_mm256_maskz_madd52lo_epu64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_madd52lo_epu64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %__X, <4 x i64> %__Y, <4 x i64> %__Z)
  %1 = bitcast i8 %__M to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> zeroinitializer
  ret <4 x i64> %2
}

declare <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64>, <2 x i64>, <2 x i64>)
declare <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64>, <4 x i64>, <4 x i64>)
declare <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64>, <2 x i64>, <2 x i64>)
declare <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64>, <4 x i64>, <4 x i64>)