; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl -mattr=+avx512ifma | FileCheck %s

; Tests codegen for the 128/256-bit AVX512IFMA 52-bit integer multiply-add
; intrinsics (vpmadd52huq / vpmadd52luq) in merge-masking (mask), zero-masking
; (maskz) and unmasked (all-ones mask) forms.

declare <2 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm3 {%k1}
; CHECK-NEXT:    vmovaps %zmm0, %zmm4
; CHECK-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm4
; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1}
; CHECK-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm2 {%k1} {z}
; CHECK-NEXT:    vpaddq %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vpaddq %xmm2, %xmm4, %xmm1
; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq

  %res = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.128(<2 x i64> zeroinitializer, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res3 = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res4 = add <2 x i64> %res, %res1
  %res5 = add <2 x i64> %res3, %res2
  %res6 = add <2 x i64> %res5, %res4
  ret <2 x i64> %res6
}

declare <4 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm3 {%k1}
; CHECK-NEXT:    vmovaps %zmm0, %zmm4
; CHECK-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm4
; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2
; CHECK-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1}
; CHECK-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm2 {%k1} {z}
; CHECK-NEXT:    vpaddq %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vpaddq %ymm2, %ymm4, %ymm1
; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq

  %res = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.256(<4 x i64> zeroinitializer, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res3 = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  %res4 = add <4 x i64> %res, %res1
  %res5 = add <4 x i64> %res3, %res2
  %res6 = add <4 x i64> %res5, %res4
  ret <4 x i64> %res6
}

declare <2 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm3 {%k1} {z}
; CHECK-NEXT:    vmovaps %zmm0, %zmm4
; CHECK-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm4
; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm2 {%k1} {z}
; CHECK-NEXT:    vpaddq %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vpaddq %xmm2, %xmm4, %xmm1
; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq

  %res = call <2 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res2 = call <2 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.128(<2 x i64> zeroinitializer, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res3 = call <2 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res4 = add <2 x i64> %res, %res1
  %res5 = add <2 x i64> %res3, %res2
  %res6 = add <2 x i64> %res5, %res4
  ret <2 x i64> %res6
}

declare <4 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm3 {%k1} {z}
; CHECK-NEXT:    vmovaps %zmm0, %zmm4
; CHECK-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm4
; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2
; CHECK-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm2 {%k1} {z}
; CHECK-NEXT:    vpaddq %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vpaddq %ymm2, %ymm4, %ymm1
; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq

  %res = call <4 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.256(<4 x i64> zeroinitializer, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res3 = call <4 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  %res4 = add <4 x i64> %res, %res1
  %res5 = add <4 x i64> %res3, %res2
  %res6 = add <4 x i64> %res5, %res4
  ret <4 x i64> %res6
}

declare <2 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_vpmadd52l_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpmadd52l_uq_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm3 {%k1}
; CHECK-NEXT:    vmovaps %zmm0, %zmm4
; CHECK-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm4
; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1}
; CHECK-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm2 {%k1} {z}
; CHECK-NEXT:    vpaddq %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vpaddq %xmm2, %xmm4, %xmm1
; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq

  %res = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.128(<2 x i64> zeroinitializer, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res3 = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res4 = add <2 x i64> %res, %res1
  %res5 = add <2 x i64> %res3, %res2
  %res6 = add <2 x i64> %res5, %res4
  ret <2 x i64> %res6
}

declare <4 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_vpmadd52l_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpmadd52l_uq_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm3 {%k1}
; CHECK-NEXT:    vmovaps %zmm0, %zmm4
; CHECK-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm4
; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2
; CHECK-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1}
; CHECK-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm2 {%k1} {z}
; CHECK-NEXT:    vpaddq %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vpaddq %ymm2, %ymm4, %ymm1
; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq

  %res = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.256(<4 x i64> zeroinitializer, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res3 = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  %res4 = add <4 x i64> %res, %res1
  %res5 = add <4 x i64> %res3, %res2
  %res6 = add <4 x i64> %res5, %res4
  ret <4 x i64> %res6
}

declare <2 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_maskz_vpmadd52l_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpmadd52l_uq_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm3 {%k1} {z}
; CHECK-NEXT:    vmovaps %zmm0, %zmm4
; CHECK-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm4
; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm2 {%k1} {z}
; CHECK-NEXT:    vpaddq %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vpaddq %xmm2, %xmm4, %xmm1
; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq

  %res = call <2 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res2 = call <2 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.128(<2 x i64> zeroinitializer, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res3 = call <2 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res4 = add <2 x i64> %res, %res1
  %res5 = add <2 x i64> %res3, %res2
  %res6 = add <2 x i64> %res5, %res4
  ret <2 x i64> %res6
}

declare <4 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_maskz_vpmadd52l_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpmadd52l_uq_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm3 {%k1} {z}
; CHECK-NEXT:    vmovaps %zmm0, %zmm4
; CHECK-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm4
; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2
; CHECK-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm2 {%k1} {z}
; CHECK-NEXT:    vpaddq %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vpaddq %ymm2, %ymm4, %ymm1
; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq

  %res = call <4 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.256(<4 x i64> zeroinitializer, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res3 = call <4 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  %res4 = add <4 x i64> %res, %res1
  %res5 = add <4 x i64> %res3, %res2
  %res6 = add <4 x i64> %res5, %res4
  ret <4 x i64> %res6
}