; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw -mattr=+avx512vl --show-mc-encoding| FileCheck %s

; Instruction-selection tests for the llvm.x86.avx512.mask.{pcmpeq,pcmpgt,cmp,ucmp}
; byte/word intrinsics on 256-bit and 128-bit vectors (AVX-512 BW + VL).
;
; Conventions shared by every test below:
;   * A constant all-ones mask (-1) is expected to select the unmasked form of
;     the compare; a variable %mask is expected to appear as a {%k1} write-mask.
;   * For the cmp/ucmp intrinsics the i32 immediate 0..7 picks the predicate.
;     The expected mnemonics are, in order: eq, lt, le, unord, neq, nlt, nle,
;     ord; the ucmp variants carry an extra "u" before the element-size letter.
;   * Every function is anchored with a FileCheck label directive (spelled with
;     a dash, not an underscore -- the underscore spelling is silently ignored)
;     so its instruction patterns cannot match output of a neighbouring function.

; 256-bit

; pcmpeq.b.256, all-ones mask: unmasked byte equality compare on ymm registers.
define i32 @test_pcmpeq_b_256(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: test_pcmpeq_b_256
; CHECK: vpcmpeqb %ymm1, %ymm0, %k0 ##
  %res = call i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8> %a, <32 x i8> %b, i32 -1)
  ret i32 %res
}

; pcmpeq.b.256, variable mask: the compare must be write-masked by {%k1}.
define i32 @test_mask_pcmpeq_b_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_b_256
; CHECK: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} ##
  %res = call i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8> %a, <32 x i8> %b, i32 %mask)
  ret i32 %res
}

declare i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8>, <32 x i8>, i32)

; pcmpeq.w.256, all-ones mask: unmasked word equality compare on ymm registers.
define i16 @test_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_pcmpeq_w_256
; CHECK: vpcmpeqw %ymm1, %ymm0, %k0 ##
  %res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1)
  ret i16 %res
}

; pcmpeq.w.256, variable mask: the compare must be write-masked by {%k1}.
define i16 @test_mask_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_w_256
; CHECK: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} ##
  %res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask)
  ret i16 %res
}

declare i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16>, <16 x i16>, i16)

; pcmpgt.b.256, all-ones mask: unmasked byte greater-than compare on ymm registers.
define i32 @test_pcmpgt_b_256(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: test_pcmpgt_b_256
; CHECK: vpcmpgtb %ymm1, %ymm0, %k0 ##
  %res = call i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8> %a, <32 x i8> %b, i32 -1)
  ret i32 %res
}

; pcmpgt.b.256, variable mask: the compare must be write-masked by {%k1}.
define i32 @test_mask_pcmpgt_b_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_b_256
; CHECK: vpcmpgtb %ymm1, %ymm0, %k0 {%k1} ##
  %res = call i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8> %a, <32 x i8> %b, i32 %mask)
  ret i32 %res
}

declare i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8>, <32 x i8>, i32)

; pcmpgt.w.256, all-ones mask: unmasked word greater-than compare on ymm registers.
define i16 @test_pcmpgt_w_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_pcmpgt_w_256
; CHECK: vpcmpgtw %ymm1, %ymm0, %k0 ##
  %res = call i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1)
  ret i16 %res
}

; pcmpgt.w.256, variable mask: the compare must be write-masked by {%k1}.
define i16 @test_mask_pcmpgt_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_w_256
; CHECK: vpcmpgtw %ymm1, %ymm0, %k0 {%k1} ##
  %res = call i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask)
  ret i16 %res
}

declare i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16>, <16 x i16>, i16)

; cmp.b.256 with each predicate immediate 0..7 and an all-ones mask; the eight
; i32 results are packed into one vector so that none of the calls is dead.
define <8 x i32> @test_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: test_cmp_b_256
; CHECK: vpcmpeqb %ymm1, %ymm0, %k0 ##
  %res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1)
  %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
; CHECK: vpcmpltb %ymm1, %ymm0, %k0 ##
  %res1 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 -1)
  %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
; CHECK: vpcmpleb %ymm1, %ymm0, %k0 ##
  %res2 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 -1)
  %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
; CHECK: vpcmpunordb %ymm1, %ymm0, %k0 ##
  %res3 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 -1)
  %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
; CHECK: vpcmpneqb %ymm1, %ymm0, %k0 ##
  %res4 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 -1)
  %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
; CHECK: vpcmpnltb %ymm1, %ymm0, %k0 ##
  %res5 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 -1)
  %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
; CHECK: vpcmpnleb %ymm1, %ymm0, %k0 ##
  %res6 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 -1)
  %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
; CHECK: vpcmpordb %ymm1, %ymm0, %k0 ##
  %res7 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 -1)
  %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
  ret <8 x i32> %vec7
}

; cmp.b.256 with each predicate immediate 0..7 under a variable mask: every
; compare must be write-masked by {%k1}.
define <8 x i32> @test_mask_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask) {
; CHECK-LABEL: test_mask_cmp_b_256
; CHECK: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} ##
  %res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask)
  %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
; CHECK: vpcmpltb %ymm1, %ymm0, %k0 {%k1} ##
  %res1 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 %mask)
  %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
; CHECK: vpcmpleb %ymm1, %ymm0, %k0 {%k1} ##
  %res2 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 %mask)
  %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
; CHECK: vpcmpunordb %ymm1, %ymm0, %k0 {%k1} ##
  %res3 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 %mask)
  %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
; CHECK: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} ##
  %res4 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 %mask)
  %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
; CHECK: vpcmpnltb %ymm1, %ymm0, %k0 {%k1} ##
  %res5 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 %mask)
  %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
; CHECK: vpcmpnleb %ymm1, %ymm0, %k0 {%k1} ##
  %res6 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 %mask)
  %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
; CHECK: vpcmpordb %ymm1, %ymm0, %k0 {%k1} ##
  %res7 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 %mask)
  %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
  ret <8 x i32> %vec7
}

declare i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8>, <32 x i8>, i32, i32) nounwind readnone

; ucmp.b.256 with each predicate immediate 0..7 and an all-ones mask; expects
; the "u" mnemonics (vpcmp<pred>ub) in unmasked form.
define <8 x i32> @test_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: test_ucmp_b_256
; CHECK: vpcmpequb %ymm1, %ymm0, %k0 ##
  %res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1)
  %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
; CHECK: vpcmpltub %ymm1, %ymm0, %k0 ##
  %res1 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 -1)
  %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
; CHECK: vpcmpleub %ymm1, %ymm0, %k0 ##
  %res2 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 -1)
  %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
; CHECK: vpcmpunordub %ymm1, %ymm0, %k0 ##
  %res3 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 -1)
  %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
; CHECK: vpcmpnequb %ymm1, %ymm0, %k0 ##
  %res4 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 -1)
  %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
; CHECK: vpcmpnltub %ymm1, %ymm0, %k0 ##
  %res5 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 -1)
  %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
; CHECK: vpcmpnleub %ymm1, %ymm0, %k0 ##
  %res6 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 -1)
  %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
; CHECK: vpcmpordub %ymm1, %ymm0, %k0 ##
  %res7 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 -1)
  %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
  ret <8 x i32> %vec7
}

; ucmp.b.256 with each predicate immediate 0..7 under a variable mask: every
; compare must be write-masked by {%k1}.
define <8 x i32> @test_mask_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask) {
; CHECK-LABEL: test_mask_ucmp_b_256
; CHECK: vpcmpequb %ymm1, %ymm0, %k0 {%k1} ##
  %res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask)
  %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
; CHECK: vpcmpltub %ymm1, %ymm0, %k0 {%k1} ##
  %res1 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 %mask)
  %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
; CHECK: vpcmpleub %ymm1, %ymm0, %k0 {%k1} ##
  %res2 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 %mask)
  %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
; CHECK: vpcmpunordub %ymm1, %ymm0, %k0 {%k1} ##
  %res3 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 %mask)
  %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
; CHECK: vpcmpnequb %ymm1, %ymm0, %k0 {%k1} ##
  %res4 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 %mask)
  %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
; CHECK: vpcmpnltub %ymm1, %ymm0, %k0 {%k1} ##
  %res5 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 %mask)
  %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
; CHECK: vpcmpnleub %ymm1, %ymm0, %k0 {%k1} ##
  %res6 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 %mask)
  %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
; CHECK: vpcmpordub %ymm1, %ymm0, %k0 {%k1} ##
  %res7 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 %mask)
  %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
  ret <8 x i32> %vec7
}

declare i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8>, <32 x i8>, i32, i32) nounwind readnone

; cmp.w.256 with each predicate immediate 0..7 and an all-ones mask; the eight
; i16 results are packed into one vector so that none of the calls is dead.
define <8 x i16> @test_cmp_w_256(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: test_cmp_w_256
; CHECK: vpcmpeqw %ymm1, %ymm0, %k0 ##
  %res0 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 -1)
  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
; CHECK: vpcmpltw %ymm1, %ymm0, %k0 ##
  %res1 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 -1)
  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
; CHECK: vpcmplew %ymm1, %ymm0, %k0 ##
  %res2 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 -1)
  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
; CHECK: vpcmpunordw %ymm1, %ymm0, %k0 ##
  %res3 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 -1)
  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
; CHECK: vpcmpneqw %ymm1, %ymm0, %k0 ##
  %res4 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 -1)
  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
; CHECK: vpcmpnltw %ymm1, %ymm0, %k0 ##
  %res5 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 -1)
  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
; CHECK: vpcmpnlew %ymm1, %ymm0, %k0 ##
  %res6 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 -1)
  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
; CHECK: vpcmpordw %ymm1, %ymm0, %k0 ##
  %res7 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 -1)
  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
  ret <8 x i16> %vec7
}

; cmp.w.256 with each predicate immediate 0..7 under a variable mask: every
; compare must be write-masked by {%k1}.
define <8 x i16> @test_mask_cmp_w_256(<16 x i16> %a0, <16 x i16> %a1, i16 %mask) {
; CHECK-LABEL: test_mask_cmp_w_256
; CHECK: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} ##
  %res0 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 %mask)
  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
; CHECK: vpcmpltw %ymm1, %ymm0, %k0 {%k1} ##
  %res1 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 %mask)
  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
; CHECK: vpcmplew %ymm1, %ymm0, %k0 {%k1} ##
  %res2 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 %mask)
  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
; CHECK: vpcmpunordw %ymm1, %ymm0, %k0 {%k1} ##
  %res3 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 %mask)
  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
; CHECK: vpcmpneqw %ymm1, %ymm0, %k0 {%k1} ##
  %res4 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 %mask)
  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
; CHECK: vpcmpnltw %ymm1, %ymm0, %k0 {%k1} ##
  %res5 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 %mask)
  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
; CHECK: vpcmpnlew %ymm1, %ymm0, %k0 {%k1} ##
  %res6 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 %mask)
  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
; CHECK: vpcmpordw %ymm1, %ymm0, %k0 {%k1} ##
  %res7 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 %mask)
  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
  ret <8 x i16> %vec7
}

declare i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16>, <16 x i16>, i32, i16) nounwind readnone

; ucmp.w.256 with each predicate immediate 0..7 and an all-ones mask; expects
; the "u" mnemonics (vpcmp<pred>uw) in unmasked form.
define <8 x i16> @test_ucmp_w_256(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: test_ucmp_w_256
; CHECK: vpcmpequw %ymm1, %ymm0, %k0 ##
  %res0 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 -1)
  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
; CHECK: vpcmpltuw %ymm1, %ymm0, %k0 ##
  %res1 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 -1)
  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
; CHECK: vpcmpleuw %ymm1, %ymm0, %k0 ##
  %res2 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 -1)
  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
; CHECK: vpcmpunorduw %ymm1, %ymm0, %k0 ##
  %res3 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 -1)
  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
; CHECK: vpcmpnequw %ymm1, %ymm0, %k0 ##
  %res4 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 -1)
  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
; CHECK: vpcmpnltuw %ymm1, %ymm0, %k0 ##
  %res5 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 -1)
  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
; CHECK: vpcmpnleuw %ymm1, %ymm0, %k0 ##
  %res6 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 -1)
  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
; CHECK: vpcmporduw %ymm1, %ymm0, %k0 ##
  %res7 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 -1)
  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
  ret <8 x i16> %vec7
}

; ucmp.w.256 with each predicate immediate 0..7 under a variable mask: every
; compare must be write-masked by {%k1}.
define <8 x i16> @test_mask_ucmp_w_256(<16 x i16> %a0, <16 x i16> %a1, i16 %mask) {
; CHECK-LABEL: test_mask_ucmp_w_256
; CHECK: vpcmpequw %ymm1, %ymm0, %k0 {%k1} ##
  %res0 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 %mask)
  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
; CHECK: vpcmpltuw %ymm1, %ymm0, %k0 {%k1} ##
  %res1 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 %mask)
  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
; CHECK: vpcmpleuw %ymm1, %ymm0, %k0 {%k1} ##
  %res2 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 %mask)
  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
; CHECK: vpcmpunorduw %ymm1, %ymm0, %k0 {%k1} ##
  %res3 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 %mask)
  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
; CHECK: vpcmpnequw %ymm1, %ymm0, %k0 {%k1} ##
  %res4 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 %mask)
  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
; CHECK: vpcmpnltuw %ymm1, %ymm0, %k0 {%k1} ##
  %res5 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 %mask)
  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
; CHECK: vpcmpnleuw %ymm1, %ymm0, %k0 {%k1} ##
  %res6 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 %mask)
  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
; CHECK: vpcmporduw %ymm1, %ymm0, %k0 {%k1} ##
  %res7 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 %mask)
  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
  ret <8 x i16> %vec7
}

declare i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16>, <16 x i16>, i32, i16) nounwind readnone

; 128-bit

; pcmpeq.b.128, all-ones mask: unmasked byte equality compare on xmm registers.
define i16 @test_pcmpeq_b_128(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_pcmpeq_b_128
; CHECK: vpcmpeqb %xmm1, %xmm0, %k0 ##
  %res = call i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8> %a, <16 x i8> %b, i16 -1)
  ret i16 %res
}

; pcmpeq.b.128, variable mask: the compare must be write-masked by {%k1}.
define i16 @test_mask_pcmpeq_b_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_b_128
; CHECK: vpcmpeqb %xmm1, %xmm0, %k0 {%k1} ##
  %res = call i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8> %a, <16 x i8> %b, i16 %mask)
  ret i16 %res
}

declare i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8>, <16 x i8>, i16)

; pcmpeq.w.128, all-ones mask: unmasked word equality compare on xmm registers.
define i8 @test_pcmpeq_w_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_pcmpeq_w_128
; CHECK: vpcmpeqw %xmm1, %xmm0, %k0 ##
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16> %a, <8 x i16> %b, i8 -1)
  ret i8 %res
}

; pcmpeq.w.128, variable mask: the compare must be write-masked by {%k1}.
define i8 @test_mask_pcmpeq_w_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_w_128
; CHECK: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} ##
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16> %a, <8 x i16> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16>, <8 x i16>, i8)

; pcmpgt.b.128, all-ones mask: unmasked byte greater-than compare on xmm registers.
define i16 @test_pcmpgt_b_128(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_pcmpgt_b_128
; CHECK: vpcmpgtb %xmm1, %xmm0, %k0 ##
  %res = call i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8> %a, <16 x i8> %b, i16 -1)
  ret i16 %res
}

; pcmpgt.b.128, variable mask: the compare must be write-masked by {%k1}.
define i16 @test_mask_pcmpgt_b_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_b_128
; CHECK: vpcmpgtb %xmm1, %xmm0, %k0 {%k1} ##
  %res = call i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8> %a, <16 x i8> %b, i16 %mask)
  ret i16 %res
}

declare i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8>, <16 x i8>, i16)

; pcmpgt.w.128, all-ones mask: unmasked word greater-than compare on xmm registers.
define i8 @test_pcmpgt_w_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_pcmpgt_w_128
; CHECK: vpcmpgtw %xmm1, %xmm0, %k0 ##
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16> %a, <8 x i16> %b, i8 -1)
  ret i8 %res
}

; pcmpgt.w.128, variable mask: the compare must be write-masked by {%k1}.
define i8 @test_mask_pcmpgt_w_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_w_128
; CHECK: vpcmpgtw %xmm1, %xmm0, %k0 {%k1} ##
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16> %a, <8 x i16> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16>, <8 x i16>, i8)

; cmp.b.128 with each predicate immediate 0..7 and an all-ones mask; the eight
; i16 results are packed into one vector so that none of the calls is dead.
define <8 x i16> @test_cmp_b_128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_cmp_b_128
; CHECK: vpcmpeqb %xmm1, %xmm0, %k0 ##
  %res0 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 -1)
  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
; CHECK: vpcmpltb %xmm1, %xmm0, %k0 ##
  %res1 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 -1)
  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
; CHECK: vpcmpleb %xmm1, %xmm0, %k0 ##
  %res2 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 -1)
  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
; CHECK: vpcmpunordb %xmm1, %xmm0, %k0 ##
  %res3 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 -1)
  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
; CHECK: vpcmpneqb %xmm1, %xmm0, %k0 ##
  %res4 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 -1)
  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
; CHECK: vpcmpnltb %xmm1, %xmm0, %k0 ##
  %res5 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 -1)
  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
; CHECK: vpcmpnleb %xmm1, %xmm0, %k0 ##
  %res6 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 -1)
  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
; CHECK: vpcmpordb %xmm1, %xmm0, %k0 ##
  %res7 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 -1)
  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
  ret <8 x i16> %vec7
}

; cmp.b.128 with each predicate immediate 0..7 under a variable mask: every
; compare must be write-masked by {%k1}.
define <8 x i16> @test_mask_cmp_b_128(<16 x i8> %a0, <16 x i8> %a1, i16 %mask) {
; CHECK-LABEL: test_mask_cmp_b_128
; CHECK: vpcmpeqb %xmm1, %xmm0, %k0 {%k1} ##
  %res0 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 %mask)
  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
; CHECK: vpcmpltb %xmm1, %xmm0, %k0 {%k1} ##
  %res1 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 %mask)
  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
; CHECK: vpcmpleb %xmm1, %xmm0, %k0 {%k1} ##
  %res2 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 %mask)
  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
; CHECK: vpcmpunordb %xmm1, %xmm0, %k0 {%k1} ##
  %res3 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 %mask)
  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
; CHECK: vpcmpneqb %xmm1, %xmm0, %k0 {%k1} ##
  %res4 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 %mask)
  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
; CHECK: vpcmpnltb %xmm1, %xmm0, %k0 {%k1} ##
  %res5 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 %mask)
  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
; CHECK: vpcmpnleb %xmm1, %xmm0, %k0 {%k1} ##
  %res6 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 %mask)
  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
; CHECK: vpcmpordb %xmm1, %xmm0, %k0 {%k1} ##
  %res7 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 %mask)
  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
  ret <8 x i16> %vec7
}

declare i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8>, <16 x i8>, i32, i16) nounwind readnone

; ucmp.b.128 with each predicate immediate 0..7 and an all-ones mask; expects
; the "u" mnemonics (vpcmp<pred>ub) in unmasked form.
define <8 x i16> @test_ucmp_b_128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_ucmp_b_128
; CHECK: vpcmpequb %xmm1, %xmm0, %k0 ##
  %res0 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 -1)
  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
; CHECK: vpcmpltub %xmm1, %xmm0, %k0 ##
  %res1 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 -1)
  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
; CHECK: vpcmpleub %xmm1, %xmm0, %k0 ##
  %res2 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 -1)
  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
; CHECK: vpcmpunordub %xmm1, %xmm0, %k0 ##
  %res3 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 -1)
  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
; CHECK: vpcmpnequb %xmm1, %xmm0, %k0 ##
  %res4 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 -1)
  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
; CHECK: vpcmpnltub %xmm1, %xmm0, %k0 ##
  %res5 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 -1)
  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
; CHECK: vpcmpnleub %xmm1, %xmm0, %k0 ##
  %res6 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 -1)
  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
; CHECK: vpcmpordub %xmm1, %xmm0, %k0 ##
  %res7 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 -1)
  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
  ret <8 x i16> %vec7
}

; ucmp.b.128 with each predicate immediate 0..7 under a variable mask: every
; compare must be write-masked by {%k1}.
define <8 x i16> @test_mask_ucmp_b_128(<16 x i8> %a0, <16 x i8> %a1, i16 %mask) {
; CHECK-LABEL: test_mask_ucmp_b_128
; CHECK: vpcmpequb %xmm1, %xmm0, %k0 {%k1} ##
  %res0 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 %mask)
  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
; CHECK: vpcmpltub %xmm1, %xmm0, %k0 {%k1} ##
  %res1 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 %mask)
  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
; CHECK: vpcmpleub %xmm1, %xmm0, %k0 {%k1} ##
  %res2 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 %mask)
  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
; CHECK: vpcmpunordub %xmm1, %xmm0, %k0 {%k1} ##
  %res3 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 %mask)
  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
; CHECK: vpcmpnequb %xmm1, %xmm0, %k0 {%k1} ##
  %res4 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 %mask)
  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
; CHECK: vpcmpnltub %xmm1, %xmm0, %k0 {%k1} ##
  %res5 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 %mask)
  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
; CHECK: vpcmpnleub %xmm1, %xmm0, %k0 {%k1} ##
  %res6 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 %mask)
  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
; CHECK: vpcmpordub %xmm1, %xmm0, %k0 {%k1} ##
  %res7 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 %mask)
  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
  ret <8 x i16> %vec7
}

declare i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8>, <16 x i8>, i32, i16) nounwind readnone

; cmp.w.128 with each predicate immediate 0..7 and an all-ones mask; the eight
; i8 results are packed into one vector so that none of the calls is dead.
define <8 x i8> @test_cmp_w_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_cmp_w_128
; CHECK: vpcmpeqw %xmm1, %xmm0, %k0 ##
  %res0 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 -1)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
; CHECK: vpcmpltw %xmm1, %xmm0, %k0 ##
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 -1)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
; CHECK: vpcmplew %xmm1, %xmm0, %k0 ##
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 -1)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
; CHECK: vpcmpunordw %xmm1, %xmm0, %k0 ##
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 -1)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
; CHECK: vpcmpneqw %xmm1, %xmm0, %k0 ##
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 -1)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
; CHECK: vpcmpnltw %xmm1, %xmm0, %k0 ##
  %res5 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 -1)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
; CHECK: vpcmpnlew %xmm1, %xmm0, %k0 ##
  %res6 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 -1)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
; CHECK: vpcmpordw %xmm1, %xmm0, %k0 ##
  %res7 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 -1)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

; cmp.w.128 with each predicate immediate 0..7 under a variable mask: every
; compare must be write-masked by {%k1}.
define <8 x i8> @test_mask_cmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) {
; CHECK-LABEL: test_mask_cmp_w_128
; CHECK: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} ##
  %res0 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 %mask)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
; CHECK: vpcmpltw %xmm1, %xmm0, %k0 {%k1} ##
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 %mask)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
; CHECK: vpcmplew %xmm1, %xmm0, %k0 {%k1} ##
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 %mask)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
; CHECK: vpcmpunordw %xmm1, %xmm0, %k0 {%k1} ##
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 %mask)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
; CHECK: vpcmpneqw %xmm1, %xmm0, %k0 {%k1} ##
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 %mask)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
; CHECK: vpcmpnltw %xmm1, %xmm0, %k0 {%k1} ##
  %res5 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 %mask)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
; CHECK: vpcmpnlew %xmm1, %xmm0, %k0 {%k1} ##
  %res6 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 %mask)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
; CHECK: vpcmpordw %xmm1, %xmm0, %k0 {%k1} ##
  %res7 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 %mask)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16>, <8 x i16>, i32, i8) nounwind readnone

define <8 x i8> @test_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_ucmp_w_128
; CHECK: vpcmpequw %xmm1, %xmm0, %k0 ##
  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 -1)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
; CHECK: vpcmpltuw %xmm1, %xmm0, %k0 ##
  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 -1)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
; CHECK: vpcmpleuw %xmm1, %xmm0, %k0 ##
  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 -1)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
; CHECK: vpcmpunorduw
%xmm1, %xmm0, %k0 ## 567 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 -1) 568 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 569 ; CHECK: vpcmpnequw %xmm1, %xmm0, %k0 ## 570 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 -1) 571 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 572 ; CHECK: vpcmpnltuw %xmm1, %xmm0, %k0 ## 573 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 -1) 574 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 575 ; CHECK: vpcmpnleuw %xmm1, %xmm0, %k0 ## 576 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 -1) 577 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 578 ; CHECK: vpcmporduw %xmm1, %xmm0, %k0 ## 579 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 -1) 580 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 581 ret <8 x i8> %vec7 582 } 583 584 define <8 x i8> @test_mask_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) { 585 ; CHECK_LABEL: test_mask_ucmp_w_128 586 ; CHECK: vpcmpequw %xmm1, %xmm0, %k0 {%k1} ## 587 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 %mask) 588 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 589 ; CHECK: vpcmpltuw %xmm1, %xmm0, %k0 {%k1} ## 590 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 %mask) 591 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 592 ; CHECK: vpcmpleuw %xmm1, %xmm0, %k0 {%k1} ## 593 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 %mask) 594 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 595 ; CHECK: vpcmpunorduw %xmm1, %xmm0, %k0 {%k1} ## 596 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 %mask) 597 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 598 ; CHECK: vpcmpnequw %xmm1, 
%xmm0, %k0 {%k1} ## 599 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 %mask) 600 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 601 ; CHECK: vpcmpnltuw %xmm1, %xmm0, %k0 {%k1} ## 602 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 %mask) 603 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 604 ; CHECK: vpcmpnleuw %xmm1, %xmm0, %k0 {%k1} ## 605 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 %mask) 606 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 607 ; CHECK: vpcmporduw %xmm1, %xmm0, %k0 {%k1} ## 608 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 %mask) 609 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 610 ret <8 x i8> %vec7 611 } 612 613 declare i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16>, <8 x i16>, i32, i8) nounwind readnone 614 615 declare <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone 616 617 define <8 x float> @test_mask_vfmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) { 618 ; CHECK-LABEL: test_mask_vfmadd256_ps 619 ; CHECK: vfmadd213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xa8,0xc2] 620 %res = call <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind 621 ret <8 x float> %res 622 } 623 624 declare <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone 625 626 define <4 x float> @test_mask_vfmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { 627 ; CHECK-LABEL: test_mask_vfmadd128_ps 628 ; CHECK: vfmadd213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0xc2] 629 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind 630 ret <4 
x float> %res
}

declare <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

; Masked 256-bit packed-double FMA: expects vfmadd213pd under {%k1} with the
; exact encoding bytes listed below.
define <4 x double> @test_mask_fmadd256_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 %mask) {
; CHECK-LABEL: test_mask_fmadd256_pd:
; CHECK: vfmadd213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa8,0xc2]
  %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 %mask)
  ret <4 x double> %res
}

declare <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

; Masked 128-bit packed-double FMA: expects vfmadd213pd under {%k1} with the
; exact encoding bytes listed below.
define <2 x double> @test_mask_fmadd128_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
; CHECK-LABEL: test_mask_fmadd128_pd:
; CHECK: vfmadd213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa8,0xc2]
  %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask)
  ret <2 x double> %res
}

; Calls the mask form once with %x3 and once with an all-ones mask and adds
; the two results; expects one {%k1}-masked and one unmasked vfmadd213pd.
define <2 x double>@test_int_x86_avx512_mask_vfmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_pd_128:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm3 {%k1}
; CHECK-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0
; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <2 x double> @llvm.x86.avx512.mask3.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

; mask3 form: the masked call is expected as vfmadd231pd into a copy of the
; third operand, the all-ones call as a plain vfmadd213pd.
define <2 x double>@test_int_x86_avx512_mask3_vfmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_128:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm2, %zmm3
; CHECK-NEXT: vfmadd231pd %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0
; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
  %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <2 x double> @llvm.x86.avx512.maskz.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

; maskz form: the masked call is expected with both {%k1} and {z}, the
; all-ones call as a plain vfmadd213pd.
define <2 x double>@test_int_x86_avx512_maskz_vfmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_128:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm3 {%k1} {z}
; CHECK-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0
; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
  %res1 = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

; 256-bit counterpart of the masked/unmasked vfmadd213pd pair.
define <4 x double>@test_int_x86_avx512_mask_vfmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm3 {%k1}
; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0
; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0
; CHECK-NEXT: retq
  %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

declare <4 x double> @llvm.x86.avx512.mask3.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

; 256-bit mask3 form: masked vfmadd231pd into a copy of the third operand,
; then a plain vfmadd213pd for the all-ones call.
define <4 x double>@test_int_x86_avx512_mask3_vfmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm2, %zmm3
; CHECK-NEXT: vfmadd231pd %ymm1, %ymm0, %ymm3 {%k1}
; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0
; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0
; CHECK-NEXT: retq
  %res = call <4 x double> @llvm.x86.avx512.mask3.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
  %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

declare <4 x double> @llvm.x86.avx512.maskz.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

; 256-bit maskz form: masked vfmadd213pd with {%k1} {z}, then the plain form.
define <4 x double>@test_int_x86_avx512_maskz_vfmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm3 {%k1} {z}
; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0
; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0
; CHECK-NEXT: retq
  %res = call <4 x double> @llvm.x86.avx512.maskz.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
  %res1 = call <4 x double> @llvm.x86.avx512.maskz.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

; Packed-single, 128-bit: one {%k1}-masked and one unmasked vfmadd213ps,
; summed with vaddps.
define <4 x float>@test_int_x86_avx512_mask_vfmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ps_128:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm3 {%k1}
; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0
; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <4 x float> @llvm.x86.avx512.mask3.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

; Packed-single, 128-bit mask3 form: masked vfmadd231ps into a copy of the
; third operand, then a plain vfmadd213ps.
define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_128:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm2, %zmm3
; CHECK-NEXT: vfmadd231ps %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0
; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <4 x float> @llvm.x86.avx512.maskz.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

; Packed-single, 128-bit maskz form: masked vfmadd213ps with {%k1} {z}, then
; the plain form.
define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_128:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm3 {%k1} {z}
; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0
; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  %res1 = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

; Packed-single, 256-bit: one {%k1}-masked and one unmasked vfmadd213ps.
define <8 x float>@test_int_x86_avx512_mask_vfmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm3 {%k1}
; CHECK-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0
; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

declare <8 x float> @llvm.x86.avx512.mask3.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

; Packed-single, 256-bit mask3 form: masked vfmadd231ps into a copy of the
; third operand, then a plain vfmadd213ps.
define <8 x float>@test_int_x86_avx512_mask3_vfmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm2, %zmm3
; CHECK-NEXT: vfmadd231ps %ymm1, %ymm0, %ymm3 {%k1}
; CHECK-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0
; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.avx512.mask3.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

declare <8 x float> @llvm.x86.avx512.maskz.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

; Packed-single, 256-bit maskz form: masked vfmadd213ps with {%k1} {z}, then
; the plain form.
define <8 x float>@test_int_x86_avx512_maskz_vfmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm3 {%k1} {z}
; CHECK-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0
; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.avx512.maskz.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  %res1 = call <8 x float> @llvm.x86.avx512.maskz.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}


declare <2 x double> @llvm.x86.avx512.mask3.vfmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

; mask3 multiply-subtract, 128-bit double: masked vfmsub231pd into a copy of
; the third operand, then a plain vfmsub213pd.
define <2 x double>@test_int_x86_avx512_mask3_vfmsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_128:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm2, %zmm3
; CHECK-NEXT: vfmsub231pd %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0
; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
  %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}


declare <4 x double> @llvm.x86.avx512.mask3.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

; mask3 multiply-subtract, 256-bit double: masked vfmsub231pd, then a plain
; vfmsub213pd.
define <4 x double>@test_int_x86_avx512_mask3_vfmsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm2, %zmm3
; CHECK-NEXT: vfmsub231pd %ymm1, %ymm0, %ymm3 {%k1}
; CHECK-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0
; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0
; CHECK-NEXT: retq
  %res = call <4 x double> @llvm.x86.avx512.mask3.vfmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
  %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

declare <4 x float> @llvm.x86.avx512.mask3.vfmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

; mask3 multiply-subtract, 128-bit single: masked vfmsub231ps, then a plain
; vfmsub213ps.
define <4 x float>@test_int_x86_avx512_mask3_vfmsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_128:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm2, %zmm3
; CHECK-NEXT: vfmsub231ps %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0
; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <8 x float> @llvm.x86.avx512.mask3.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

; mask3 multiply-subtract, 256-bit single: masked vfmsub231ps, then a plain
; vfmsub213ps.
define <8 x float>@test_int_x86_avx512_mask3_vfmsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm2, %zmm3
; CHECK-NEXT: vfmsub231ps %ymm1, %ymm0, %ymm3 {%k1}
; CHECK-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.avx512.mask3.vfmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone

; Masked 256-bit packed-single negated FMA: expects vfnmadd213ps under {%k1}
; with the exact encoding bytes listed below.
define <8 x float> @test_mask_vfnmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmadd256_ps
; CHECK: vfnmadd213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xac,0xc2]
  %res = call
<8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
  ret <8 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone

; Masked 128-bit packed-single negated FMA: expects vfnmadd213ps under {%k1}
; with the exact encoding bytes listed below.
define <4 x float> @test_mask_vfnmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmadd128_ps
; CHECK: vfnmadd213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xac,0xc2]
  %res = call <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
  ret <4 x float> %res
}

declare <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone

; Masked 256-bit packed-double negated FMA: expects vfnmadd213pd under {%k1}
; with the exact encoding bytes listed below.
define <4 x double> @test_mask_vfnmadd256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmadd256_pd
; CHECK: vfnmadd213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xac,0xc2]
  %res = call <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
  ret <4 x double> %res
}

declare <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone

; Masked 128-bit packed-double negated FMA: expects vfnmadd213pd under {%k1}
; with the exact encoding bytes listed below.
define <2 x double> @test_mask_vfnmadd128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmadd128_pd
; CHECK: vfnmadd213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xac,0xc2]
  %res = call <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
  ret <2 x double> %res
}

declare <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone

; Masked 256-bit packed-single negated multiply-subtract: expects
; vfnmsub213ps under {%k1} with the exact encoding bytes listed below.
define <8 x float> @test_mask_vfnmsub256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmsub256_ps
; CHECK: vfnmsub213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xae,0xc2]
  %res = call <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
  ret <8 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone

; Masked 128-bit packed-single negated multiply-subtract: expects
; vfnmsub213ps under {%k1} with the exact encoding bytes listed below.
define <4 x float> @test_mask_vfnmsub128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmsub128_ps
; CHECK: vfnmsub213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xae,0xc2]
  %res = call <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
  ret <4 x float> %res
}

declare <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone

; Masked 256-bit packed-double negated multiply-subtract: expects
; vfnmsub213pd under {%k1} with the exact encoding bytes listed below.
define <4 x double> @test_mask_vfnmsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmsub256_pd
; CHECK: vfnmsub213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xae,0xc2]
  %res = call <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
  ret <4 x double> %res
}

declare <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone

; Masked 128-bit packed-double negated multiply-subtract: expects
; vfnmsub213pd under {%k1} with the exact encoding bytes listed below.
define <2 x double> @test_mask_vfnmsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmsub128_pd
; CHECK: vfnmsub213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xae,0xc2]
  %res = call <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
  ret <2 x double> %res
}


; Calls the mask form once with %x3 and once with an all-ones mask and adds
; the results; expects one {%k1}-masked and one unmasked vfnmsub213pd.
define <2 x double>@test_int_x86_avx512_mask_vfnmsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_128:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm3 {%k1}
; CHECK-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0
; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <2 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

; mask3 form, 128-bit double: masked vfnmsub231pd into a copy of the third
; operand, then a plain vfnmsub213pd.
define <2 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_128:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm2, %zmm3
; CHECK-NEXT: vfnmsub231pd %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0
; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
  %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

; 256-bit double: one {%k1}-masked and one unmasked vfnmsub213pd.
define <4 x double>@test_int_x86_avx512_mask_vfnmsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm3 {%k1}
; CHECK-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0
; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0
; CHECK-NEXT: retq
  %res = call <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

declare <4 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

; mask3 form, 256-bit double: masked vfnmsub231pd, then a plain vfnmsub213pd.
define <4 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm2, %zmm3
; CHECK-NEXT: vfnmsub231pd %ymm1, %ymm0, %ymm3 {%k1}
; CHECK-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0
; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0
; CHECK-NEXT: retq
  %res = call <4 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
  %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

; 128-bit single: one {%k1}-masked and one unmasked vfnmsub213ps.
define <4 x float>@test_int_x86_avx512_mask_vfnmsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_128:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm3 {%k1}
; CHECK-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

; mask3 form, 128-bit single: masked vfnmsub231ps, then a plain vfnmsub213ps.
define <4 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_128:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm2, %zmm3
; CHECK-NEXT: vfnmsub231ps %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

; 256-bit single: one {%k1}-masked and one unmasked vfnmsub213ps.
define <8 x float>@test_int_x86_avx512_mask_vfnmsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm3 {%k1}
; CHECK-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0
; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

declare <8 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

; mask3 form, 256-bit single: masked vfnmsub231ps, then a plain vfnmsub213ps.
define <8 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm2, %zmm3
; CHECK-NEXT: vfnmsub231ps %ymm1, %ymm0, %ymm3 {%k1}
; CHECK-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0
; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

define <2 x double>@test_int_x86_avx512_mask_vfnmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_128:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm3 {%k1}
; CHECK-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0
; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double>
%x2, i8 -1) 1154 %res2 = fadd <2 x double> %res, %res1 1155 ret <2 x double> %res2 1156 } 1157 1158 define <4 x double>@test_int_x86_avx512_mask_vfnmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { 1159 ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_256: 1160 ; CHECK: ## BB#0: 1161 ; CHECK-NEXT: movzbl %dil, %eax 1162 ; CHECK-NEXT: kmovw %eax, %k1 1163 ; CHECK-NEXT: vmovaps %zmm0, %zmm3 1164 ; CHECK-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm3 {%k1} 1165 ; CHECK-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 1166 ; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 1167 ; CHECK-NEXT: retq 1168 %res = call <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) 1169 %res1 = call <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) 1170 %res2 = fadd <4 x double> %res, %res1 1171 ret <4 x double> %res2 1172 } 1173 1174 define <4 x float>@test_int_x86_avx512_mask_vfnmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { 1175 ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_128: 1176 ; CHECK: ## BB#0: 1177 ; CHECK-NEXT: movzbl %dil, %eax 1178 ; CHECK-NEXT: kmovw %eax, %k1 1179 ; CHECK-NEXT: vmovaps %zmm0, %zmm3 1180 ; CHECK-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm3 {%k1} 1181 ; CHECK-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 1182 ; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 1183 ; CHECK-NEXT: retq 1184 %res = call <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) 1185 %res1 = call <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) 1186 %res2 = fadd <4 x float> %res, %res1 1187 ret <4 x float> %res2 1188 } 1189 1190 define <8 x float>@test_int_x86_avx512_mask_vfnmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { 1191 ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_256: 1192 ; CHECK: ## BB#0: 1193 ; 
CHECK-NEXT: movzbl %dil, %eax 1194 ; CHECK-NEXT: kmovw %eax, %k1 1195 ; CHECK-NEXT: vmovaps %zmm0, %zmm3 1196 ; CHECK-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm3 {%k1} 1197 ; CHECK-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 1198 ; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 1199 ; CHECK-NEXT: retq 1200 %res = call <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) 1201 %res1 = call <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) 1202 %res2 = fadd <8 x float> %res, %res1 1203 ret <8 x float> %res2 1204 } 1205 1206 declare <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone 1207 1208 define <8 x float> @test_mask_fmaddsub256_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 %mask) { 1209 ; CHECK-LABEL: test_mask_fmaddsub256_ps: 1210 ; CHECK: vfmaddsub213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xa6,0xc2] 1211 %res = call <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 %mask) 1212 ret <8 x float> %res 1213 } 1214 1215 declare <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone 1216 1217 define <4 x float> @test_mask_fmaddsub128_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) { 1218 ; CHECK-LABEL: test_mask_fmaddsub128_ps: 1219 ; CHECK: vfmaddsub213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa6,0xc2] 1220 %res = call <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) 1221 ret <4 x float> %res 1222 } 1223 1224 declare <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone 1225 1226 define <4 x double> @test_mask_vfmaddsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) { 1227 ; CHECK-LABEL: 
test_mask_vfmaddsub256_pd 1228 ; CHECK: vfmaddsub213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa6,0xc2] 1229 %res = call <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind 1230 ret <4 x double> %res 1231 } 1232 1233 declare <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone 1234 1235 define <2 x double> @test_mask_vfmaddsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { 1236 ; CHECK-LABEL: test_mask_vfmaddsub128_pd 1237 ; CHECK: vfmaddsub213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa6,0xc2] 1238 %res = call <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind 1239 ret <2 x double> %res 1240 } 1241 1242 define <2 x double>@test_int_x86_avx512_mask_vfmaddsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { 1243 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_128: 1244 ; CHECK: ## BB#0: 1245 ; CHECK-NEXT: movzbl %dil, %eax 1246 ; CHECK-NEXT: kmovw %eax, %k1 1247 ; CHECK-NEXT: vmovaps %zmm0, %zmm3 1248 ; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm3 {%k1} 1249 ; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 1250 ; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 1251 ; CHECK-NEXT: retq 1252 %res = call <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) 1253 %res1 = call <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) 1254 %res2 = fadd <2 x double> %res, %res1 1255 ret <2 x double> %res2 1256 } 1257 1258 declare <2 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) 1259 1260 define <2 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { 1261 ; 
CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_128: 1262 ; CHECK: ## BB#0: 1263 ; CHECK-NEXT: movzbl %dil, %eax 1264 ; CHECK-NEXT: kmovw %eax, %k1 1265 ; CHECK-NEXT: vmovaps %zmm2, %zmm3 1266 ; CHECK-NEXT: vfmaddsub231pd %xmm1, %xmm0, %xmm3 {%k1} 1267 ; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 1268 ; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 1269 ; CHECK-NEXT: retq 1270 %res = call <2 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) 1271 %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) 1272 %res2 = fadd <2 x double> %res, %res1 1273 ret <2 x double> %res2 1274 } 1275 1276 declare <2 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) 1277 1278 define <2 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { 1279 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_128: 1280 ; CHECK: ## BB#0: 1281 ; CHECK-NEXT: movzbl %dil, %eax 1282 ; CHECK-NEXT: kmovw %eax, %k1 1283 ; CHECK-NEXT: vmovaps %zmm0, %zmm3 1284 ; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm3 {%k1} {z} 1285 ; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 1286 ; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 1287 ; CHECK-NEXT: retq 1288 %res = call <2 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) 1289 %res1 = call <2 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) 1290 %res2 = fadd <2 x double> %res, %res1 1291 ret <2 x double> %res2 1292 } 1293 1294 define <4 x double>@test_int_x86_avx512_mask_vfmaddsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { 1295 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_256: 1296 ; CHECK: ## BB#0: 1297 ; CHECK-NEXT: movzbl %dil, %eax 1298 ; CHECK-NEXT: kmovw %eax, %k1 1299 
; CHECK-NEXT: vmovaps %zmm0, %zmm3 1300 ; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm3 {%k1} 1301 ; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 1302 ; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 1303 ; CHECK-NEXT: retq 1304 %res = call <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) 1305 %res1 = call <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) 1306 %res2 = fadd <4 x double> %res, %res1 1307 ret <4 x double> %res2 1308 } 1309 1310 declare <4 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) 1311 1312 define <4 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { 1313 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_256: 1314 ; CHECK: ## BB#0: 1315 ; CHECK-NEXT: movzbl %dil, %eax 1316 ; CHECK-NEXT: kmovw %eax, %k1 1317 ; CHECK-NEXT: vmovaps %zmm2, %zmm3 1318 ; CHECK-NEXT: vfmaddsub231pd %ymm1, %ymm0, %ymm3 {%k1} 1319 ; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 1320 ; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 1321 ; CHECK-NEXT: retq 1322 %res = call <4 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) 1323 %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) 1324 %res2 = fadd <4 x double> %res, %res1 1325 ret <4 x double> %res2 1326 } 1327 1328 declare <4 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) 1329 1330 define <4 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { 1331 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_256: 1332 ; CHECK: ## BB#0: 1333 ; CHECK-NEXT: movzbl %dil, %eax 1334 ; CHECK-NEXT: kmovw %eax, %k1 1335 ; CHECK-NEXT: vmovaps %zmm0, %zmm3 1336 ; 
CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm3 {%k1} {z} 1337 ; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 1338 ; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 1339 ; CHECK-NEXT: retq 1340 %res = call <4 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) 1341 %res1 = call <4 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) 1342 %res2 = fadd <4 x double> %res, %res1 1343 ret <4 x double> %res2 1344 } 1345 1346 define <4 x float>@test_int_x86_avx512_mask_vfmaddsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { 1347 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_128: 1348 ; CHECK: ## BB#0: 1349 ; CHECK-NEXT: movzbl %dil, %eax 1350 ; CHECK-NEXT: kmovw %eax, %k1 1351 ; CHECK-NEXT: vmovaps %zmm0, %zmm3 1352 ; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm3 {%k1} 1353 ; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 1354 ; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 1355 ; CHECK-NEXT: retq 1356 %res = call <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) 1357 %res1 = call <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) 1358 %res2 = fadd <4 x float> %res, %res1 1359 ret <4 x float> %res2 1360 } 1361 1362 declare <4 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 1363 1364 define <4 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { 1365 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_128: 1366 ; CHECK: ## BB#0: 1367 ; CHECK-NEXT: movzbl %dil, %eax 1368 ; CHECK-NEXT: kmovw %eax, %k1 1369 ; CHECK-NEXT: vmovaps %zmm2, %zmm3 1370 ; CHECK-NEXT: vfmaddsub231ps %xmm1, %xmm0, %xmm3 {%k1} 1371 ; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 1372 ; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 1373 ; CHECK-NEXT: retq 
1374 %res = call <4 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) 1375 %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) 1376 %res2 = fadd <4 x float> %res, %res1 1377 ret <4 x float> %res2 1378 } 1379 1380 declare <4 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 1381 1382 define <4 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { 1383 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_128: 1384 ; CHECK: ## BB#0: 1385 ; CHECK-NEXT: movzbl %dil, %eax 1386 ; CHECK-NEXT: kmovw %eax, %k1 1387 ; CHECK-NEXT: vmovaps %zmm0, %zmm3 1388 ; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm3 {%k1} {z} 1389 ; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 1390 ; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 1391 ; CHECK-NEXT: retq 1392 %res = call <4 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) 1393 %res1 = call <4 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) 1394 %res2 = fadd <4 x float> %res, %res1 1395 ret <4 x float> %res2 1396 } 1397 1398 define <8 x float>@test_int_x86_avx512_mask_vfmaddsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { 1399 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_256: 1400 ; CHECK: ## BB#0: 1401 ; CHECK-NEXT: movzbl %dil, %eax 1402 ; CHECK-NEXT: kmovw %eax, %k1 1403 ; CHECK-NEXT: vmovaps %zmm0, %zmm3 1404 ; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm3 {%k1} 1405 ; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 1406 ; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 1407 ; CHECK-NEXT: retq 1408 %res = call <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) 1409 %res1 = call <8 x float> 
@llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) 1410 %res2 = fadd <8 x float> %res, %res1 1411 ret <8 x float> %res2 1412 } 1413 1414 declare <8 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 1415 1416 define <8 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { 1417 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_256: 1418 ; CHECK: ## BB#0: 1419 ; CHECK-NEXT: movzbl %dil, %eax 1420 ; CHECK-NEXT: kmovw %eax, %k1 1421 ; CHECK-NEXT: vmovaps %zmm2, %zmm3 1422 ; CHECK-NEXT: vfmaddsub231ps %ymm1, %ymm0, %ymm3 {%k1} 1423 ; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 1424 ; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 1425 ; CHECK-NEXT: retq 1426 %res = call <8 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) 1427 %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) 1428 %res2 = fadd <8 x float> %res, %res1 1429 ret <8 x float> %res2 1430 } 1431 1432 declare <8 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 1433 1434 define <8 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { 1435 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_256: 1436 ; CHECK: ## BB#0: 1437 ; CHECK-NEXT: movzbl %dil, %eax 1438 ; CHECK-NEXT: kmovw %eax, %k1 1439 ; CHECK-NEXT: vmovaps %zmm0, %zmm3 1440 ; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm3 {%k1} {z} 1441 ; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 1442 ; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 1443 ; CHECK-NEXT: retq 1444 %res = call <8 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) 1445 %res1 = call <8 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, 
<8 x float> %x2, i8 -1) 1446 %res2 = fadd <8 x float> %res, %res1 1447 ret <8 x float> %res2 1448 } 1449 1450 declare <2 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) 1451 1452 define <2 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { 1453 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_128: 1454 ; CHECK: ## BB#0: 1455 ; CHECK-NEXT: movzbl %dil, %eax 1456 ; CHECK-NEXT: kmovw %eax, %k1 1457 ; CHECK-NEXT: vmovaps %zmm2, %zmm3 1458 ; CHECK-NEXT: vfmsubadd231pd %xmm1, %xmm0, %xmm3 {%k1} 1459 ; CHECK-NEXT: vfmsubadd213pd %xmm2, %xmm1, %xmm0 1460 ; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 1461 ; CHECK-NEXT: retq 1462 %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) 1463 %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) 1464 %res2=fadd <2 x double> %res, %res1 1465 ret <2 x double> %res2 1466 } 1467 1468 declare <4 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) 1469 1470 define <4 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { 1471 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_256: 1472 ; CHECK: ## BB#0: 1473 ; CHECK-NEXT: movzbl %dil, %eax 1474 ; CHECK-NEXT: kmovw %eax, %k1 1475 ; CHECK-NEXT: vmovaps %zmm2, %zmm3 1476 ; CHECK-NEXT: vfmsubadd231pd %ymm1, %ymm0, %ymm3 {%k1} 1477 ; CHECK-NEXT: vfmsubadd213pd %ymm2, %ymm1, %ymm0 1478 ; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 1479 ; CHECK-NEXT: retq 1480 %res = call <4 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) 1481 %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) 1482 %res2=fadd <4 x 
double> %res, %res1 1483 ret <4 x double> %res2 1484 } 1485 1486 declare <4 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 1487 1488 define <4 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { 1489 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_128: 1490 ; CHECK: ## BB#0: 1491 ; CHECK-NEXT: movzbl %dil, %eax 1492 ; CHECK-NEXT: kmovw %eax, %k1 1493 ; CHECK-NEXT: vmovaps %zmm2, %zmm3 1494 ; CHECK-NEXT: vfmsubadd231ps %xmm1, %xmm0, %xmm3 {%k1} 1495 ; CHECK-NEXT: vfmsubadd213ps %xmm2, %xmm1, %xmm0 1496 ; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 1497 ; CHECK-NEXT: retq 1498 %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) 1499 %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) 1500 %res2=fadd <4 x float> %res, %res1 1501 ret <4 x float> %res2 1502 } 1503 1504 declare <8 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 1505 1506 define <8 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { 1507 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_256: 1508 ; CHECK: ## BB#0: 1509 ; CHECK-NEXT: movzbl %dil, %eax 1510 ; CHECK-NEXT: kmovw %eax, %k1 1511 ; CHECK-NEXT: vmovaps %zmm2, %zmm3 1512 ; CHECK-NEXT: vfmsubadd231ps %ymm1, %ymm0, %ymm3 {%k1} 1513 ; CHECK-NEXT: vfmsubadd213ps %ymm2, %ymm1, %ymm0 1514 ; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 1515 ; CHECK-NEXT: retq 1516 %res = call <8 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) 1517 %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) 1518 %res2=fadd <8 x float> %res, %res1 1519 ret <8 x float> %res2 1520 } 1521 1522 1523 define <4 x 
float> @test_mask_vfmadd128_ps_r(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { 1524 ; CHECK-LABEL: test_mask_vfmadd128_ps_r 1525 ; CHECK: vfmadd213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0xc2] 1526 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind 1527 ret <4 x float> %res 1528 } 1529 1530 define <4 x float> @test_mask_vfmadd128_ps_rz(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { 1531 ; CHECK-LABEL: test_mask_vfmadd128_ps_rz 1532 ; CHECK: vfmadd213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa8,0xc2] 1533 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind 1534 ret <4 x float> %res 1535 } 1536 1537 define <4 x float> @test_mask_vfmadd128_ps_rmk(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2, i8 %mask) { 1538 ; CHECK-LABEL: test_mask_vfmadd128_ps_rmk 1539 ; CHECK: vfmadd213ps (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07] 1540 %a2 = load <4 x float>, <4 x float>* %ptr_a2 1541 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind 1542 ret <4 x float> %res 1543 } 1544 1545 define <4 x float> @test_mask_vfmadd128_ps_rmka(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2, i8 %mask) { 1546 ; CHECK-LABEL: test_mask_vfmadd128_ps_rmka 1547 ; CHECK: vfmadd213ps (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07] 1548 %a2 = load <4 x float>, <4 x float>* %ptr_a2, align 8 1549 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind 1550 ret <4 x float> %res 1551 } 1552 1553 define <4 x float> @test_mask_vfmadd128_ps_rmkz(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2) { 1554 ; CHECK-LABEL: test_mask_vfmadd128_ps_rmkz 1555 ; CHECK: vfmadd213ps 
(%rdi), %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa8,0x07] 1556 %a2 = load <4 x float>, <4 x float>* %ptr_a2 1557 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind 1558 ret <4 x float> %res 1559 } 1560 1561 define <4 x float> @test_mask_vfmadd128_ps_rmkza(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2) { 1562 ; CHECK-LABEL: test_mask_vfmadd128_ps_rmkza 1563 ; CHECK: vfmadd213ps (%rdi), %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa8,0x07] 1564 %a2 = load <4 x float>, <4 x float>* %ptr_a2, align 4 1565 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind 1566 ret <4 x float> %res 1567 } 1568 1569 define <4 x float> @test_mask_vfmadd128_ps_rmb(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2, i8 %mask) { 1570 ; CHECK-LABEL: test_mask_vfmadd128_ps_rmb 1571 ; CHECK: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07] 1572 %q = load float, float* %ptr_a2 1573 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 1574 %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1 1575 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2 1576 %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3 1577 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 %mask) nounwind 1578 ret <4 x float> %res 1579 } 1580 1581 define <4 x float> @test_mask_vfmadd128_ps_rmba(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2, i8 %mask) { 1582 ; CHECK-LABEL: test_mask_vfmadd128_ps_rmba 1583 ; CHECK: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07] 1584 %q = load float, float* %ptr_a2, align 4 1585 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 1586 %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1 1587 %vecinit4.i 
= insertelement <4 x float> %vecinit2.i, float %q, i32 2 1588 %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3 1589 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 %mask) nounwind 1590 ret <4 x float> %res 1591 } 1592 1593 define <4 x float> @test_mask_vfmadd128_ps_rmbz(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2) { 1594 ; CHECK-LABEL: test_mask_vfmadd128_ps_rmbz 1595 ; CHECK: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xa8,0x07] 1596 %q = load float, float* %ptr_a2 1597 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 1598 %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1 1599 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2 1600 %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3 1601 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 -1) nounwind 1602 ret <4 x float> %res 1603 } 1604 1605 define <4 x float> @test_mask_vfmadd128_ps_rmbza(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2) { 1606 ; CHECK-LABEL: test_mask_vfmadd128_ps_rmbza 1607 ; CHECK: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xa8,0x07] 1608 %q = load float, float* %ptr_a2, align 4 1609 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 1610 %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1 1611 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2 1612 %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3 1613 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 -1) nounwind 1614 ret <4 x float> %res 1615 } 1616 1617 define <2 x double> @test_mask_vfmadd128_pd_r(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { 1618 ; CHECK-LABEL: test_mask_vfmadd128_pd_r 1619 ; 
CHECK: vfmadd213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa8,0xc2] 1620 %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind 1621 ret <2 x double> %res 1622 } 1623 1624 define <2 x double> @test_mask_vfmadd128_pd_rz(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { 1625 ; CHECK-LABEL: test_mask_vfmadd128_pd_rz 1626 ; CHECK: vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xa8,0xc2] 1627 %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind 1628 ret <2 x double> %res 1629 } 1630 1631 define <2 x double> @test_mask_vfmadd128_pd_rmk(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2, i8 %mask) { 1632 ; CHECK-LABEL: test_mask_vfmadd128_pd_rmk 1633 ; CHECK: vfmadd213pd (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa8,0x07] 1634 %a2 = load <2 x double>, <2 x double>* %ptr_a2 1635 %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind 1636 ret <2 x double> %res 1637 } 1638 1639 define <2 x double> @test_mask_vfmadd128_pd_rmkz(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2) { 1640 ; CHECK-LABEL: test_mask_vfmadd128_pd_rmkz 1641 ; CHECK: vfmadd213pd (%rdi), %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0xf1,0xa8,0x07] 1642 %a2 = load <2 x double>, <2 x double>* %ptr_a2 1643 %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind 1644 ret <2 x double> %res 1645 } 1646 1647 define <4 x double> @test_mask_vfmadd256_pd_r(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) { 1648 ; CHECK-LABEL: test_mask_vfmadd256_pd_r 1649 ; CHECK: vfmadd213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa8,0xc2] 1650 %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x 
double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind 1651 ret <4 x double> %res 1652 } 1653 1654 define <4 x double> @test_mask_vfmadd256_pd_rz(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) { 1655 ; CHECK-LABEL: test_mask_vfmadd256_pd_rz 1656 ; CHECK: vfmadd213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xa8,0xc2] 1657 %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind 1658 ret <4 x double> %res 1659 } 1660 1661 define <4 x double> @test_mask_vfmadd256_pd_rmk(<4 x double> %a0, <4 x double> %a1, <4 x double>* %ptr_a2, i8 %mask) { 1662 ; CHECK-LABEL: test_mask_vfmadd256_pd_rmk 1663 ; CHECK: vfmadd213pd (%rdi), %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa8,0x07] 1664 %a2 = load <4 x double>, <4 x double>* %ptr_a2 1665 %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind 1666 ret <4 x double> %res 1667 } 1668 1669 define <4 x double> @test_mask_vfmadd256_pd_rmkz(<4 x double> %a0, <4 x double> %a1, <4 x double>* %ptr_a2) { 1670 ; CHECK-LABEL: test_mask_vfmadd256_pd_rmkz 1671 ; CHECK: vfmadd213pd (%rdi), %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0xf5,0xa8,0x07] 1672 %a2 = load <4 x double>, <4 x double>* %ptr_a2 1673 %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind 1674 ret <4 x double> %res 1675 } 1676 define <8 x i16> @test_mask_add_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { 1677 ;CHECK-LABEL: test_mask_add_epi16_rr_128 1678 ;CHECK: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0xc1] 1679 %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 1680 ret <8 x i16> %res 1681 } 1682 1683 define <8 x i16> @test_mask_add_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) { 1684 ;CHECK-LABEL: 
test_mask_add_epi16_rrk_128 1685 ;CHECK: vpaddw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfd,0xd1] 1686 %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 1687 ret <8 x i16> %res 1688 } 1689 1690 define <8 x i16> @test_mask_add_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { 1691 ;CHECK-LABEL: test_mask_add_epi16_rrkz_128 1692 ;CHECK: vpaddw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfd,0xc1] 1693 %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 1694 ret <8 x i16> %res 1695 } 1696 1697 define <8 x i16> @test_mask_add_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { 1698 ;CHECK-LABEL: test_mask_add_epi16_rm_128 1699 ;CHECK: vpaddw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0x07] 1700 %b = load <8 x i16>, <8 x i16>* %ptr_b 1701 %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 1702 ret <8 x i16> %res 1703 } 1704 1705 define <8 x i16> @test_mask_add_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) { 1706 ;CHECK-LABEL: test_mask_add_epi16_rmk_128 1707 ;CHECK: vpaddw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfd,0x0f] 1708 %b = load <8 x i16>, <8 x i16>* %ptr_b 1709 %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 1710 ret <8 x i16> %res 1711 } 1712 1713 define <8 x i16> @test_mask_add_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) { 1714 ;CHECK-LABEL: test_mask_add_epi16_rmkz_128 1715 ;CHECK: vpaddw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfd,0x07] 1716 %b = load <8 x i16>, <8 x i16>* %ptr_b 1717 %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 1718 ret <8 x i16> %res 1719 } 1720 1721 
; Declaration of the 128-bit masked word-add intrinsic called by the
; test_mask_add_epi16_*_128 functions.
declare <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)

; ---------------------------------------------------------------------------
; Masked 16-bit add, 256-bit vectors (llvm.x86.avx512.mask.padd.w.256).
;
; Each function below pins the instruction and encoding that llc is expected
; to emit for one operand/mask combination:
;   rr   - both sources in registers, zero pass-through, all-ones mask (-1):
;          the expected instruction carries no mask annotation.
;   rrk  - both sources in registers, %passThru and %mask supplied:
;          the expected instruction writes the pass-through register under {%k1}.
;   rrkz - both sources in registers, zero pass-through and %mask supplied:
;          the expected instruction uses {%k1} {z}.
;   rm / rmk / rmkz - same three cases, but the second source is loaded from
;          %ptr_b and is expected to appear as the memory operand (%rdi).
; ---------------------------------------------------------------------------

define <16 x i16> @test_mask_add_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
  ;CHECK-LABEL: test_mask_add_epi16_rr_256
  ;CHECK: vpaddw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0xc1]
  %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_add_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
  ;CHECK-LABEL: test_mask_add_epi16_rrk_256
  ;CHECK: vpaddw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfd,0xd1]
  %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_add_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
  ;CHECK-LABEL: test_mask_add_epi16_rrkz_256
  ;CHECK: vpaddw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0xc1]
  %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_add_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
  ;CHECK-LABEL: test_mask_add_epi16_rm_256
  ;CHECK: vpaddw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0x07]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_add_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
  ;CHECK-LABEL: test_mask_add_epi16_rmk_256
  ;CHECK: vpaddw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfd,0x0f]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_add_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
  ;CHECK-LABEL: test_mask_add_epi16_rmkz_256
  ;CHECK: vpaddw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0x07]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)

; ---------------------------------------------------------------------------
; Masked 16-bit subtract, 128-bit vectors (llvm.x86.avx512.mask.psub.w.128).
; Same six operand/mask combinations as the add tests; the mask is an i8 and
; the expected instructions use xmm registers.
; ---------------------------------------------------------------------------

define <8 x i16> @test_mask_sub_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
  ;CHECK-LABEL: test_mask_sub_epi16_rr_128
  ;CHECK: vpsubw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf9,0xc1]
  %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_sub_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
  ;CHECK-LABEL: test_mask_sub_epi16_rrk_128
  ;CHECK: vpsubw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xf9,0xd1]
  %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_sub_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
  ;CHECK-LABEL: test_mask_sub_epi16_rrkz_128
  ;CHECK: vpsubw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xf9,0xc1]
  %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_sub_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
  ;CHECK-LABEL: test_mask_sub_epi16_rm_128
  ;CHECK: vpsubw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf9,0x07]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_sub_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
  ;CHECK-LABEL: test_mask_sub_epi16_rmk_128
  ;CHECK: vpsubw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xf9,0x0f]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_sub_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
  ;CHECK-LABEL: test_mask_sub_epi16_rmkz_128
  ;CHECK: vpsubw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xf9,0x07]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)

; ---------------------------------------------------------------------------
; Masked 16-bit subtract, 256-bit vectors (llvm.x86.avx512.mask.psub.w.256).
; Same six operand/mask combinations; the mask is an i16 and the expected
; instructions use ymm registers.
; ---------------------------------------------------------------------------

define <16 x i16> @test_mask_sub_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
  ;CHECK-LABEL: test_mask_sub_epi16_rr_256
  ;CHECK: vpsubw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xf9,0xc1]
  %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_sub_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
  ;CHECK-LABEL: test_mask_sub_epi16_rrk_256
  ;CHECK: vpsubw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf9,0xd1]
  %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_sub_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
  ;CHECK-LABEL: test_mask_sub_epi16_rrkz_256
  ;CHECK: vpsubw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0xc1]
  %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_sub_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
  ;CHECK-LABEL: test_mask_sub_epi16_rm_256
  ;CHECK: vpsubw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xf9,0x07]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_sub_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
  ;CHECK-LABEL: test_mask_sub_epi16_rmk_256
  ;CHECK: vpsubw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf9,0x0f]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_sub_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
  ;CHECK-LABEL: test_mask_sub_epi16_rmkz_256
  ;CHECK: vpsubw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0x07]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
1863 1864 define <32 x i16> @test_mask_add_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { 1865 ;CHECK-LABEL: test_mask_add_epi16_rr_512 1866 ;CHECK: vpaddw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc1] 1867 %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 1868 ret <32 x i16> %res 1869 } 1870 1871 define <32 x i16> @test_mask_add_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { 1872 ;CHECK-LABEL: test_mask_add_epi16_rrk_512 1873 ;CHECK: vpaddw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfd,0xd1] 1874 %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 1875 ret <32 x i16> %res 1876 } 1877 1878 define <32 x i16> @test_mask_add_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) { 1879 ;CHECK-LABEL: test_mask_add_epi16_rrkz_512 1880 ;CHECK: vpaddw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0xc1] 1881 %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 1882 ret <32 x i16> %res 1883 } 1884 1885 define <32 x i16> @test_mask_add_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { 1886 ;CHECK-LABEL: test_mask_add_epi16_rm_512 1887 ;CHECK: vpaddw (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfd,0x07] 1888 %b = load <32 x i16>, <32 x i16>* %ptr_b 1889 %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 1890 ret <32 x i16> %res 1891 } 1892 1893 define <32 x i16> @test_mask_add_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { 1894 ;CHECK-LABEL: test_mask_add_epi16_rmk_512 1895 ;CHECK: vpaddw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfd,0x0f] 1896 %b = load <32 x i16>, <32 x i16>* %ptr_b 1897 %res = call <32 x i16> 
@llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 1898 ret <32 x i16> %res 1899 } 1900 1901 define <32 x i16> @test_mask_add_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) { 1902 ;CHECK-LABEL: test_mask_add_epi16_rmkz_512 1903 ;CHECK: vpaddw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0x07] 1904 %b = load <32 x i16>, <32 x i16>* %ptr_b 1905 %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 1906 ret <32 x i16> %res 1907 } 1908 1909 declare <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 1910 1911 define <32 x i16> @test_mask_sub_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { 1912 ;CHECK-LABEL: test_mask_sub_epi16_rr_512 1913 ;CHECK: vpsubw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xf9,0xc1] 1914 %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 1915 ret <32 x i16> %res 1916 } 1917 1918 define <32 x i16> @test_mask_sub_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { 1919 ;CHECK-LABEL: test_mask_sub_epi16_rrk_512 1920 ;CHECK: vpsubw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xf9,0xd1] 1921 %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 1922 ret <32 x i16> %res 1923 } 1924 1925 define <32 x i16> @test_mask_sub_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) { 1926 ;CHECK-LABEL: test_mask_sub_epi16_rrkz_512 1927 ;CHECK: vpsubw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0xc1] 1928 %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 1929 ret <32 x i16> %res 1930 } 1931 1932 define <32 x i16> @test_mask_sub_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { 1933 
;CHECK-LABEL: test_mask_sub_epi16_rm_512 1934 ;CHECK: vpsubw (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xf9,0x07] 1935 %b = load <32 x i16>, <32 x i16>* %ptr_b 1936 %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 1937 ret <32 x i16> %res 1938 } 1939 1940 define <32 x i16> @test_mask_sub_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { 1941 ;CHECK-LABEL: test_mask_sub_epi16_rmk_512 1942 ;CHECK: vpsubw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xf9,0x0f] 1943 %b = load <32 x i16>, <32 x i16>* %ptr_b 1944 %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 1945 ret <32 x i16> %res 1946 } 1947 1948 define <32 x i16> @test_mask_sub_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) { 1949 ;CHECK-LABEL: test_mask_sub_epi16_rmkz_512 1950 ;CHECK: vpsubw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0x07] 1951 %b = load <32 x i16>, <32 x i16>* %ptr_b 1952 %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 1953 ret <32 x i16> %res 1954 } 1955 1956 declare <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 1957 1958 define <32 x i16> @test_mask_mullo_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { 1959 ;CHECK-LABEL: test_mask_mullo_epi16_rr_512 1960 ;CHECK: vpmullw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xd5,0xc1] 1961 %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 1962 ret <32 x i16> %res 1963 } 1964 1965 define <32 x i16> @test_mask_mullo_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { 1966 ;CHECK-LABEL: test_mask_mullo_epi16_rrk_512 1967 ;CHECK: vpmullw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: 
[0x62,0xf1,0x7d,0x49,0xd5,0xd1] 1968 %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 1969 ret <32 x i16> %res 1970 } 1971 1972 define <32 x i16> @test_mask_mullo_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) { 1973 ;CHECK-LABEL: test_mask_mullo_epi16_rrkz_512 1974 ;CHECK: vpmullw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0xc1] 1975 %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 1976 ret <32 x i16> %res 1977 } 1978 1979 define <32 x i16> @test_mask_mullo_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { 1980 ;CHECK-LABEL: test_mask_mullo_epi16_rm_512 1981 ;CHECK: vpmullw (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xd5,0x07] 1982 %b = load <32 x i16>, <32 x i16>* %ptr_b 1983 %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 1984 ret <32 x i16> %res 1985 } 1986 1987 define <32 x i16> @test_mask_mullo_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { 1988 ;CHECK-LABEL: test_mask_mullo_epi16_rmk_512 1989 ;CHECK: vpmullw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xd5,0x0f] 1990 %b = load <32 x i16>, <32 x i16>* %ptr_b 1991 %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 1992 ret <32 x i16> %res 1993 } 1994 1995 define <32 x i16> @test_mask_mullo_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) { 1996 ;CHECK-LABEL: test_mask_mullo_epi16_rmkz_512 1997 ;CHECK: vpmullw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0x07] 1998 %b = load <32 x i16>, <32 x i16>* %ptr_b 1999 %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 2000 ret <32 x i16> %res 2001 } 2002 2003 declare <32 x 
i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 2004 2005 define <8 x i16> @test_mask_mullo_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { 2006 ;CHECK-LABEL: test_mask_mullo_epi16_rr_128 2007 ;CHECK: vpmullw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd5,0xc1] 2008 %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 2009 ret <8 x i16> %res 2010 } 2011 2012 define <8 x i16> @test_mask_mullo_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) { 2013 ;CHECK-LABEL: test_mask_mullo_epi16_rrk_128 2014 ;CHECK: vpmullw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd5,0xd1] 2015 %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 2016 ret <8 x i16> %res 2017 } 2018 2019 define <8 x i16> @test_mask_mullo_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { 2020 ;CHECK-LABEL: test_mask_mullo_epi16_rrkz_128 2021 ;CHECK: vpmullw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd5,0xc1] 2022 %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 2023 ret <8 x i16> %res 2024 } 2025 2026 define <8 x i16> @test_mask_mullo_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { 2027 ;CHECK-LABEL: test_mask_mullo_epi16_rm_128 2028 ;CHECK: vpmullw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd5,0x07] 2029 %b = load <8 x i16>, <8 x i16>* %ptr_b 2030 %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 2031 ret <8 x i16> %res 2032 } 2033 2034 define <8 x i16> @test_mask_mullo_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) { 2035 ;CHECK-LABEL: test_mask_mullo_epi16_rmk_128 2036 ;CHECK: vpmullw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd5,0x0f] 2037 %b = load <8 x i16>, <8 x i16>* 
%ptr_b 2038 %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 2039 ret <8 x i16> %res 2040 } 2041 2042 define <8 x i16> @test_mask_mullo_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) { 2043 ;CHECK-LABEL: test_mask_mullo_epi16_rmkz_128 2044 ;CHECK: vpmullw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd5,0x07] 2045 %b = load <8 x i16>, <8 x i16>* %ptr_b 2046 %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 2047 ret <8 x i16> %res 2048 } 2049 2050 declare <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 2051 2052 define <16 x i16> @test_mask_mullo_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { 2053 ;CHECK-LABEL: test_mask_mullo_epi16_rr_256 2054 ;CHECK: vpmullw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd5,0xc1] 2055 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 2056 ret <16 x i16> %res 2057 } 2058 2059 define <16 x i16> @test_mask_mullo_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) { 2060 ;CHECK-LABEL: test_mask_mullo_epi16_rrk_256 2061 ;CHECK: vpmullw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd5,0xd1] 2062 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 2063 ret <16 x i16> %res 2064 } 2065 2066 define <16 x i16> @test_mask_mullo_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { 2067 ;CHECK-LABEL: test_mask_mullo_epi16_rrkz_256 2068 ;CHECK: vpmullw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0xc1] 2069 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 2070 ret <16 x i16> %res 2071 } 2072 2073 define <16 x i16> @test_mask_mullo_epi16_rm_256(<16 x i16> %a, 
<16 x i16>* %ptr_b) { 2074 ;CHECK-LABEL: test_mask_mullo_epi16_rm_256 2075 ;CHECK: vpmullw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd5,0x07] 2076 %b = load <16 x i16>, <16 x i16>* %ptr_b 2077 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 2078 ret <16 x i16> %res 2079 } 2080 2081 define <16 x i16> @test_mask_mullo_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) { 2082 ;CHECK-LABEL: test_mask_mullo_epi16_rmk_256 2083 ;CHECK: vpmullw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd5,0x0f] 2084 %b = load <16 x i16>, <16 x i16>* %ptr_b 2085 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 2086 ret <16 x i16> %res 2087 } 2088 2089 define <16 x i16> @test_mask_mullo_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) { 2090 ;CHECK-LABEL: test_mask_mullo_epi16_rmkz_256 2091 ;CHECK: vpmullw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0x07] 2092 %b = load <16 x i16>, <16 x i16>* %ptr_b 2093 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 2094 ret <16 x i16> %res 2095 } 2096 2097 declare <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 2098 2099 2100 define <8 x i16> @test_mask_packs_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { 2101 ;CHECK-LABEL: test_mask_packs_epi32_rr_128 2102 ;CHECK: vpackssdw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x6b,0xc1] 2103 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1) 2104 ret <8 x i16> %res 2105 } 2106 2107 define <8 x i16> @test_mask_packs_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) { 2108 ;CHECK-LABEL: test_mask_packs_epi32_rrk_128 2109 ;CHECK: vpackssdw %xmm1, %xmm0, %xmm2 {%k1} ## 
encoding: [0x62,0xf1,0x7d,0x09,0x6b,0xd1] 2110 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) 2111 ret <8 x i16> %res 2112 } 2113 2114 define <8 x i16> @test_mask_packs_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { 2115 ;CHECK-LABEL: test_mask_packs_epi32_rrkz_128 2116 ;CHECK: vpackssdw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6b,0xc1] 2117 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask) 2118 ret <8 x i16> %res 2119 } 2120 2121 define <8 x i16> @test_mask_packs_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { 2122 ;CHECK-LABEL: test_mask_packs_epi32_rm_128 2123 ;CHECK: vpackssdw (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x6b,0x07] 2124 %b = load <4 x i32>, <4 x i32>* %ptr_b 2125 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1) 2126 ret <8 x i16> %res 2127 } 2128 2129 define <8 x i16> @test_mask_packs_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <8 x i16> %passThru, i8 %mask) { 2130 ;CHECK-LABEL: test_mask_packs_epi32_rmk_128 2131 ;CHECK: vpackssdw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6b,0x0f] 2132 %b = load <4 x i32>, <4 x i32>* %ptr_b 2133 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) 2134 ret <8 x i16> %res 2135 } 2136 2137 define <8 x i16> @test_mask_packs_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { 2138 ;CHECK-LABEL: test_mask_packs_epi32_rmkz_128 2139 ;CHECK: vpackssdw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6b,0x07] 2140 %b = load <4 x i32>, <4 x i32>* %ptr_b 2141 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask) 2142 ret <8 x i16> %res 2143 } 2144 2145 define <8 x i16> @test_mask_packs_epi32_rmb_128(<4 x 
i32> %a, i32* %ptr_b) { 2146 ;CHECK-LABEL: test_mask_packs_epi32_rmb_128 2147 ;CHECK: vpackssdw (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0x6b,0x07] 2148 %q = load i32, i32* %ptr_b 2149 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 2150 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 2151 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1) 2152 ret <8 x i16> %res 2153 } 2154 2155 define <8 x i16> @test_mask_packs_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <8 x i16> %passThru, i8 %mask) { 2156 ;CHECK-LABEL: test_mask_packs_epi32_rmbk_128 2157 ;CHECK: vpackssdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0x6b,0x0f] 2158 %q = load i32, i32* %ptr_b 2159 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 2160 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 2161 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) 2162 ret <8 x i16> %res 2163 } 2164 2165 define <8 x i16> @test_mask_packs_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { 2166 ;CHECK-LABEL: test_mask_packs_epi32_rmbkz_128 2167 ;CHECK: vpackssdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0x6b,0x07] 2168 %q = load i32, i32* %ptr_b 2169 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 2170 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 2171 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask) 2172 ret <8 x i16> %res 2173 } 2174 2175 declare <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32>, <4 x i32>, <8 x i16>, i8) 2176 2177 define <16 x i16> @test_mask_packs_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { 2178 ;CHECK-LABEL: test_mask_packs_epi32_rr_256 2179 ;CHECK: vpackssdw %ymm1, %ymm0, %ymm0 ## 
encoding: [0xc5,0xfd,0x6b,0xc1] 2180 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1) 2181 ret <16 x i16> %res 2182 } 2183 2184 define <16 x i16> @test_mask_packs_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) { 2185 ;CHECK-LABEL: test_mask_packs_epi32_rrk_256 2186 ;CHECK: vpackssdw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6b,0xd1] 2187 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) 2188 ret <16 x i16> %res 2189 } 2190 2191 define <16 x i16> @test_mask_packs_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) { 2192 ;CHECK-LABEL: test_mask_packs_epi32_rrkz_256 2193 ;CHECK: vpackssdw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0xc1] 2194 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask) 2195 ret <16 x i16> %res 2196 } 2197 2198 define <16 x i16> @test_mask_packs_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { 2199 ;CHECK-LABEL: test_mask_packs_epi32_rm_256 2200 ;CHECK: vpackssdw (%rdi), %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x6b,0x07] 2201 %b = load <8 x i32>, <8 x i32>* %ptr_b 2202 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1) 2203 ret <16 x i16> %res 2204 } 2205 2206 define <16 x i16> @test_mask_packs_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <16 x i16> %passThru, i16 %mask) { 2207 ;CHECK-LABEL: test_mask_packs_epi32_rmk_256 2208 ;CHECK: vpackssdw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6b,0x0f] 2209 %b = load <8 x i32>, <8 x i32>* %ptr_b 2210 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) 2211 ret <16 x i16> %res 2212 } 2213 2214 define <16 x i16> @test_mask_packs_epi32_rmkz_256(<8 x i32> %a, <8 x 
i32>* %ptr_b, i16 %mask) { 2215 ;CHECK-LABEL: test_mask_packs_epi32_rmkz_256 2216 ;CHECK: vpackssdw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0x07] 2217 %b = load <8 x i32>, <8 x i32>* %ptr_b 2218 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask) 2219 ret <16 x i16> %res 2220 } 2221 2222 define <16 x i16> @test_mask_packs_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) { 2223 ;CHECK-LABEL: test_mask_packs_epi32_rmb_256 2224 ;CHECK: vpackssdw (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0x6b,0x07] 2225 %q = load i32, i32* %ptr_b 2226 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 2227 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 2228 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1) 2229 ret <16 x i16> %res 2230 } 2231 2232 define <16 x i16> @test_mask_packs_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <16 x i16> %passThru, i16 %mask) { 2233 ;CHECK-LABEL: test_mask_packs_epi32_rmbk_256 2234 ;CHECK: vpackssdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0x6b,0x0f] 2235 %q = load i32, i32* %ptr_b 2236 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 2237 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 2238 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) 2239 ret <16 x i16> %res 2240 } 2241 2242 define <16 x i16> @test_mask_packs_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i16 %mask) { 2243 ;CHECK-LABEL: test_mask_packs_epi32_rmbkz_256 2244 ;CHECK: vpackssdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0x6b,0x07] 2245 %q = load i32, i32* %ptr_b 2246 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 2247 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> 
zeroinitializer 2248 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask) 2249 ret <16 x i16> %res 2250 } 2251 2252 declare <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32>, <8 x i32>, <16 x i16>, i16) 2253 2254 define <16 x i8> @test_mask_packs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { 2255 ;CHECK-LABEL: test_mask_packs_epi16_rr_128 2256 ;CHECK: vpacksswb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0xc1] 2257 %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1) 2258 ret <16 x i8> %res 2259 } 2260 2261 define <16 x i8> @test_mask_packs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) { 2262 ;CHECK-LABEL: test_mask_packs_epi16_rrk_128 2263 ;CHECK: vpacksswb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x63,0xd1] 2264 %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) 2265 ret <16 x i8> %res 2266 } 2267 2268 define <16 x i8> @test_mask_packs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) { 2269 ;CHECK-LABEL: test_mask_packs_epi16_rrkz_128 2270 ;CHECK: vpacksswb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x63,0xc1] 2271 %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask) 2272 ret <16 x i8> %res 2273 } 2274 2275 define <16 x i8> @test_mask_packs_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { 2276 ;CHECK-LABEL: test_mask_packs_epi16_rm_128 2277 ;CHECK: vpacksswb (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0x07] 2278 %b = load <8 x i16>, <8 x i16>* %ptr_b 2279 %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1) 2280 ret <16 x i8> %res 2281 } 2282 2283 define <16 x i8> @test_mask_packs_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <16 x i8> 
%passThru, i16 %mask) { 2284 ;CHECK-LABEL: test_mask_packs_epi16_rmk_128 2285 ;CHECK: vpacksswb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x63,0x0f] 2286 %b = load <8 x i16>, <8 x i16>* %ptr_b 2287 %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) 2288 ret <16 x i8> %res 2289 } 2290 2291 define <16 x i8> @test_mask_packs_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i16 %mask) { 2292 ;CHECK-LABEL: test_mask_packs_epi16_rmkz_128 2293 ;CHECK: vpacksswb (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x63,0x07] 2294 %b = load <8 x i16>, <8 x i16>* %ptr_b 2295 %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask) 2296 ret <16 x i8> %res 2297 } 2298 2299 declare <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16>, <8 x i16>, <16 x i8>, i16) 2300 2301 define <32 x i8> @test_mask_packs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { 2302 ;CHECK-LABEL: test_mask_packs_epi16_rr_256 2303 ;CHECK: vpacksswb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x63,0xc1] 2304 %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1) 2305 ret <32 x i8> %res 2306 } 2307 2308 define <32 x i8> @test_mask_packs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) { 2309 ;CHECK-LABEL: test_mask_packs_epi16_rrk_256 2310 ;CHECK: vpacksswb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x63,0xd1] 2311 %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) 2312 ret <32 x i8> %res 2313 } 2314 2315 define <32 x i8> @test_mask_packs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i32 %mask) { 2316 ;CHECK-LABEL: test_mask_packs_epi16_rrkz_256 2317 ;CHECK: vpacksswb %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x63,0xc1] 2318 %res = call <32 x i8> 
@llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask) 2319 ret <32 x i8> %res 2320 } 2321 2322 define <32 x i8> @test_mask_packs_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { 2323 ;CHECK-LABEL: test_mask_packs_epi16_rm_256 2324 ;CHECK: vpacksswb (%rdi), %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x63,0x07] 2325 %b = load <16 x i16>, <16 x i16>* %ptr_b 2326 %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1) 2327 ret <32 x i8> %res 2328 } 2329 2330 define <32 x i8> @test_mask_packs_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <32 x i8> %passThru, i32 %mask) { 2331 ;CHECK-LABEL: test_mask_packs_epi16_rmk_256 2332 ;CHECK: vpacksswb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x63,0x0f] 2333 %b = load <16 x i16>, <16 x i16>* %ptr_b 2334 %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) 2335 ret <32 x i8> %res 2336 } 2337 2338 define <32 x i8> @test_mask_packs_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i32 %mask) { 2339 ;CHECK-LABEL: test_mask_packs_epi16_rmkz_256 2340 ;CHECK: vpacksswb (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x63,0x07] 2341 %b = load <16 x i16>, <16 x i16>* %ptr_b 2342 %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask) 2343 ret <32 x i8> %res 2344 } 2345 2346 declare <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16>, <16 x i16>, <32 x i8>, i32) 2347 2348 2349 define <8 x i16> @test_mask_packus_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { 2350 ;CHECK-LABEL: test_mask_packus_epi32_rr_128 2351 ;CHECK: vpackusdw %xmm1, %xmm0, %xmm0 2352 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1) 2353 ret <8 x i16> %res 2354 } 2355 2356 define <8 x i16> @test_mask_packus_epi32_rrk_128(<4 
x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) { 2357 ;CHECK-LABEL: test_mask_packus_epi32_rrk_128 2358 ;CHECK: vpackusdw %xmm1, %xmm0, %xmm2 {%k1} 2359 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) 2360 ret <8 x i16> %res 2361 } 2362 2363 define <8 x i16> @test_mask_packus_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { 2364 ;CHECK-LABEL: test_mask_packus_epi32_rrkz_128 2365 ;CHECK: vpackusdw %xmm1, %xmm0, %xmm0 {%k1} {z} 2366 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask) 2367 ret <8 x i16> %res 2368 } 2369 2370 define <8 x i16> @test_mask_packus_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { 2371 ;CHECK-LABEL: test_mask_packus_epi32_rm_128 2372 ;CHECK: vpackusdw (%rdi), %xmm0, %xmm0 2373 %b = load <4 x i32>, <4 x i32>* %ptr_b 2374 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1) 2375 ret <8 x i16> %res 2376 } 2377 2378 define <8 x i16> @test_mask_packus_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <8 x i16> %passThru, i8 %mask) { 2379 ;CHECK-LABEL: test_mask_packus_epi32_rmk_128 2380 ;CHECK: vpackusdw (%rdi), %xmm0, %xmm1 {%k1} 2381 %b = load <4 x i32>, <4 x i32>* %ptr_b 2382 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) 2383 ret <8 x i16> %res 2384 } 2385 2386 define <8 x i16> @test_mask_packus_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { 2387 ;CHECK-LABEL: test_mask_packus_epi32_rmkz_128 2388 ;CHECK: vpackusdw (%rdi), %xmm0, %xmm0 {%k1} {z} 2389 %b = load <4 x i32>, <4 x i32>* %ptr_b 2390 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask) 2391 ret <8 x i16> %res 2392 } 2393 2394 define <8 x i16> @test_mask_packus_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) { 2395 ;CHECK-LABEL: 
test_mask_packus_epi32_rmb_128 2396 ;CHECK: vpackusdw (%rdi){1to4}, %xmm0, %xmm0 2397 %q = load i32, i32* %ptr_b 2398 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 2399 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 2400 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1) 2401 ret <8 x i16> %res 2402 } 2403 2404 define <8 x i16> @test_mask_packus_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <8 x i16> %passThru, i8 %mask) { 2405 ;CHECK-LABEL: test_mask_packus_epi32_rmbk_128 2406 ;CHECK: vpackusdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} 2407 %q = load i32, i32* %ptr_b 2408 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 2409 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 2410 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) 2411 ret <8 x i16> %res 2412 } 2413 2414 define <8 x i16> @test_mask_packus_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { 2415 ;CHECK-LABEL: test_mask_packus_epi32_rmbkz_128 2416 ;CHECK: vpackusdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} 2417 %q = load i32, i32* %ptr_b 2418 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 2419 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 2420 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask) 2421 ret <8 x i16> %res 2422 } 2423 2424 declare <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32>, <4 x i32>, <8 x i16>, i8) 2425 2426 define <16 x i16> @test_mask_packus_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { 2427 ;CHECK-LABEL: test_mask_packus_epi32_rr_256 2428 ;CHECK: vpackusdw %ymm1, %ymm0, %ymm0 2429 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1) 2430 ret <16 x i16> %res 2431 } 2432 2433 define <16 x 
i16> @test_mask_packus_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) { 2434 ;CHECK-LABEL: test_mask_packus_epi32_rrk_256 2435 ;CHECK: vpackusdw %ymm1, %ymm0, %ymm2 {%k1} 2436 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) 2437 ret <16 x i16> %res 2438 } 2439 2440 define <16 x i16> @test_mask_packus_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) { 2441 ;CHECK-LABEL: test_mask_packus_epi32_rrkz_256 2442 ;CHECK: vpackusdw %ymm1, %ymm0, %ymm0 {%k1} {z} 2443 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask) 2444 ret <16 x i16> %res 2445 } 2446 2447 define <16 x i16> @test_mask_packus_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { 2448 ;CHECK-LABEL: test_mask_packus_epi32_rm_256 2449 ;CHECK: vpackusdw (%rdi), %ymm0, %ymm0 2450 %b = load <8 x i32>, <8 x i32>* %ptr_b 2451 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1) 2452 ret <16 x i16> %res 2453 } 2454 2455 define <16 x i16> @test_mask_packus_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <16 x i16> %passThru, i16 %mask) { 2456 ;CHECK-LABEL: test_mask_packus_epi32_rmk_256 2457 ;CHECK: vpackusdw (%rdi), %ymm0, %ymm1 {%k1} 2458 %b = load <8 x i32>, <8 x i32>* %ptr_b 2459 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) 2460 ret <16 x i16> %res 2461 } 2462 2463 define <16 x i16> @test_mask_packus_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i16 %mask) { 2464 ;CHECK-LABEL: test_mask_packus_epi32_rmkz_256 2465 ;CHECK: vpackusdw (%rdi), %ymm0, %ymm0 {%k1} {z} 2466 %b = load <8 x i32>, <8 x i32>* %ptr_b 2467 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask) 2468 ret <16 x i16> %res 2469 } 2470 2471 define <16 x i16> 
@test_mask_packus_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) { 2472 ;CHECK-LABEL: test_mask_packus_epi32_rmb_256 2473 ;CHECK: vpackusdw (%rdi){1to8}, %ymm0, %ymm0 2474 %q = load i32, i32* %ptr_b 2475 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 2476 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 2477 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1) 2478 ret <16 x i16> %res 2479 } 2480 2481 define <16 x i16> @test_mask_packus_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <16 x i16> %passThru, i16 %mask) { 2482 ;CHECK-LABEL: test_mask_packus_epi32_rmbk_256 2483 ;CHECK: vpackusdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} 2484 %q = load i32, i32* %ptr_b 2485 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 2486 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 2487 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) 2488 ret <16 x i16> %res 2489 } 2490 2491 define <16 x i16> @test_mask_packus_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i16 %mask) { 2492 ;CHECK-LABEL: test_mask_packus_epi32_rmbkz_256 2493 ;CHECK: vpackusdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} 2494 %q = load i32, i32* %ptr_b 2495 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 2496 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 2497 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask) 2498 ret <16 x i16> %res 2499 } 2500 2501 declare <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32>, <8 x i32>, <16 x i16>, i16) 2502 2503 define <16 x i8> @test_mask_packus_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { 2504 ;CHECK-LABEL: test_mask_packus_epi16_rr_128 2505 ;CHECK: vpackuswb %xmm1, %xmm0, %xmm0 2506 %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x 
i16> %b, <16 x i8> zeroinitializer, i16 -1) 2507 ret <16 x i8> %res 2508 } 2509 2510 define <16 x i8> @test_mask_packus_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) { 2511 ;CHECK-LABEL: test_mask_packus_epi16_rrk_128 2512 ;CHECK: vpackuswb %xmm1, %xmm0, %xmm2 {%k1} 2513 %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) 2514 ret <16 x i8> %res 2515 } 2516 2517 define <16 x i8> @test_mask_packus_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) { 2518 ;CHECK-LABEL: test_mask_packus_epi16_rrkz_128 2519 ;CHECK: vpackuswb %xmm1, %xmm0, %xmm0 {%k1} {z} 2520 %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask) 2521 ret <16 x i8> %res 2522 } 2523 2524 define <16 x i8> @test_mask_packus_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { 2525 ;CHECK-LABEL: test_mask_packus_epi16_rm_128 2526 ;CHECK: vpackuswb (%rdi), %xmm0, %xmm0 2527 %b = load <8 x i16>, <8 x i16>* %ptr_b 2528 %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1) 2529 ret <16 x i8> %res 2530 } 2531 2532 define <16 x i8> @test_mask_packus_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <16 x i8> %passThru, i16 %mask) { 2533 ;CHECK-LABEL: test_mask_packus_epi16_rmk_128 2534 ;CHECK: vpackuswb (%rdi), %xmm0, %xmm1 {%k1} 2535 %b = load <8 x i16>, <8 x i16>* %ptr_b 2536 %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) 2537 ret <16 x i8> %res 2538 } 2539 2540 define <16 x i8> @test_mask_packus_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i16 %mask) { 2541 ;CHECK-LABEL: test_mask_packus_epi16_rmkz_128 2542 ;CHECK: vpackuswb (%rdi), %xmm0, %xmm0 {%k1} {z} 2543 %b = load <8 x i16>, <8 x i16>* %ptr_b 2544 %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 
%mask) 2545 ret <16 x i8> %res 2546 } 2547 2548 declare <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16>, <8 x i16>, <16 x i8>, i16) 2549 2550 define <32 x i8> @test_mask_packus_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { 2551 ;CHECK-LABEL: test_mask_packus_epi16_rr_256 2552 ;CHECK: vpackuswb %ymm1, %ymm0, %ymm0 2553 %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1) 2554 ret <32 x i8> %res 2555 } 2556 2557 define <32 x i8> @test_mask_packus_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) { 2558 ;CHECK-LABEL: test_mask_packus_epi16_rrk_256 2559 ;CHECK: vpackuswb %ymm1, %ymm0, %ymm2 {%k1} 2560 %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) 2561 ret <32 x i8> %res 2562 } 2563 2564 define <32 x i8> @test_mask_packus_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i32 %mask) { 2565 ;CHECK-LABEL: test_mask_packus_epi16_rrkz_256 2566 ;CHECK: vpackuswb %ymm1, %ymm0, %ymm0 {%k1} {z} 2567 %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask) 2568 ret <32 x i8> %res 2569 } 2570 2571 define <32 x i8> @test_mask_packus_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { 2572 ;CHECK-LABEL: test_mask_packus_epi16_rm_256 2573 ;CHECK: vpackuswb (%rdi), %ymm0, %ymm0 2574 %b = load <16 x i16>, <16 x i16>* %ptr_b 2575 %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1) 2576 ret <32 x i8> %res 2577 } 2578 2579 define <32 x i8> @test_mask_packus_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <32 x i8> %passThru, i32 %mask) { 2580 ;CHECK-LABEL: test_mask_packus_epi16_rmk_256 2581 ;CHECK: vpackuswb (%rdi), %ymm0, %ymm1 {%k1} 2582 %b = load <16 x i16>, <16 x i16>* %ptr_b 2583 %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> 
%passThru, i32 %mask) 2584 ret <32 x i8> %res 2585 } 2586 2587 define <32 x i8> @test_mask_packus_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i32 %mask) { 2588 ;CHECK-LABEL: test_mask_packus_epi16_rmkz_256 2589 ;CHECK: vpackuswb (%rdi), %ymm0, %ymm0 {%k1} {z} 2590 %b = load <16 x i16>, <16 x i16>* %ptr_b 2591 %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask) 2592 ret <32 x i8> %res 2593 } 2594 2595 declare <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16>, <16 x i16>, <32 x i8>, i32) 2596 2597 define <8 x i16> @test_mask_adds_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { 2598 ;CHECK-LABEL: test_mask_adds_epi16_rr_128 2599 ;CHECK: vpaddsw %xmm1, %xmm0, %xmm0 2600 %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 2601 ret <8 x i16> %res 2602 } 2603 2604 define <8 x i16> @test_mask_adds_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) { 2605 ;CHECK-LABEL: test_mask_adds_epi16_rrk_128 2606 ;CHECK: vpaddsw %xmm1, %xmm0, %xmm2 {%k1} 2607 %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 2608 ret <8 x i16> %res 2609 } 2610 2611 define <8 x i16> @test_mask_adds_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { 2612 ;CHECK-LABEL: test_mask_adds_epi16_rrkz_128 2613 ;CHECK: vpaddsw %xmm1, %xmm0, %xmm0 {%k1} {z} 2614 %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 2615 ret <8 x i16> %res 2616 } 2617 2618 define <8 x i16> @test_mask_adds_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { 2619 ;CHECK-LABEL: test_mask_adds_epi16_rm_128 2620 ;CHECK: vpaddsw (%rdi), %xmm0, %xmm0 2621 %b = load <8 x i16>, <8 x i16>* %ptr_b 2622 %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 2623 ret <8 x i16> %res 2624 
} 2625 2626 define <8 x i16> @test_mask_adds_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) { 2627 ;CHECK-LABEL: test_mask_adds_epi16_rmk_128 2628 ;CHECK: vpaddsw (%rdi), %xmm0, %xmm1 {%k1} 2629 %b = load <8 x i16>, <8 x i16>* %ptr_b 2630 %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 2631 ret <8 x i16> %res 2632 } 2633 2634 define <8 x i16> @test_mask_adds_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) { 2635 ;CHECK-LABEL: test_mask_adds_epi16_rmkz_128 2636 ;CHECK: vpaddsw (%rdi), %xmm0, %xmm0 {%k1} {z} 2637 %b = load <8 x i16>, <8 x i16>* %ptr_b 2638 %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 2639 ret <8 x i16> %res 2640 } 2641 2642 declare <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 2643 2644 define <16 x i16> @test_mask_adds_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { 2645 ;CHECK-LABEL: test_mask_adds_epi16_rr_256 2646 ;CHECK: vpaddsw %ymm1, %ymm0, %ymm0 2647 %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 2648 ret <16 x i16> %res 2649 } 2650 2651 define <16 x i16> @test_mask_adds_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) { 2652 ;CHECK-LABEL: test_mask_adds_epi16_rrk_256 2653 ;CHECK: vpaddsw %ymm1, %ymm0, %ymm2 {%k1} 2654 %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 2655 ret <16 x i16> %res 2656 } 2657 2658 define <16 x i16> @test_mask_adds_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { 2659 ;CHECK-LABEL: test_mask_adds_epi16_rrkz_256 2660 ;CHECK: vpaddsw %ymm1, %ymm0, %ymm0 {%k1} {z} 2661 %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 2662 ret <16 x i16> %res 2663 } 
; ---------------------------------------------------------------------------
; vpaddsw, 256-bit, memory-operand forms.
; Each test loads the second source vector through %ptr_b; the expected
; instruction takes that operand directly from (%rdi).
; ---------------------------------------------------------------------------

; rm: mask argument is all-ones (-1); no mask annotation is expected.
define <16 x i16> @test_mask_adds_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
  ;CHECK-LABEL: test_mask_adds_epi16_rm_256
  ;CHECK: vpaddsw (%rdi), %ymm0, %ymm0
  %rhs = load <16 x i16>, <16 x i16>* %ptr_b
  %sum = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %rhs, <16 x i16> zeroinitializer, i16 -1)
  ret <16 x i16> %sum
}

; rmk: %mask selects lanes and %passThru is the pass-through operand;
; {%k1} is expected on the instruction.
define <16 x i16> @test_mask_adds_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
  ;CHECK-LABEL: test_mask_adds_epi16_rmk_256
  ;CHECK: vpaddsw (%rdi), %ymm0, %ymm1 {%k1}
  %rhs = load <16 x i16>, <16 x i16>* %ptr_b
  %sum = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %rhs, <16 x i16> %passThru, i16 %mask)
  ret <16 x i16> %sum
}

; rmkz: %mask with a zeroinitializer pass-through; {%k1} {z} is expected.
define <16 x i16> @test_mask_adds_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
  ;CHECK-LABEL: test_mask_adds_epi16_rmkz_256
  ;CHECK: vpaddsw (%rdi), %ymm0, %ymm0 {%k1} {z}
  %rhs = load <16 x i16>, <16 x i16>* %ptr_b
  %sum = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %rhs, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %sum
}

declare <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)

; ---------------------------------------------------------------------------
; vpsubsw, 128-bit (<8 x i16>, i8 mask).
; Suffixes used by the test names below:
;   rr   - both sources are vector arguments, mask = -1
;   rrk  - as rr, with %passThru and %mask      -> {%k1}
;   rrkz - as rr, zeroinitializer pass-through  -> {%k1} {z}
;   rm / rmk / rmkz - same three, second source loaded from %ptr_b
; ---------------------------------------------------------------------------

define <8 x i16> @test_mask_subs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
  ;CHECK-LABEL: test_mask_subs_epi16_rr_128
  ;CHECK: vpsubsw %xmm1, %xmm0, %xmm0
  %diff = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
  ret <8 x i16> %diff
}

define <8 x i16> @test_mask_subs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
  ;CHECK-LABEL: test_mask_subs_epi16_rrk_128
  ;CHECK: vpsubsw %xmm1, %xmm0, %xmm2 {%k1}
  %diff = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
  ret <8 x i16> %diff
}

define <8 x i16> @test_mask_subs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
  ;CHECK-LABEL: test_mask_subs_epi16_rrkz_128
  ;CHECK: vpsubsw %xmm1, %xmm0, %xmm0 {%k1} {z}
  %diff = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %diff
}

define <8 x i16> @test_mask_subs_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
  ;CHECK-LABEL: test_mask_subs_epi16_rm_128
  ;CHECK: vpsubsw (%rdi), %xmm0, %xmm0
  %rhs = load <8 x i16>, <8 x i16>* %ptr_b
  %diff = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %rhs, <8 x i16> zeroinitializer, i8 -1)
  ret <8 x i16> %diff
}

define <8 x i16> @test_mask_subs_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
  ;CHECK-LABEL: test_mask_subs_epi16_rmk_128
  ;CHECK: vpsubsw (%rdi), %xmm0, %xmm1 {%k1}
  %rhs = load <8 x i16>, <8 x i16>* %ptr_b
  %diff = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %rhs, <8 x i16> %passThru, i8 %mask)
  ret <8 x i16> %diff
}

define <8 x i16> @test_mask_subs_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
  ;CHECK-LABEL: test_mask_subs_epi16_rmkz_128
  ;CHECK: vpsubsw (%rdi), %xmm0, %xmm0 {%k1} {z}
  %rhs = load <8 x i16>, <8 x i16>* %ptr_b
  %diff = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %rhs, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %diff
}

declare <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)

; ---------------------------------------------------------------------------
; vpsubsw, 256-bit (<16 x i16>, i16 mask). Same six variants as the 128-bit
; group, with ymm registers expected in place of xmm.
; ---------------------------------------------------------------------------

define <16 x i16> @test_mask_subs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
  ;CHECK-LABEL: test_mask_subs_epi16_rr_256
  ;CHECK: vpsubsw %ymm1, %ymm0, %ymm0
  %diff = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
  ret <16 x i16> %diff
}

define <16 x i16> @test_mask_subs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
  ;CHECK-LABEL: test_mask_subs_epi16_rrk_256
  ;CHECK: vpsubsw %ymm1, %ymm0, %ymm2 {%k1}
  %diff = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
  ret <16 x i16> %diff
}

define <16 x i16> @test_mask_subs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
  ;CHECK-LABEL: test_mask_subs_epi16_rrkz_256
  ;CHECK: vpsubsw %ymm1, %ymm0, %ymm0 {%k1} {z}
  %diff = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %diff
}

define <16 x i16> @test_mask_subs_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
  ;CHECK-LABEL: test_mask_subs_epi16_rm_256
  ;CHECK: vpsubsw (%rdi), %ymm0, %ymm0
  %rhs = load <16 x i16>, <16 x i16>* %ptr_b
  %diff = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %rhs, <16 x i16> zeroinitializer, i16 -1)
  ret <16 x i16> %diff
}

define <16 x i16> @test_mask_subs_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
  ;CHECK-LABEL: test_mask_subs_epi16_rmk_256
  ;CHECK: vpsubsw (%rdi), %ymm0, %ymm1 {%k1}
  %rhs = load <16 x i16>, <16 x i16>* %ptr_b
  %diff = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %rhs, <16 x i16> %passThru, i16 %mask)
  ret <16 x i16> %diff
}

define <16 x i16> @test_mask_subs_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
  ;CHECK-LABEL: test_mask_subs_epi16_rmkz_256
  ;CHECK: vpsubsw (%rdi), %ymm0, %ymm0 {%k1} {z}
  %rhs = load <16 x i16>, <16 x i16>* %ptr_b
  %diff = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %rhs, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %diff
}

declare <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)

; ---------------------------------------------------------------------------
; vpaddusw, 128-bit (<8 x i16>, i8 mask).
; Suffixes used by the test names below:
;   rr   - both sources are vector arguments, mask = -1
;   rrk  - as rr, with %passThru and %mask      -> {%k1}
;   rrkz - as rr, zeroinitializer pass-through  -> {%k1} {z}
;   rm / rmk / rmkz - same three, second source loaded from %ptr_b and
;                     expected as a (%rdi) operand
; ---------------------------------------------------------------------------

define <8 x i16> @test_mask_adds_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) {
  ;CHECK-LABEL: test_mask_adds_epu16_rr_128
  ;CHECK: vpaddusw %xmm1, %xmm0, %xmm0
  %sum = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
  ret <8 x i16> %sum
}

define <8 x i16> @test_mask_adds_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
  ;CHECK-LABEL: test_mask_adds_epu16_rrk_128
  ;CHECK: vpaddusw %xmm1, %xmm0, %xmm2 {%k1}
  %sum = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
  ret <8 x i16> %sum
}

define <8 x i16> @test_mask_adds_epu16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
  ;CHECK-LABEL: test_mask_adds_epu16_rrkz_128
  ;CHECK: vpaddusw %xmm1, %xmm0, %xmm0 {%k1} {z}
  %sum = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %sum
}

define <8 x i16> @test_mask_adds_epu16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
  ;CHECK-LABEL: test_mask_adds_epu16_rm_128
  ;CHECK: vpaddusw (%rdi), %xmm0, %xmm0
  %rhs = load <8 x i16>, <8 x i16>* %ptr_b
  %sum = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %rhs, <8 x i16> zeroinitializer, i8 -1)
  ret <8 x i16> %sum
}

define <8 x i16> @test_mask_adds_epu16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
  ;CHECK-LABEL: test_mask_adds_epu16_rmk_128
  ;CHECK: vpaddusw (%rdi), %xmm0, %xmm1 {%k1}
  %rhs = load <8 x i16>, <8 x i16>* %ptr_b
  %sum = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %rhs, <8 x i16> %passThru, i8 %mask)
  ret <8 x i16> %sum
}

define <8 x i16> @test_mask_adds_epu16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
  ;CHECK-LABEL: test_mask_adds_epu16_rmkz_128
  ;CHECK: vpaddusw (%rdi), %xmm0, %xmm0 {%k1} {z}
  %rhs = load <8 x i16>, <8 x i16>* %ptr_b
  %sum = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %rhs, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %sum
}

declare <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)

; ---------------------------------------------------------------------------
; vpaddusw, 256-bit (<16 x i16>, i16 mask). Same six variants, ymm registers.
; ---------------------------------------------------------------------------

define <16 x i16> @test_mask_adds_epu16_rr_256(<16 x i16> %a, <16 x i16> %b) {
  ;CHECK-LABEL: test_mask_adds_epu16_rr_256
  ;CHECK: vpaddusw %ymm1, %ymm0, %ymm0
  %sum = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
  ret <16 x i16> %sum
}

define <16 x i16> @test_mask_adds_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
  ;CHECK-LABEL: test_mask_adds_epu16_rrk_256
  ;CHECK: vpaddusw %ymm1, %ymm0, %ymm2 {%k1}
  %sum = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
  ret <16 x i16> %sum
}

define <16 x i16> @test_mask_adds_epu16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
  ;CHECK-LABEL: test_mask_adds_epu16_rrkz_256
  ;CHECK: vpaddusw %ymm1, %ymm0, %ymm0 {%k1} {z}
  %sum = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %sum
}

define <16 x i16> @test_mask_adds_epu16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
  ;CHECK-LABEL: test_mask_adds_epu16_rm_256
  ;CHECK: vpaddusw (%rdi), %ymm0, %ymm0
  %rhs = load <16 x i16>, <16 x i16>* %ptr_b
  %sum = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %rhs, <16 x i16> zeroinitializer, i16 -1)
  ret <16 x i16> %sum
}

define <16 x i16> @test_mask_adds_epu16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
  ;CHECK-LABEL: test_mask_adds_epu16_rmk_256
  ;CHECK: vpaddusw (%rdi), %ymm0, %ymm1 {%k1}
  %rhs = load <16 x i16>, <16 x i16>* %ptr_b
  %sum = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %rhs, <16 x i16> %passThru, i16 %mask)
  ret <16 x i16> %sum
}

define <16 x i16> @test_mask_adds_epu16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
  ;CHECK-LABEL: test_mask_adds_epu16_rmkz_256
  ;CHECK: vpaddusw (%rdi), %ymm0, %ymm0 {%k1} {z}
  %rhs = load <16 x i16>, <16 x i16>* %ptr_b
  %sum = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %rhs, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %sum
}

declare <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)

; ---------------------------------------------------------------------------
; vpsubusw, 128-bit (<8 x i16>, i8 mask). Same six variants, xmm registers.
; ---------------------------------------------------------------------------

define <8 x i16> @test_mask_subs_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) {
  ;CHECK-LABEL: test_mask_subs_epu16_rr_128
  ;CHECK: vpsubusw %xmm1, %xmm0, %xmm0
  %diff = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
  ret <8 x i16> %diff
}

define <8 x i16> @test_mask_subs_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
  ;CHECK-LABEL: test_mask_subs_epu16_rrk_128
  ;CHECK: vpsubusw %xmm1, %xmm0, %xmm2 {%k1}
  %diff = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
  ret <8 x i16> %diff
}

define <8 x i16> @test_mask_subs_epu16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
  ;CHECK-LABEL: test_mask_subs_epu16_rrkz_128
  ;CHECK: vpsubusw %xmm1, %xmm0, %xmm0 {%k1} {z}
  %diff = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %diff
}

define <8 x i16> @test_mask_subs_epu16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
  ;CHECK-LABEL: test_mask_subs_epu16_rm_128
  ;CHECK: vpsubusw (%rdi), %xmm0, %xmm0
  %rhs = load <8 x i16>, <8 x i16>* %ptr_b
  %diff = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %rhs, <8 x i16> zeroinitializer, i8 -1)
  ret <8 x i16> %diff
}

define <8 x i16> @test_mask_subs_epu16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
  ;CHECK-LABEL: test_mask_subs_epu16_rmk_128
  ;CHECK: vpsubusw (%rdi), %xmm0, %xmm1 {%k1}
  %rhs = load <8 x i16>, <8 x i16>* %ptr_b
  %diff = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %rhs, <8 x i16> %passThru, i8 %mask)
  ret <8 x i16> %diff
}

define <8 x i16> @test_mask_subs_epu16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
  ;CHECK-LABEL: test_mask_subs_epu16_rmkz_128
  ;CHECK: vpsubusw (%rdi), %xmm0, %xmm0 {%k1} {z}
  %rhs = load <8 x i16>, <8 x i16>* %ptr_b
  %diff = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %rhs, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %diff
}

declare <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)

; ---------------------------------------------------------------------------
; vpsubusw, 256-bit (<16 x i16>, i16 mask): vector-argument forms rr and rrk.
; ---------------------------------------------------------------------------

define <16 x i16> @test_mask_subs_epu16_rr_256(<16 x i16> %a, <16 x i16> %b) {
  ;CHECK-LABEL: test_mask_subs_epu16_rr_256
  ;CHECK: vpsubusw %ymm1, %ymm0, %ymm0
  %diff = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
  ret <16 x i16> %diff
}

define <16 x i16> @test_mask_subs_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
  ;CHECK-LABEL: test_mask_subs_epu16_rrk_256
  ;CHECK: vpsubusw %ymm1, %ymm0, %ymm2 {%k1}
  %diff = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
  ret <16 x i16> %diff
}

; Masked saturating add/subtract tests. Every function in this group makes one
; call to the intrinsic named in the declare that closes its family. The
; function-name suffix matches the shape of that call:
;   rr   - %a and %b are vector arguments, third operand zeroinitializer, mask -1
;   rrk  - %a and %b are vector arguments, third operand %passThru, mask %mask
;   rrkz - %a and %b are vector arguments, third operand zeroinitializer, mask %mask
;   rm / rmk / rmkz - the same three shapes with %b loaded from %ptr_b
; The expected instruction has no mask operand for rr/rm, {%k1} for rrk/rmk and
; {%k1} {z} for rrkz/rmkz; the rm* forms expect a (%rdi) memory operand.

; llvm.x86.avx512.mask.psubus.w.256: rrkz, rm, rmk and rmkz forms; expects vpsubusw on ymm.
define <16 x i16> @test_mask_subs_epu16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
  ;CHECK-LABEL: test_mask_subs_epu16_rrkz_256
  ;CHECK: vpsubusw %ymm1, %ymm0, %ymm0 {%k1} {z}
  %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_subs_epu16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
  ;CHECK-LABEL: test_mask_subs_epu16_rm_256
  ;CHECK: vpsubusw (%rdi), %ymm0, %ymm0
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_subs_epu16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
  ;CHECK-LABEL: test_mask_subs_epu16_rmk_256
  ;CHECK: vpsubusw (%rdi), %ymm0, %ymm1 {%k1}
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_subs_epu16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
  ;CHECK-LABEL: test_mask_subs_epu16_rmkz_256
  ;CHECK: vpsubusw (%rdi), %ymm0, %ymm0 {%k1} {z}
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)

; llvm.x86.avx512.mask.padds.b.128: all six forms; expects vpaddsb on xmm, i16 mask.
define <16 x i8> @test_mask_adds_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) {
  ;CHECK-LABEL: test_mask_adds_epi8_rr_128
  ;CHECK: vpaddsb %xmm1, %xmm0, %xmm0
  %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_adds_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
  ;CHECK-LABEL: test_mask_adds_epi8_rrk_128
  ;CHECK: vpaddsb %xmm1, %xmm0, %xmm2 {%k1}
  %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_adds_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
  ;CHECK-LABEL: test_mask_adds_epi8_rrkz_128
  ;CHECK: vpaddsb %xmm1, %xmm0, %xmm0 {%k1} {z}
  %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_adds_epi8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
  ;CHECK-LABEL: test_mask_adds_epi8_rm_128
  ;CHECK: vpaddsb (%rdi), %xmm0, %xmm0
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_adds_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
  ;CHECK-LABEL: test_mask_adds_epi8_rmk_128
  ;CHECK: vpaddsb (%rdi), %xmm0, %xmm1 {%k1}
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_adds_epi8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
  ;CHECK-LABEL: test_mask_adds_epi8_rmkz_128
  ;CHECK: vpaddsb (%rdi), %xmm0, %xmm0 {%k1} {z}
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
  ret <16 x i8> %res
}

declare <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)

; llvm.x86.avx512.mask.padds.b.256: all six forms; expects vpaddsb on ymm, i32 mask.
define <32 x i8> @test_mask_adds_epi8_rr_256(<32 x i8> %a, <32 x i8> %b) {
  ;CHECK-LABEL: test_mask_adds_epi8_rr_256
  ;CHECK: vpaddsb %ymm1, %ymm0, %ymm0
  %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_adds_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
  ;CHECK-LABEL: test_mask_adds_epi8_rrk_256
  ;CHECK: vpaddsb %ymm1, %ymm0, %ymm2 {%k1}
  %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_adds_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
  ;CHECK-LABEL: test_mask_adds_epi8_rrkz_256
  ;CHECK: vpaddsb %ymm1, %ymm0, %ymm0 {%k1} {z}
  %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_adds_epi8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
  ;CHECK-LABEL: test_mask_adds_epi8_rm_256
  ;CHECK: vpaddsb (%rdi), %ymm0, %ymm0
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_adds_epi8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
  ;CHECK-LABEL: test_mask_adds_epi8_rmk_256
  ;CHECK: vpaddsb (%rdi), %ymm0, %ymm1 {%k1}
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_adds_epi8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
  ;CHECK-LABEL: test_mask_adds_epi8_rmkz_256
  ;CHECK: vpaddsb (%rdi), %ymm0, %ymm0 {%k1} {z}
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
  ret <32 x i8> %res
}

declare <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)

; llvm.x86.avx512.mask.psubs.b.128: all six forms; expects vpsubsb on xmm, i16 mask.
define <16 x i8> @test_mask_subs_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) {
  ;CHECK-LABEL: test_mask_subs_epi8_rr_128
  ;CHECK: vpsubsb %xmm1, %xmm0, %xmm0
  %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
  ;CHECK-LABEL: test_mask_subs_epi8_rrk_128
  ;CHECK: vpsubsb %xmm1, %xmm0, %xmm2 {%k1}
  %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
  ;CHECK-LABEL: test_mask_subs_epi8_rrkz_128
  ;CHECK: vpsubsb %xmm1, %xmm0, %xmm0 {%k1} {z}
  %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epi8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
  ;CHECK-LABEL: test_mask_subs_epi8_rm_128
  ;CHECK: vpsubsb (%rdi), %xmm0, %xmm0
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
  ;CHECK-LABEL: test_mask_subs_epi8_rmk_128
  ;CHECK: vpsubsb (%rdi), %xmm0, %xmm1 {%k1}
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epi8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
  ;CHECK-LABEL: test_mask_subs_epi8_rmkz_128
  ;CHECK: vpsubsb (%rdi), %xmm0, %xmm0 {%k1} {z}
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
  ret <16 x i8> %res
}

declare <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)

; llvm.x86.avx512.mask.psubs.b.256: all six forms; expects vpsubsb on ymm, i32 mask.
define <32 x i8> @test_mask_subs_epi8_rr_256(<32 x i8> %a, <32 x i8> %b) {
  ;CHECK-LABEL: test_mask_subs_epi8_rr_256
  ;CHECK: vpsubsb %ymm1, %ymm0, %ymm0
  %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
  ;CHECK-LABEL: test_mask_subs_epi8_rrk_256
  ;CHECK: vpsubsb %ymm1, %ymm0, %ymm2 {%k1}
  %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
  ;CHECK-LABEL: test_mask_subs_epi8_rrkz_256
  ;CHECK: vpsubsb %ymm1, %ymm0, %ymm0 {%k1} {z}
  %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epi8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
  ;CHECK-LABEL: test_mask_subs_epi8_rm_256
  ;CHECK: vpsubsb (%rdi), %ymm0, %ymm0
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epi8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
  ;CHECK-LABEL: test_mask_subs_epi8_rmk_256
  ;CHECK: vpsubsb (%rdi), %ymm0, %ymm1 {%k1}
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epi8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
  ;CHECK-LABEL: test_mask_subs_epi8_rmkz_256
  ;CHECK: vpsubsb (%rdi), %ymm0, %ymm0 {%k1} {z}
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
  ret <32 x i8> %res
}

declare <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)

; llvm.x86.avx512.mask.paddus.b.128: all six forms; expects vpaddusb on xmm, i16 mask.
define <16 x i8> @test_mask_adds_epu8_rr_128(<16 x i8> %a, <16 x i8> %b) {
  ;CHECK-LABEL: test_mask_adds_epu8_rr_128
  ;CHECK: vpaddusb %xmm1, %xmm0, %xmm0
  %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_adds_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
  ;CHECK-LABEL: test_mask_adds_epu8_rrk_128
  ;CHECK: vpaddusb %xmm1, %xmm0, %xmm2 {%k1}
  %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_adds_epu8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
  ;CHECK-LABEL: test_mask_adds_epu8_rrkz_128
  ;CHECK: vpaddusb %xmm1, %xmm0, %xmm0 {%k1} {z}
  %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_adds_epu8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
  ;CHECK-LABEL: test_mask_adds_epu8_rm_128
  ;CHECK: vpaddusb (%rdi), %xmm0, %xmm0
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_adds_epu8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
  ;CHECK-LABEL: test_mask_adds_epu8_rmk_128
  ;CHECK: vpaddusb (%rdi), %xmm0, %xmm1 {%k1}
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_adds_epu8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
  ;CHECK-LABEL: test_mask_adds_epu8_rmkz_128
  ;CHECK: vpaddusb (%rdi), %xmm0, %xmm0 {%k1} {z}
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
  ret <16 x i8> %res
}

declare <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)

; llvm.x86.avx512.mask.paddus.b.256: all six forms; expects vpaddusb on ymm, i32 mask.
define <32 x i8> @test_mask_adds_epu8_rr_256(<32 x i8> %a, <32 x i8> %b) {
  ;CHECK-LABEL: test_mask_adds_epu8_rr_256
  ;CHECK: vpaddusb %ymm1, %ymm0, %ymm0
  %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_adds_epu8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
  ;CHECK-LABEL: test_mask_adds_epu8_rrk_256
  ;CHECK: vpaddusb %ymm1, %ymm0, %ymm2 {%k1}
  %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_adds_epu8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
  ;CHECK-LABEL: test_mask_adds_epu8_rrkz_256
  ;CHECK: vpaddusb %ymm1, %ymm0, %ymm0 {%k1} {z}
  %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_adds_epu8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
  ;CHECK-LABEL: test_mask_adds_epu8_rm_256
  ;CHECK: vpaddusb (%rdi), %ymm0, %ymm0
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_adds_epu8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
  ;CHECK-LABEL: test_mask_adds_epu8_rmk_256
  ;CHECK: vpaddusb (%rdi), %ymm0, %ymm1 {%k1}
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_adds_epu8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
  ;CHECK-LABEL: test_mask_adds_epu8_rmkz_256
  ;CHECK: vpaddusb (%rdi), %ymm0, %ymm0 {%k1} {z}
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
  ret <32 x i8> %res
}

declare <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)

; llvm.x86.avx512.mask.psubus.b.128: all six forms; expects vpsubusb on xmm, i16 mask.
define <16 x i8> @test_mask_subs_epu8_rr_128(<16 x i8> %a, <16 x i8> %b) {
  ;CHECK-LABEL: test_mask_subs_epu8_rr_128
  ;CHECK: vpsubusb %xmm1, %xmm0, %xmm0
  %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
  ;CHECK-LABEL: test_mask_subs_epu8_rrk_128
  ;CHECK: vpsubusb %xmm1, %xmm0, %xmm2 {%k1}
  %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epu8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
  ;CHECK-LABEL: test_mask_subs_epu8_rrkz_128
  ;CHECK: vpsubusb %xmm1, %xmm0, %xmm0 {%k1} {z}
  %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epu8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
  ;CHECK-LABEL: test_mask_subs_epu8_rm_128
  ;CHECK: vpsubusb (%rdi), %xmm0, %xmm0
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epu8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
  ;CHECK-LABEL: test_mask_subs_epu8_rmk_128
  ;CHECK: vpsubusb (%rdi), %xmm0, %xmm1 {%k1}
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epu8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
  ;CHECK-LABEL: test_mask_subs_epu8_rmkz_128
  ;CHECK: vpsubusb (%rdi), %xmm0, %xmm0 {%k1} {z}
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
  ret <16 x i8> %res
}

declare <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)

; llvm.x86.avx512.mask.psubus.b.256: all six forms; expects vpsubusb on ymm, i32 mask.
define <32 x i8> @test_mask_subs_epu8_rr_256(<32 x i8> %a, <32 x i8> %b) {
  ;CHECK-LABEL: test_mask_subs_epu8_rr_256
  ;CHECK: vpsubusb %ymm1, %ymm0, %ymm0
  %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epu8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
  ;CHECK-LABEL: test_mask_subs_epu8_rrk_256
  ;CHECK: vpsubusb %ymm1, %ymm0, %ymm2 {%k1}
  %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epu8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
  ;CHECK-LABEL: test_mask_subs_epu8_rrkz_256
  ;CHECK: vpsubusb %ymm1, %ymm0, %ymm0 {%k1} {z}
  %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epu8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
  ;CHECK-LABEL: test_mask_subs_epu8_rm_256
  ;CHECK: vpsubusb (%rdi), %ymm0, %ymm0
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epu8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
  ;CHECK-LABEL: test_mask_subs_epu8_rmk_256
  ;CHECK: vpsubusb (%rdi), %ymm0, %ymm1 {%k1}
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epu8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
  ;CHECK-LABEL: test_mask_subs_epu8_rmkz_256
  ;CHECK: vpsubusb (%rdi), %ymm0, %ymm0 {%k1} {z}
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
  ret <32 x i8> %res
}

declare <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)

; Masked max tests. Each test_int_* function below calls its intrinsic twice
; with the same %x0/%x1 sources and returns the element-wise add of the two
; results. The first call always uses %x2 and the variable mask argument; the
; second call uses either %x2 with mask -1 or zeroinitializer with the same
; variable mask. The directives sit above each define: they forbid the text
; "call" between the label and the first match, then expect the named
; instruction on the stated register class and, later, a {%k1} operand.

declare <16 x i8> @llvm.x86.avx512.mask.pmaxs.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)

; pmaxs.b.128: second call uses zeroinitializer with %mask; expects vpmaxsb on xmm.
; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_b_128
; CHECK-NOT: call
; CHECK: vpmaxsb %xmm
; CHECK: {%k1}
define <16 x i8>@test_int_x86_avx512_mask_pmaxs_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) {
  %res = call <16 x i8> @llvm.x86.avx512.mask.pmaxs.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2 ,i16 %mask)
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmaxs.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask)
  %res2 = add <16 x i8> %res, %res1
  ret <16 x i8> %res2
}

declare <32 x i8> @llvm.x86.avx512.mask.pmaxs.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)

; pmaxs.b.256: second call uses %x2 with mask -1; expects vpmaxsb on ymm.
; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_b_256
; CHECK-NOT: call
; CHECK: vpmaxsb %ymm
; CHECK: {%k1}
define <32 x i8>@test_int_x86_avx512_mask_pmaxs_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
  %res = call <32 x i8> @llvm.x86.avx512.mask.pmaxs.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
  %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmaxs.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
  %res2 = add <32 x i8> %res, %res1
  ret <32 x i8> %res2
}

declare <8 x i16> @llvm.x86.avx512.mask.pmaxs.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)

; pmaxs.w.128: second call uses %x2 with mask -1; expects vpmaxsw on xmm.
; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_w_128
; CHECK-NOT: call
; CHECK: vpmaxsw %xmm
; CHECK: {%k1}
define <8 x i16>@test_int_x86_avx512_mask_pmaxs_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
  %res = call <8 x i16> @llvm.x86.avx512.mask.pmaxs.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
  %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmaxs.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
  %res2 = add <8 x i16> %res, %res1
  ret <8 x i16> %res2
}

declare <16 x i16> @llvm.x86.avx512.mask.pmaxs.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)

; pmaxs.w.256: second call uses zeroinitializer with %mask; expects vpmaxsw on ymm.
; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_w_256
; CHECK-NOT: call
; CHECK: vpmaxsw %ymm
; CHECK: {%k1}
define <16 x i16>@test_int_x86_avx512_mask_pmaxs_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) {
  %res = call <16 x i16> @llvm.x86.avx512.mask.pmaxs.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask)
  %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmaxs.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask)
  %res2 = add <16 x i16> %res, %res1
  ret <16 x i16> %res2
}

declare <16 x i8> @llvm.x86.avx512.mask.pmaxu.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)

; pmaxu.b.128: second call uses zeroinitializer with %mask; expects vpmaxub on xmm.
; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_b_128
; CHECK-NOT: call
; CHECK: vpmaxub %xmm
; CHECK: {%k1}
define <16 x i8>@test_int_x86_avx512_mask_pmaxu_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2,i16 %mask) {
  %res = call <16 x i8> @llvm.x86.avx512.mask.pmaxu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask)
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmaxu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask)
  %res2 = add <16 x i8> %res, %res1
  ret <16 x i8> %res2
}

declare <32 x i8> @llvm.x86.avx512.mask.pmaxu.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)

; pmaxu.b.256: second call uses %x2 with mask -1; expects vpmaxub on ymm.
; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_b_256
; CHECK-NOT: call
; CHECK: vpmaxub %ymm
; CHECK: {%k1}
define <32 x i8>@test_int_x86_avx512_mask_pmaxu_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
  %res = call <32 x i8> @llvm.x86.avx512.mask.pmaxu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
  %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmaxu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
  %res2 = add <32 x i8> %res, %res1
  ret <32 x i8> %res2
}

declare <8 x i16> @llvm.x86.avx512.mask.pmaxu.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)

; pmaxu.w.128: second call uses %x2 with mask -1; expects vpmaxuw on xmm.
; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_w_128
; CHECK-NOT: call
; CHECK: vpmaxuw %xmm
; CHECK: {%k1}
define <8 x i16>@test_int_x86_avx512_mask_pmaxu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
  %res = call <8 x i16> @llvm.x86.avx512.mask.pmaxu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
  %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmaxu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
  %res2 = add <8 x i16> %res, %res1
  ret <8 x i16> %res2
}

declare <16 x i16> @llvm.x86.avx512.mask.pmaxu.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)

; pmaxu.w.256: second call uses zeroinitializer with %mask; expects vpmaxuw on ymm.
; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_w_256
; CHECK-NOT: call
; CHECK: vpmaxuw %ymm
; CHECK: {%k1}
define <16 x i16>@test_int_x86_avx512_mask_pmaxu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) {
  %res = call <16 x i16> @llvm.x86.avx512.mask.pmaxu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask)
  %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmaxu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask)
  %res2 = add <16 x i16> %res, %res1
  ret <16 x i16> %res2
}

declare <16 x i8> @llvm.x86.avx512.mask.pmins.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)

; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_b_128 3456 ; CHECK-NOT: call 3457 ; CHECK: vpminsb %xmm 3458 ; CHECK: {%k1} 3459 define <16 x i8>@test_int_x86_avx512_mask_pmins_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) { 3460 %res = call <16 x i8> @llvm.x86.avx512.mask.pmins.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) 3461 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmins.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask) 3462 %res2 = add <16 x i8> %res, %res1 3463 ret <16 x i8> %res2 3464 } 3465 3466 declare <32 x i8> @llvm.x86.avx512.mask.pmins.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 3467 3468 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_b_256 3469 ; CHECK-NOT: call 3470 ; CHECK: vpminsb %ymm 3471 ; CHECK: {%k1} 3472 define <32 x i8>@test_int_x86_avx512_mask_pmins_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { 3473 %res = call <32 x i8> @llvm.x86.avx512.mask.pmins.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) 3474 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmins.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) 3475 %res2 = add <32 x i8> %res, %res1 3476 ret <32 x i8> %res2 3477 } 3478 3479 declare <8 x i16> @llvm.x86.avx512.mask.pmins.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 3480 3481 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_w_128 3482 ; CHECK-NOT: call 3483 ; CHECK: vpminsw %xmm 3484 ; CHECK: {%k1} 3485 define <8 x i16>@test_int_x86_avx512_mask_pmins_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 3486 %res = call <8 x i16> @llvm.x86.avx512.mask.pmins.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 3487 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmins.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 3488 %res2 = add <8 x i16> %res, %res1 3489 ret <8 x i16> %res2 3490 } 3491 3492 declare <16 x i16> @llvm.x86.avx512.mask.pmins.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 3493 3494 ; CHECK-LABEL: 
@test_int_x86_avx512_mask_pmins_w_256 3495 ; CHECK-NOT: call 3496 ; CHECK: vpminsw %ymm 3497 ; CHECK: {%k1} 3498 define <16 x i16>@test_int_x86_avx512_mask_pmins_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) { 3499 %res = call <16 x i16> @llvm.x86.avx512.mask.pmins.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) 3500 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmins.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask) 3501 %res2 = add <16 x i16> %res, %res1 3502 ret <16 x i16> %res2 3503 } 3504 3505 declare <16 x i8> @llvm.x86.avx512.mask.pminu.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) 3506 3507 ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_b_128 3508 ; CHECK-NOT: call 3509 ; CHECK: vpminub %xmm 3510 ; CHECK: {%k1} 3511 define <16 x i8>@test_int_x86_avx512_mask_pminu_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) { 3512 %res = call <16 x i8> @llvm.x86.avx512.mask.pminu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) 3513 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pminu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask) 3514 %res2 = add <16 x i8> %res, %res1 3515 ret <16 x i8> %res2 3516 } 3517 3518 declare <32 x i8> @llvm.x86.avx512.mask.pminu.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 3519 3520 ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_b_256 3521 ; CHECK-NOT: call 3522 ; CHECK: vpminub %ymm 3523 ; CHECK: {%k1} 3524 define <32 x i8>@test_int_x86_avx512_mask_pminu_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { 3525 %res = call <32 x i8> @llvm.x86.avx512.mask.pminu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) 3526 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pminu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) 3527 %res2 = add <32 x i8> %res, %res1 3528 ret <32 x i8> %res2 3529 } 3530 3531 declare <8 x i16> @llvm.x86.avx512.mask.pminu.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 3532 3533 ; CHECK-LABEL: 
@test_int_x86_avx512_mask_pminu_w_128 3534 ; CHECK-NOT: call 3535 ; CHECK: vpminuw %xmm 3536 ; CHECK: {%k1} 3537 define <8 x i16>@test_int_x86_avx512_mask_pminu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 3538 %res = call <8 x i16> @llvm.x86.avx512.mask.pminu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 3539 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pminu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 3540 %res2 = add <8 x i16> %res, %res1 3541 ret <8 x i16> %res2 3542 } 3543 3544 declare <16 x i16> @llvm.x86.avx512.mask.pminu.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 3545 3546 ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_w_256 3547 ; CHECK-NOT: call 3548 ; CHECK: vpminuw %ymm 3549 ; CHECK: {%k1} 3550 define <16 x i16>@test_int_x86_avx512_mask_pminu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) { 3551 %res = call <16 x i16> @llvm.x86.avx512.mask.pminu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) 3552 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pminu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask) 3553 %res2 = add <16 x i16> %res, %res1 3554 ret <16 x i16> %res2 3555 } 3556 3557 declare <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 3558 3559 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_hi_128 3560 ; CHECK-NOT: call 3561 ; CHECK: kmov 3562 ; CHECK: vpermt2w %xmm{{.*}}{%k1} 3563 ; CHECK-NOT: {z} 3564 define <8 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 3565 %res = call <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 3566 %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 3567 %res2 = add <8 x i16> %res, %res1 3568 ret <8 x i16> %res2 3569 } 3570 3571 declare <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16>, <8 x 
i16>, <8 x i16>, i8) 3572 3573 ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_hi_128 3574 ; CHECK-NOT: call 3575 ; CHECK: kmov 3576 ; CHECK: vpermt2w %xmm{{.*}}{%k1} {z} 3577 define <8 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 3578 %res = call <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 3579 %res1 = call <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 3580 %res2 = add <8 x i16> %res, %res1 3581 ret <8 x i16> %res2 3582 } 3583 3584 declare <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 3585 3586 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_hi_256 3587 ; CHECK-NOT: call 3588 ; CHECK: kmov 3589 ; CHECK: vpermt2w %ymm{{.*}}{%k1} 3590 define <16 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 3591 %res = call <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 3592 %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 3593 %res2 = add <16 x i16> %res, %res1 3594 ret <16 x i16> %res2 3595 } 3596 3597 declare <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 3598 3599 ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_hi_256 3600 ; CHECK-NOT: call 3601 ; CHECK: kmov 3602 ; CHECK: vpermt2w %ymm{{.*}}{%k1} {z} 3603 define <16 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 3604 %res = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 3605 %res1 = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 3606 %res2 = add <16 x i16> %res, 
%res1 3607 ret <16 x i16> %res2 3608 } 3609 3610 declare <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 3611 3612 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_hi_128 3613 ; CHECK-NOT: call 3614 ; CHECK: kmov 3615 ; CHECK: vpermi2w %xmm{{.*}}{%k1} 3616 define <8 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 3617 %res = call <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 3618 %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 3619 %res2 = add <8 x i16> %res, %res1 3620 ret <8 x i16> %res2 3621 } 3622 3623 declare <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 3624 3625 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_hi_256 3626 ; CHECK-NOT: call 3627 ; CHECK: kmov 3628 ; CHECK: vpermi2w %ymm{{.*}}{%k1} 3629 define <16 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 3630 %res = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 3631 %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 3632 %res2 = add <16 x i16> %res, %res1 3633 ret <16 x i16> %res2 3634 } 3635 3636 declare <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) 3637 3638 ; CHECK-LABEL: @test_int_x86_avx512_mask_pavg_b_128 3639 ; CHECK-NOT: call 3640 ; CHECK: vpavgb %xmm 3641 ; CHECK: {%k1} 3642 define <16 x i8>@test_int_x86_avx512_mask_pavg_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) { 3643 %res = call <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) 3644 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, 
i16 -1) 3645 %res2 = add <16 x i8> %res, %res1 3646 ret <16 x i8> %res2 3647 } 3648 3649 declare <32 x i8> @llvm.x86.avx512.mask.pavg.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 3650 3651 ; CHECK-LABEL: @test_int_x86_avx512_mask_pavg_b_256 3652 ; CHECK-NOT: call 3653 ; CHECK: vpavgb %ymm 3654 ; CHECK: {%k1} 3655 define <32 x i8>@test_int_x86_avx512_mask_pavg_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { 3656 %res = call <32 x i8> @llvm.x86.avx512.mask.pavg.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) 3657 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pavg.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) 3658 %res2 = add <32 x i8> %res, %res1 3659 ret <32 x i8> %res2 3660 } 3661 3662 declare <8 x i16> @llvm.x86.avx512.mask.pavg.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 3663 3664 ; CHECK-LABEL: @test_int_x86_avx512_mask_pavg_w_128 3665 ; CHECK-NOT: call 3666 ; CHECK: vpavgw %xmm 3667 ; CHECK: {%k1} 3668 define <8 x i16>@test_int_x86_avx512_mask_pavg_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 3669 %res = call <8 x i16> @llvm.x86.avx512.mask.pavg.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 3670 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pavg.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 3671 %res2 = add <8 x i16> %res, %res1 3672 ret <8 x i16> %res2 3673 } 3674 3675 declare <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 3676 3677 ; CHECK-LABEL: @test_int_x86_avx512_mask_pavg_w_256 3678 ; CHECK-NOT: call 3679 ; CHECK: vpavgw %ymm 3680 ; CHECK: {%k1} 3681 define <16 x i16>@test_int_x86_avx512_mask_pavg_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 3682 %res = call <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 3683 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 3684 %res2 = add <16 x i16> %res, %res1 3685 ret 
<16 x i16> %res2
}

; pshuf.b.128: the intrinsic is called once with %x3 and once with -1 as its
; last operand; the two results are added and returned.
declare <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)

; CHECK-LABEL: @test_int_x86_avx512_mask_pshuf_b_128
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vpshufb %xmm{{.*}}{%k1}
define <16 x i8>@test_int_x86_avx512_mask_pshuf_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
  %res = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
  %res2 = add <16 x i8> %res, %res1
  ret <16 x i8> %res2
}

; pshuf.b.256: the intrinsic is called once with %x3 and once with -1 as its
; last operand; the two results are added and returned.
declare <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)

; CHECK-LABEL: @test_int_x86_avx512_mask_pshuf_b_256
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vpshufb %ymm{{.*}}{%k1}
define <32 x i8>@test_int_x86_avx512_mask_pshuf_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
  %res = call <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
  %res1 = call <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
  %res2 = add <32 x i8> %res, %res1
  ret <32 x i8> %res2
}

; pabs.b.128: the three-operand intrinsic is called once with %x2 and once
; with -1 as its last operand; the two results are added and returned.
declare <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8>, <16 x i8>, i16)

; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_b_128
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vpabsb{{.*}}{%k1}
define <16 x i8>@test_int_x86_avx512_mask_pabs_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) {
  %res = call <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2)
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 -1)
  %res2 = add <16 x i8> %res, %res1
  ret <16 x i8> %res2
}

declare <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32
x i8>, <32 x i8>, i32)

; pabs.b.256: the intrinsic is called once with %x2 and once with -1 as its
; last operand; the two results are added and returned.
; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_b_256
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vpabsb{{.*}}{%k1}
define <32 x i8>@test_int_x86_avx512_mask_pabs_b_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) {
  %res = call <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2)
  %res1 = call <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 -1)
  %res2 = add <32 x i8> %res, %res1
  ret <32 x i8> %res2
}

; pabs.w.128: the intrinsic is called once with %x2 and once with -1 as its
; last operand; the two results are added and returned.
declare <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16>, <8 x i16>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_w_128
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vpabsw{{.*}}{%k1}
define <8 x i16>@test_int_x86_avx512_mask_pabs_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) {
  %res = call <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2)
  %res1 = call <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 -1)
  %res2 = add <8 x i16> %res, %res1
  ret <8 x i16> %res2
}

; pabs.w.256: the intrinsic is called once with %x2 and once with -1 as its
; last operand; the two results are added and returned.
declare <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16>, <16 x i16>, i16)

; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_w_256
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vpabsw{{.*}}{%k1}
define <16 x i16>@test_int_x86_avx512_mask_pabs_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) {
  %res = call <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2)
  %res1 = call <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 -1)
  %res2 = add <16 x i16> %res, %res1
  ret <16 x i16> %res2
}

; blend.b.256: a single intrinsic call; %a0 (the function's first parameter)
; is passed as the intrinsic's last operand and the result is returned as-is.
; CHECK-LABEL: test_x86_mask_blend_b_256
; CHECK: vpblendmb
define <32 x i8> @test_x86_mask_blend_b_256(i32 %a0, <32 x i8> %a1, <32 x i8> %a2) {
  %res = call <32 x i8> @llvm.x86.avx512.mask.blend.b.256(<32 x i8> %a1, <32 x i8> %a2, i32 %a0) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx512.mask.blend.b.256(<32 x i8>, <32 x i8>, i32) nounwind readonly

; blend.w.256: a single intrinsic call with %mask as the last operand; the
; result is returned as-is.
; CHECK-LABEL: test_x86_mask_blend_w_256
define <16 x i16> @test_x86_mask_blend_w_256(i16 %mask, <16 x i16> %a1, <16 x i16> %a2) {
; CHECK: vpblendmw
  %res = call <16 x i16> @llvm.x86.avx512.mask.blend.w.256(<16 x i16> %a1, <16 x i16> %a2, i16 %mask) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx512.mask.blend.w.256(<16 x i16>, <16 x i16>, i16) nounwind readonly

; blend.b.128: a single intrinsic call with %a0 as the last operand; the
; result is returned as-is.
; CHECK-LABEL: test_x86_mask_blend_b_128
; CHECK: vpblendmb
define <16 x i8> @test_x86_mask_blend_b_128(i16 %a0, <16 x i8> %a1, <16 x i8> %a2) {
  %res = call <16 x i8> @llvm.x86.avx512.mask.blend.b.128(<16 x i8> %a1, <16 x i8> %a2, i16 %a0) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.avx512.mask.blend.b.128(<16 x i8>, <16 x i8>, i16) nounwind readonly

; blend.w.128: a single intrinsic call with %mask as the last operand; the
; result is returned as-is.
; CHECK-LABEL: test_x86_mask_blend_w_128
define <8 x i16> @test_x86_mask_blend_w_128(i8 %mask, <8 x i16> %a1, <8 x i16> %a2) {
; CHECK: vpblendmw
  %res = call <8 x i16> @llvm.x86.avx512.mask.blend.w.128(<8 x i16> %a1, <8 x i16> %a2, i8 %mask) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.avx512.mask.blend.w.128(<8 x i16>, <8 x i16>, i8) nounwind readonly

; pmulhu.w.128: the intrinsic is called once with %x3 and once with -1 as its
; last operand; the two results are added and returned.
declare <8 x i16> @llvm.x86.avx512.mask.pmulhu.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_pmulhu_w_128
; CHECK-NOT: call
; CHECK: kmov
; CHECK: {%k1}
; CHECK: vpmulhuw {{.*}}encoding: [0x62
define <8 x i16>@test_int_x86_avx512_mask_pmulhu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
  %res = call <8 x i16> @llvm.x86.avx512.mask.pmulhu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
  %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmulhu.w.128(<8 x i16>
%x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
  %res2 = add <8 x i16> %res, %res1
  ret <8 x i16> %res2
}

; pmulhu.w.256: the intrinsic is called once with %x3 and once with -1 as its
; last operand; the two results are added and returned.
declare <16 x i16> @llvm.x86.avx512.mask.pmulhu.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)

; CHECK-LABEL: @test_int_x86_avx512_mask_pmulhu_w_256
; CHECK-NOT: call
; CHECK: kmov
; CHECK: {%k1}
; CHECK: vpmulhuw {{.*}}encoding: [0x62
define <16 x i16>@test_int_x86_avx512_mask_pmulhu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
  %res = call <16 x i16> @llvm.x86.avx512.mask.pmulhu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
  %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmulhu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
  %res2 = add <16 x i16> %res, %res1
  ret <16 x i16> %res2
}

; pmulh.w.128: the intrinsic is called once with %x3 and once with -1 as its
; last operand; the two results are added and returned.
declare <8 x i16> @llvm.x86.avx512.mask.pmulh.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_pmulh_w_128
; CHECK-NOT: call
; CHECK: kmov
; CHECK: {%k1}
; CHECK: vpmulhw {{.*}}encoding: [0x62
define <8 x i16>@test_int_x86_avx512_mask_pmulh_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
  %res = call <8 x i16> @llvm.x86.avx512.mask.pmulh.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
  %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmulh.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
  %res2 = add <8 x i16> %res, %res1
  ret <8 x i16> %res2
}

; pmulh.w.256: the intrinsic is called once with %x3 and once with -1 as its
; last operand; the two results are added and returned.
declare <16 x i16> @llvm.x86.avx512.mask.pmulh.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
; CHECK-LABEL: @test_int_x86_avx512_mask_pmulh_w_256
; CHECK-NOT: call
; CHECK: kmov
; CHECK: {%k1}
; CHECK: vpmulhw {{.*}}encoding: [0x62
define <16 x i16>@test_int_x86_avx512_mask_pmulh_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
  %res = call <16 x i16> @llvm.x86.avx512.mask.pmulh.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2,
i16 %x3)
  %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmulh.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
  %res2 = add <16 x i16> %res, %res1
  ret <16 x i16> %res2
}

; pmul.hr.sw.128: the intrinsic is called once with %x3 and once with -1 as
; its last operand; the two results are added and returned.
declare <8 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_pmulhr_sw_128
; CHECK-NOT: call
; CHECK: kmov
; CHECK: {%k1}
; CHECK: vpmulhrsw {{.*}}encoding: [0x62
define <8 x i16>@test_int_x86_avx512_mask_pmulhr_sw_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
  %res = call <8 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
  %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
  %res2 = add <8 x i16> %res, %res1
  ret <8 x i16> %res2
}

; pmul.hr.sw.256: the intrinsic is called once with %x3 and once with -1 as
; its last operand; the two results are added and returned.
declare <16 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
; CHECK-LABEL: @test_int_x86_avx512_mask_pmulhr_sw_256
; CHECK-NOT: call
; CHECK: kmov
; CHECK: {%k1}
; CHECK: vpmulhrsw {{.*}}encoding: [0x62
define <16 x i16>@test_int_x86_avx512_mask_pmulhr_sw_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
  %res = call <16 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
  %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
  %res2 = add <16 x i16> %res, %res1
  ret <16 x i16> %res2
}

; pmov.wb.128: three calls - last operand -1, last operand %x2, and %x2 with a
; zeroinitializer second operand; the three results are added and returned.
declare <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16>, <16 x i8>, i8)

define <16 x i8>@test_int_x86_avx512_mask_pmov_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_128:
; CHECK: vpmovwb %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovwb %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovwb %xmm0, %xmm0
  %res0 =
call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1)
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2)
  %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2)
  %res3 = add <16 x i8> %res0, %res1
  %res4 = add <16 x i8> %res3, %res2
  ret <16 x i8> %res4
}

; pmov.wb.mem.128: the void intrinsic is called on %ptr twice, first with -1
; and then with %x2 as its last operand.
declare void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16>, i8)

define void @test_int_x86_avx512_mask_pmov_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_128:
; CHECK: vpmovwb %xmm0, (%rdi)
; CHECK: vpmovwb %xmm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1)
  call void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 %x2)
  ret void
}

; pmovs.wb.128: three calls - last operand -1, last operand %x2, and %x2 with
; a zeroinitializer second operand; the three results are added and returned.
declare <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16>, <16 x i8>, i8)

define <16 x i8>@test_int_x86_avx512_mask_pmovs_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_128:
; CHECK: vpmovswb %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovswb %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovswb %xmm0, %xmm0
  %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1)
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2)
  %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2)
  %res3 = add <16 x i8> %res0, %res1
  %res4 = add <16 x i8> %res3, %res2
  ret <16 x i8> %res4
}

; pmovs.wb.mem.128: the void intrinsic is called on %ptr twice, first with -1
; and then with %x2 as its last operand.
declare void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16>, i8)

define void @test_int_x86_avx512_mask_pmovs_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_128:
; CHECK: vpmovswb %xmm0, (%rdi)
; CHECK: vpmovswb %xmm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1)
  call void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 %x2)
  ret void
}

; pmovus.wb.128: three calls - last operand -1, last operand %x2, and %x2 with
; a zeroinitializer second operand; the three results are added and returned.
declare <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16>, <16 x i8>, i8)

define <16 x i8>@test_int_x86_avx512_mask_pmovus_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_128:
; CHECK: vpmovuswb %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovuswb %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovuswb %xmm0, %xmm0
  %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1)
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2)
  %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2)
  %res3 = add <16 x i8> %res0, %res1
  %res4 = add <16 x i8> %res3, %res2
  ret <16 x i8> %res4
}

; pmovus.wb.mem.128: the void intrinsic is called on %ptr twice, first with -1
; and then with %x2 as its last operand.
declare void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16>, i8)

define void @test_int_x86_avx512_mask_pmovus_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_128:
; CHECK: vpmovuswb %xmm0, (%rdi)
; CHECK: vpmovuswb %xmm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1)
  call void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 %x2)
  ret void
}

; pmov.wb.256: three calls - last operand -1, last operand %x2, and %x2 with a
; zeroinitializer second operand; the three results are added and returned.
declare <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16>, <16 x i8>, i16)

define <16 x i8>@test_int_x86_avx512_mask_pmov_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_256:
; CHECK: vpmovwb %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovwb %ymm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovwb %ymm0, %xmm0
  %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1)
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2)
  %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2)
  %res3 = add <16 x i8> %res0, %res1
  %res4 = add <16 x i8> %res3, %res2
  ret <16 x i8> %res4
}

; pmov.wb.mem.256: the void intrinsic is called on %ptr twice, first with -1
; and then with %x2 as its last operand.
declare void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16>, i16)

define void @test_int_x86_avx512_mask_pmov_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_256:
; CHECK: vpmovwb %ymm0, (%rdi)
; CHECK: vpmovwb %ymm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1)
  call void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 %x2)
  ret void
}

; pmovs.wb.256: three calls - last operand -1, last operand %x2, and %x2 with
; a zeroinitializer second operand; the three results are added and returned.
declare <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16>, <16 x i8>, i16)

define <16 x i8>@test_int_x86_avx512_mask_pmovs_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_256:
; CHECK: vpmovswb %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovswb %ymm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovswb %ymm0, %xmm0
  %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1)
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2)
  %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2)
  %res3 = add <16 x i8> %res0, %res1
  %res4 = add <16 x i8> %res3, %res2
  ret <16 x i8> %res4
}

; pmovs.wb.mem.256: the void intrinsic is called on %ptr twice, first with -1
; and then with %x2 as its last operand.
declare void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16>, i16)

define void @test_int_x86_avx512_mask_pmovs_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_256:
; CHECK: vpmovswb %ymm0, (%rdi)
; CHECK: vpmovswb %ymm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1)
  call void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 %x2)
  ret void
}

; pmovus.wb.256: three calls - last operand -1, last operand %x2, and %x2 with
; a zeroinitializer second operand; the three results are added and returned.
declare <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16>, <16 x i8>, i16)

define <16 x i8>@test_int_x86_avx512_mask_pmovus_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_256:
; CHECK: vpmovuswb %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovuswb %ymm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovuswb %ymm0, %xmm0
  %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1)
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2)
  %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2)
  %res3 = add <16 x i8> %res0, %res1
  %res4 = add <16 x i8> %res3, %res2
  ret <16 x i8> %res4
}

; pmovus.wb.mem.256: the void intrinsic is called on %ptr twice, first with -1
; and then with %x2 as its last operand.
declare void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16>, i16)

define void @test_int_x86_avx512_mask_pmovus_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_256:
; CHECK: vpmovuswb %ymm0, (%rdi)
; CHECK: vpmovuswb %ymm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1)
  call void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 %x2)
  ret void
}

; pmaddw.d.128: the intrinsic is called once with %x3 and once with -1 as its
; last operand; the two <4 x i32> results are added and returned.
declare <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16>, <8 x i16>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pmaddw_d_128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmaddw_d_128:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmaddwd %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 -1)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

; pmaddw.d.256: the intrinsic is called once with %x3 and once with -1 as its
; last operand; the two <8 x i32> results are added and returned.
declare <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16>, <16 x i16>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_pmaddw_d_256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmaddw_d_256:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmaddwd %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0
; CHECK-NEXT: retq
  %res = call <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 -1)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

; pmaddubs.w.128: the intrinsic is called once with %x3 and once with -1 as
; its last operand; the two <8 x i16> results are added and returned.
declare <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8>, <16 x i8>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_mask_pmaddubs_w_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmaddubs_w_128:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0
; CHECK-NEXT: retq
  %res = call <8 x i16>
@llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 %x3)
  %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 -1)
  %res2 = add <8 x i16> %res, %res1
  ret <8 x i16> %res2
}

; pmaddubs.w.256: the intrinsic is called once with %x3 and once with -1 as
; its last operand; the two <16 x i16> results are added and returned.
declare <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8>, <32 x i8>, <16 x i16>, i16)

define <16 x i16>@test_int_x86_avx512_mask_pmaddubs_w_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmaddubs_w_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0
; CHECK-NEXT: retq
  %res = call <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 %x3)
  %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 -1)
  %res2 = add <16 x i16> %res, %res1
  ret <16 x i16> %res2
}

; punpckhb.w.128: the intrinsic is called once with %x3 and once with -1 as
; its last operand; the two results are added and returned. The expectations
; below also match the shuffle-decode comment lines that follow each
; instruction in the llc output.
declare <16 x i8> @llvm.x86.avx512.mask.punpckhb.w.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)

define <16 x i8>@test_int_x86_avx512_mask_punpckhb_w_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckhb_w_128:
; CHECK: vpunpckhbw %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: ## xmm2 = xmm2[8],k1[8],xmm2[9],k1[9],xmm2[10],k1[10],xmm2[11],k1[11],xmm2[12],k1[12],xmm2[13],k1[13],xmm2[14],k1[14],xmm2[15],k1[15]
; CHECK-NEXT: vpunpckhbw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x68,0xc1]
; CHECK-NEXT: ## xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
  %res = call <16 x i8> @llvm.x86.avx512.mask.punpckhb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
  %res1
= call <16 x i8> @llvm.x86.avx512.mask.punpckhb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
  %res2 = add <16 x i8> %res, %res1
  ret <16 x i8> %res2
}

; punpcklb.w.128: the intrinsic is called once with %x3 and once with -1 as
; its last operand; the two results are added and returned.
declare <16 x i8> @llvm.x86.avx512.mask.punpcklb.w.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)

define <16 x i8>@test_int_x86_avx512_mask_punpcklb_w_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpcklb_w_128:
; CHECK: vpunpcklbw %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: ## xmm2 = xmm2[0],k1[0],xmm2[1],k1[1],xmm2[2],k1[2],xmm2[3],k1[3],xmm2[4],k1[4],xmm2[5],k1[5],xmm2[6],k1[6],xmm2[7],k1[7]
; CHECK-NEXT: vpunpcklbw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x60,0xc1]
; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
  %res = call <16 x i8> @llvm.x86.avx512.mask.punpcklb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.punpcklb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
  %res2 = add <16 x i8> %res, %res1
  ret <16 x i8> %res2
}

; punpckhb.w.256: the intrinsic is called once with %x3 and once with -1 as
; its last operand; the two results are added and returned.
declare <32 x i8> @llvm.x86.avx512.mask.punpckhb.w.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)

define <32 x i8>@test_int_x86_avx512_mask_punpckhb_w_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckhb_w_256:
; CHECK: vpunpckhbw %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: ## ymm2 = ymm2[8],k1[8],ymm2[9],k1[9],ymm2[10],k1[10],ymm2[11],k1[11],ymm2[12],k1[12],ymm2[13],k1[13],ymm2[14],k1[14],ymm2[15],k1[15],ymm2[24],k1[24],ymm2[25],k1[25],ymm2[26],k1[26],ymm2[27],k1[27],ymm2[28],k1[28],ymm2[29],k1[29],ymm2[30],k1[30],ymm2[31],k1[31]
; CHECK-NEXT: vpunpckhbw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x68,0xc1]
; CHECK-NEXT: ## ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
  %res = call <32 x i8> @llvm.x86.avx512.mask.punpckhb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
  %res1 = call <32 x i8> @llvm.x86.avx512.mask.punpckhb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
  %res2 = add <32 x i8> %res, %res1
  ret <32 x i8> %res2
}

; punpcklb.w.256: the intrinsic is called once with %x3 and once with -1 as
; its last operand; the two results are added and returned.
declare <32 x i8> @llvm.x86.avx512.mask.punpcklb.w.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)

define <32 x i8>@test_int_x86_avx512_mask_punpcklb_w_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpcklb_w_256:
; CHECK: vpunpcklbw %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: ## ymm2 = ymm2[0],k1[0],ymm2[1],k1[1],ymm2[2],k1[2],ymm2[3],k1[3],ymm2[4],k1[4],ymm2[5],k1[5],ymm2[6],k1[6],ymm2[7],k1[7],ymm2[16],k1[16],ymm2[17],k1[17],ymm2[18],k1[18],ymm2[19],k1[19],ymm2[20],k1[20],ymm2[21],k1[21],ymm2[22],k1[22],ymm2[23],k1[23]
; CHECK-NEXT: vpunpcklbw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x60,0xc1]
; CHECK-NEXT: ## ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
  %res = call <32 x i8> @llvm.x86.avx512.mask.punpcklb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
  %res1 = call <32 x i8> @llvm.x86.avx512.mask.punpcklb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
  %res2 = add <32 x i8> %res, %res1
  ret <32 x i8> %res2
}

declare <8 x i16> @llvm.x86.avx512.mask.punpcklw.d.128(<8 x i16>, <8 x i16>, <8 x i16>,
i8)

; punpcklw.d.128: the intrinsic is called once with %x3 and once with -1 as
; its last operand; the two results are added and returned.
define <8 x i16>@test_int_x86_avx512_mask_punpcklw_d_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpcklw_d_128:
; CHECK: vpunpcklwd %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: ## xmm2 = xmm2[0],k1[0],xmm2[1],k1[1],xmm2[2],k1[2],xmm2[3],k1[3]
; CHECK-NEXT: vpunpcklwd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x61,0xc1]
; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.punpcklw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
  %res1 = call <8 x i16> @llvm.x86.avx512.mask.punpcklw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
  %res2 = add <8 x i16> %res, %res1
  ret <8 x i16> %res2
}

; punpckhw.d.128: the intrinsic is called once with %x3 and once with -1 as
; its last operand; the two results are added and returned.
declare <8 x i16> @llvm.x86.avx512.mask.punpckhw.d.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_mask_punpckhw_d_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckhw_d_128:
; CHECK: vpunpckhwd %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: ## xmm2 = xmm2[4],k1[4],xmm2[5],k1[5],xmm2[6],k1[6],xmm2[7],k1[7]
; CHECK-NEXT: vpunpckhwd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x69,0xc1]
; CHECK-NEXT: ## xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
  %res = call <8 x i16> @llvm.x86.avx512.mask.punpckhw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
  %res1 = call <8 x i16> @llvm.x86.avx512.mask.punpckhw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
  %res2 = add <8 x i16> %res, %res1
  ret <8 x i16> %res2
}

; punpcklw.d.256: the intrinsic is called once with %x3 and once with -1 as
; its last operand; the two results are added and returned.
declare <16 x i16> @llvm.x86.avx512.mask.punpcklw.d.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)

define <16 x i16>@test_int_x86_avx512_mask_punpcklw_d_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpcklw_d_256:
; CHECK: vpunpcklwd %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: ## ymm2 = ymm2[0],k1[0],ymm2[1],k1[1],ymm2[2],k1[2],ymm2[3],k1[3],ymm2[8],k1[8],ymm2[9],k1[9],ymm2[10],k1[10],ymm2[11],k1[11]
; CHECK-NEXT: vpunpcklwd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x61,0xc1]
; CHECK-NEXT: ## ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
  %res = call <16 x i16> @llvm.x86.avx512.mask.punpcklw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
  %res1 = call <16 x i16> @llvm.x86.avx512.mask.punpcklw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
  %res2 = add <16 x i16> %res, %res1
  ret <16 x i16> %res2
}

; punpckhw.d.256: the intrinsic is called once with %x3 and once with -1 as
; its last operand; the two results are added and returned.
declare <16 x i16> @llvm.x86.avx512.mask.punpckhw.d.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)

define <16 x i16>@test_int_x86_avx512_mask_punpckhw_d_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckhw_d_256:
; CHECK: vpunpckhwd %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: ## ymm2 = ymm2[4],k1[4],ymm2[5],k1[5],ymm2[6],k1[6],ymm2[7],k1[7],ymm2[12],k1[12],ymm2[13],k1[13],ymm2[14],k1[14],ymm2[15],k1[15]
; CHECK-NEXT: vpunpckhwd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x69,0xc1]
; CHECK-NEXT: ## ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
  %res = call <16 x i16> @llvm.x86.avx512.mask.punpckhw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
  %res1 = call <16 x i16> @llvm.x86.avx512.mask.punpckhw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
  %res2 = add <16 x i16> %res, %res1
  ret <16 x i16> %res2
}

; Declaration only: palignr.128 takes two <16 x i8> vectors, an i32, a third
; <16 x i8> vector and an i16.
declare <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8>, <16 x i8>, i32, <16 x i8>, i16)

; palignr.128 with i32 operand 2: three calls - (%x3, %x4), (zeroinitializer,
; %x4) and (%x3, -1) as the last two operands; the three results are added.
define <16 x i8>@test_int_x86_avx512_mask_palignr_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x3, i16 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_palignr_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpalignr $2, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vpalignr $2, %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vpalignr $2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpaddb %xmm3, %xmm2, %xmm1
; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
  %res = call <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <16 x i8> %x3, i16 %x4)
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <16 x i8> zeroinitializer, i16 %x4)
  %res2 = call <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <16 x i8> %x3, i16 -1)
  %res3 = add <16 x i8> %res, %res1
  %res4 = add <16 x i8> %res3, %res2
  ret <16 x i8> %res4
}

; palignr.256 with i32 operand 2: three calls - (%x3, %x4), (zeroinitializer,
; %x4) and (%x3, -1) as the last two operands; the three results are added.
declare <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8>, <32 x i8>, i32, <32 x i8>, i32)

define <32 x i8>@test_int_x86_avx512_mask_palignr_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x3, i32 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_palignr_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpalignr $2, %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpalignr $2, %ymm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vpalignr $2, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpaddb %ymm3, %ymm2, %ymm1
; CHECK-NEXT: vpaddb %ymm0, %ymm1, %ymm0
; CHECK-NEXT: retq
  %res = call <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <32 x i8> %x3, i32 %x4)
  %res1 = call <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <32 x i8> zeroinitializer, i32 %x4)
  %res2 = call <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <32 x i8>
%x3, i32 -1)
  %res3 = add <32 x i8> %res, %res1
  %res4 = add <32 x i8> %res3, %res2
  ret <32 x i8> %res4
}

; dbpsadbw.128 with i32 operand 2: three calls - (%x3, %x4), (zeroinitializer,
; %x4) and (%x3, -1) as the last two operands. Note the final add is
; %res2 + %res3, unlike the 256-bit test that follows (%res3 + %res2); the
; expected vpaddw operand order depends on it.
declare <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8>, <16 x i8>, i32, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_mask_dbpsadbw_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_dbpsadbw_128:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm1
; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <8 x i16> %x3, i8 %x4)
  %res1 = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <8 x i16> zeroinitializer, i8 %x4)
  %res2 = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <8 x i16> %x3, i8 -1)
  %res3 = add <8 x i16> %res, %res1
  %res4 = add <8 x i16> %res2, %res3
  ret <8 x i16> %res4
}

; dbpsadbw.256 with i32 operand 2: three calls - (%x3, %x4), (zeroinitializer,
; %x4) and (%x3, -1) as the last two operands; the three results are added.
declare <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8>, <32 x i8>, i32, <16 x i16>, i16)

define <16 x i16>@test_int_x86_avx512_mask_dbpsadbw_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x3, i16 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_dbpsadbw_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpaddw %ymm3, %ymm2, %ymm1
; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0
; CHECK-NEXT: retq
  %res = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8>
%x0, <32 x i8> %x1, i32 2, <16 x i16> %x3, i16 %x4) 4288 %res1 = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <16 x i16> zeroinitializer, i16 %x4) 4289 %res2 = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <16 x i16> %x3, i16 -1) 4290 %res3 = add <16 x i16> %res, %res1 4291 %res4 = add <16 x i16> %res3, %res2 4292 ret <16 x i16> %res4 4293 } 4294 4295 declare <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8>, <32 x i8>, i32) 4296 4297 define <32 x i8>@test_int_x86_avx512_pbroadcastb_256(<16 x i8> %x0, <32 x i8> %x1, i32 %mask) { 4298 ; CHECK-LABEL: test_int_x86_avx512_pbroadcastb_256: 4299 ; CHECK: ## BB#0: 4300 ; CHECK-NEXT: kmovd %edi, %k1 4301 ; CHECK-NEXT: vpbroadcastb %xmm0, %ymm1 {%k1} 4302 ; CHECK-NEXT: vpbroadcastb %xmm0, %ymm2 {%k1} {z} 4303 ; CHECK-NEXT: vpbroadcastb %xmm0, %ymm0 4304 ; CHECK-NEXT: vpaddb %ymm1, %ymm0, %ymm0 4305 ; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0 4306 ; CHECK-NEXT: retq 4307 %res = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> %x1, i32 -1) 4308 %res1 = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> %x1, i32 %mask) 4309 %res2 = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> zeroinitializer, i32 %mask) 4310 %res3 = add <32 x i8> %res, %res1 4311 %res4 = add <32 x i8> %res2, %res3 4312 ret <32 x i8> %res4 4313 } 4314 4315 declare <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8>, <16 x i8>, i16) 4316 4317 define <16 x i8>@test_int_x86_avx512_pbroadcastb_128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask) { 4318 ; CHECK-LABEL: test_int_x86_avx512_pbroadcastb_128: 4319 ; CHECK: ## BB#0: 4320 ; CHECK-NEXT: kmovw %edi, %k1 4321 ; CHECK-NEXT: vpbroadcastb %xmm0, %xmm1 {%k1} 4322 ; CHECK-NEXT: vpbroadcastb %xmm0, %xmm2 {%k1} {z} 4323 ; CHECK-NEXT: vpbroadcastb %xmm0, %xmm0 4324 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 4325 ; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 4326 ; 
CHECK-NEXT: retq 4327 %res = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> %x1, i16 -1) 4328 %res1 = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask) 4329 %res2 = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> zeroinitializer, i16 %mask) 4330 %res3 = add <16 x i8> %res, %res1 4331 %res4 = add <16 x i8> %res2, %res3 4332 ret <16 x i8> %res4 4333 } 4334 4335 declare <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16>, <16 x i16>, i16) 4336 4337 define <16 x i16>@test_int_x86_avx512_pbroadcastw_256(<8 x i16> %x0, <16 x i16> %x1, i16 %mask) { 4338 ; CHECK-LABEL: test_int_x86_avx512_pbroadcastw_256: 4339 ; CHECK: ## BB#0: 4340 ; CHECK-NEXT: kmovw %edi, %k1 4341 ; CHECK-NEXT: vpbroadcastw %xmm0, %ymm1 {%k1} 4342 ; CHECK-NEXT: vpbroadcastw %xmm0, %ymm2 {%k1} {z} 4343 ; CHECK-NEXT: vpbroadcastw %xmm0, %ymm0 4344 ; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0 4345 ; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 4346 ; CHECK-NEXT: retq 4347 %res = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> %x1, i16 -1) 4348 %res1 = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> %x1, i16 %mask) 4349 %res2 = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> zeroinitializer, i16 %mask) 4350 %res3 = add <16 x i16> %res, %res1 4351 %res4 = add <16 x i16> %res2, %res3 4352 ret <16 x i16> %res4 4353 } 4354 4355 declare <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16>, <8 x i16>, i8) 4356 4357 define <8 x i16>@test_int_x86_avx512_pbroadcastw_128(<8 x i16> %x0, <8 x i16> %x1, i8 %mask) { 4358 ; CHECK-LABEL: test_int_x86_avx512_pbroadcastw_128: 4359 ; CHECK: ## BB#0: 4360 ; CHECK-NEXT: movzbl %dil, %eax 4361 ; CHECK-NEXT: kmovw %eax, %k1 4362 ; CHECK-NEXT: vpbroadcastw %xmm0, %xmm1 {%k1} 4363 ; CHECK-NEXT: vpbroadcastw %xmm0, %xmm2 {%k1} {z} 4364 ; CHECK-NEXT: vpbroadcastw %xmm0, %xmm0 4365 ; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 
4366 ; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 4367 ; CHECK-NEXT: retq 4368 %res = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> %x1, i8 -1) 4369 %res1 = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> %x1, i8 %mask) 4370 %res2 = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> zeroinitializer, i8 %mask) 4371 %res3 = add <8 x i16> %res, %res1 4372 %res4 = add <8 x i16> %res2, %res3 4373 ret <8 x i16> %res4 4374 } 4375 4376 declare <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8>, <64 x i8>, i64) 4377 4378 define <64 x i8>@test_int_x86_avx512_pbroadcastb_512(<16 x i8> %x0, <64 x i8> %x1, i64 %mask) { 4379 ; CHECK-LABEL: test_int_x86_avx512_pbroadcastb_512: 4380 ; CHECK: ## BB#0: 4381 ; CHECK-NEXT: kmovq %rdi, %k1 ## encoding: [0xc4,0xe1,0xfb,0x92,0xcf] 4382 ; CHECK-NEXT: vpbroadcastb %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x78,0xc8] 4383 ; CHECK-NEXT: vpbroadcastb %xmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x78,0xd0] 4384 ; CHECK-NEXT: vpbroadcastb %xmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x78,0xc0] 4385 ; CHECK-NEXT: vpaddb %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc1] 4386 ; CHECK-NEXT: vpaddb %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc0] 4387 ; CHECK-NEXT: retq ## encoding: [0xc3] 4388 %res = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> %x1, i64 -1) 4389 %res1 = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> %x1, i64 %mask) 4390 %res2 = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> zeroinitializer, i64 %mask) 4391 %res3 = add <64 x i8> %res, %res1 4392 %res4 = add <64 x i8> %res2, %res3 4393 ret <64 x i8> %res4 4394 } 4395 4396 declare <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16>, <32 x i16>, i32) 4397 4398 define <32 x i16>@test_int_x86_avx512_pbroadcastw_512(<8 x i16> %x0, <32 x i16> %x1, i32 %mask) { 4399 ; CHECK-LABEL: 
test_int_x86_avx512_pbroadcastw_512: 4400 ; CHECK: ## BB#0: 4401 ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] 4402 ; CHECK-NEXT: vpbroadcastw %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x79,0xc8] 4403 ; CHECK-NEXT: vpbroadcastw %xmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x79,0xd0] 4404 ; CHECK-NEXT: vpbroadcastw %xmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x79,0xc0] 4405 ; CHECK-NEXT: vpaddw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc1] 4406 ; CHECK-NEXT: vpaddw %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] 4407 ; CHECK-NEXT: retq ## encoding: [0xc3] 4408 %res = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> %x1, i32 -1) 4409 %res1 = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> %x1, i32 %mask) 4410 %res2 = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> zeroinitializer, i32 %mask) 4411 %res3 = add <32 x i16> %res, %res1 4412 %res4 = add <32 x i16> %res2, %res3 4413 ret <32 x i16> %res4 4414 } 4415 4416