; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx --show-mc-encoding | FileCheck %s

define i64 @test_pcmpeq_b(<64 x i8> %a, <64 x i8> %b) {
; CHECK-LABEL: test_pcmpeq_b
; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 ##
  %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
  ret i64 %res
}

define i64 @test_mask_pcmpeq_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_b
; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} ##
  %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
  ret i64 %res
}

declare i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8>, <64 x i8>, i64)

define i32 @test_pcmpeq_w(<32 x i16> %a, <32 x i16> %b) {
; CHECK-LABEL: test_pcmpeq_w
; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 ##
  %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
  ret i32 %res
}

define i32 @test_mask_pcmpeq_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_w
; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} ##
  %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
  ret i32 %res
}

declare i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16>, <32 x i16>, i32)

define i64 @test_pcmpgt_b(<64 x i8> %a, <64 x i8> %b) {
; CHECK-LABEL: test_pcmpgt_b
; CHECK: vpcmpgtb %zmm1, %zmm0, %k0 ##
  %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
  ret i64 %res
}

define i64 @test_mask_pcmpgt_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_b
; CHECK: vpcmpgtb %zmm1, %zmm0, %k0 {%k1} ##
  %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
  ret i64 %res
}

declare i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8>, <64 x i8>, i64)

define i32 @test_pcmpgt_w(<32 x i16> %a, <32 x i16> %b) {
; CHECK-LABEL: test_pcmpgt_w
; CHECK: vpcmpgtw %zmm1, %zmm0, %k0 ##
  %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
  ret i32 %res
}

define i32 @test_mask_pcmpgt_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_w
; CHECK: vpcmpgtw %zmm1, %zmm0, %k0 {%k1} ##
  %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
  ret i32 %res
}

declare i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16>, <32 x i16>, i32)
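
; The cmp/ucmp tests below exercise all eight comparison immediates of the
; masked compare intrinsics. The predicate names printed in the CHECK lines
; map to immediates as: eq=0, lt=1, le=2, unord=3, neq=4, nlt=5, nle=6,
; ord=7 (the unord/ord spellings for 3 and 7 appear to reuse the FP
; predicate names even for these integer compares). Each compare result is
; inserted into a distinct vector lane so that every call stays live through
; to the return.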

define <8 x i64> @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
; CHECK-LABEL: test_cmp_b_512
; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 ##
  %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 0, i64 -1)
  %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0
; CHECK: vpcmpltb %zmm1, %zmm0, %k0 ##
  %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 1, i64 -1)
  %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1
; CHECK: vpcmpleb %zmm1, %zmm0, %k0 ##
  %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 2, i64 -1)
  %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2
; CHECK: vpcmpunordb %zmm1, %zmm0, %k0 ##
  %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 3, i64 -1)
  %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3
; CHECK: vpcmpneqb %zmm1, %zmm0, %k0 ##
  %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 4, i64 -1)
  %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4
; CHECK: vpcmpnltb %zmm1, %zmm0, %k0 ##
  %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 5, i64 -1)
  %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5
; CHECK: vpcmpnleb %zmm1, %zmm0, %k0 ##
  %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 6, i64 -1)
  %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6
; CHECK: vpcmpordb %zmm1, %zmm0, %k0 ##
  %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 7, i64 -1)
  %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7
  ret <8 x i64> %vec7
}

define <8 x i64> @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; CHECK-LABEL: test_mask_cmp_b_512
; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} ##
  %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 0, i64 %mask)
  %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0
; CHECK: vpcmpltb %zmm1, %zmm0, %k0 {%k1} ##
  %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 1, i64 %mask)
  %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1
; CHECK: vpcmpleb %zmm1, %zmm0, %k0 {%k1} ##
  %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 2, i64 %mask)
  %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2
; CHECK: vpcmpunordb %zmm1, %zmm0, %k0 {%k1} ##
  %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 3, i64 %mask)
  %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3
; CHECK: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} ##
  %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 4, i64 %mask)
  %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4
; CHECK: vpcmpnltb %zmm1, %zmm0, %k0 {%k1} ##
  %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 5, i64 %mask)
  %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5
; CHECK: vpcmpnleb %zmm1, %zmm0, %k0 {%k1} ##
  %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 6, i64 %mask)
  %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6
; CHECK: vpcmpordb %zmm1, %zmm0, %k0 {%k1} ##
  %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 7, i64 %mask)
  %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7
  ret <8 x i64> %vec7
}

declare i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8>, <64 x i8>, i8, i64) nounwind readnone
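
; As a reference for what these intrinsics compute, the eq case is sketched
; below in equivalent generic IR. This is a hedged illustration kept in a
; comment so it does not disturb the FileCheck output, and
; @cmp_eq_b_generic is a hypothetical name, not part of this file: the
; compare produces one bit per lane, and the i64 %mask argument zeroes the
; lanes whose mask bit is clear.
;
;   define i64 @cmp_eq_b_generic(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
;     %cmp = icmp eq <64 x i8> %a, %b          ; per-lane compare -> <64 x i1>
;     %m   = bitcast i64 %mask to <64 x i1>    ; one mask bit per lane
;     %and = and <64 x i1> %cmp, %m            ; drop masked-off lanes
;     %res = bitcast <64 x i1> %and to i64
;     ret i64 %res
;   }
;
; The ucmp tests below use the same immediates with unsigned predicates; the
; printer appends a "u" (vpcmpequb, vpcmpltub, ...).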

define <8 x i64> @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
; CHECK-LABEL: test_ucmp_b_512
; CHECK: vpcmpequb %zmm1, %zmm0, %k0 ##
  %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 0, i64 -1)
  %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0
; CHECK: vpcmpltub %zmm1, %zmm0, %k0 ##
  %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 1, i64 -1)
  %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1
; CHECK: vpcmpleub %zmm1, %zmm0, %k0 ##
  %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 2, i64 -1)
  %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2
; CHECK: vpcmpunordub %zmm1, %zmm0, %k0 ##
  %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 3, i64 -1)
  %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3
; CHECK: vpcmpnequb %zmm1, %zmm0, %k0 ##
  %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 4, i64 -1)
  %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4
; CHECK: vpcmpnltub %zmm1, %zmm0, %k0 ##
  %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 5, i64 -1)
  %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5
; CHECK: vpcmpnleub %zmm1, %zmm0, %k0 ##
  %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 6, i64 -1)
  %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6
; CHECK: vpcmpordub %zmm1, %zmm0, %k0 ##
  %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 7, i64 -1)
  %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7
  ret <8 x i64> %vec7
}

define <8 x i64> @test_mask_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; CHECK-LABEL: test_mask_ucmp_b_512
; CHECK: vpcmpequb %zmm1, %zmm0, %k0 {%k1} ##
  %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 0, i64 %mask)
  %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0
; CHECK: vpcmpltub %zmm1, %zmm0, %k0 {%k1} ##
  %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 1, i64 %mask)
  %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1
; CHECK: vpcmpleub %zmm1, %zmm0, %k0 {%k1} ##
  %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 2, i64 %mask)
  %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2
; CHECK: vpcmpunordub %zmm1, %zmm0, %k0 {%k1} ##
  %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 3, i64 %mask)
  %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3
; CHECK: vpcmpnequb %zmm1, %zmm0, %k0 {%k1} ##
  %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 4, i64 %mask)
  %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4
; CHECK: vpcmpnltub %zmm1, %zmm0, %k0 {%k1} ##
  %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 5, i64 %mask)
  %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5
; CHECK: vpcmpnleub %zmm1, %zmm0, %k0 {%k1} ##
  %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 6, i64 %mask)
  %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6
; CHECK: vpcmpordub %zmm1, %zmm0, %k0 {%k1} ##
  %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 7, i64 %mask)
  %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7
  ret <8 x i64> %vec7
}

declare i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8>, <64 x i8>, i8, i64) nounwind readnone
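
; The word-element variants repeat the pattern on <32 x i16> operands; with
; 32 lanes the result mask narrows to i32, and the eight results are
; collected into a <8 x i32>.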

define <8 x i32> @test_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1) {
; CHECK-LABEL: test_cmp_w_512
; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 ##
  %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 0, i32 -1)
  %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
; CHECK: vpcmpltw %zmm1, %zmm0, %k0 ##
  %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 1, i32 -1)
  %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
; CHECK: vpcmplew %zmm1, %zmm0, %k0 ##
  %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 2, i32 -1)
  %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
; CHECK: vpcmpunordw %zmm1, %zmm0, %k0 ##
  %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 3, i32 -1)
  %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
; CHECK: vpcmpneqw %zmm1, %zmm0, %k0 ##
  %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 4, i32 -1)
  %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
; CHECK: vpcmpnltw %zmm1, %zmm0, %k0 ##
  %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 5, i32 -1)
  %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
; CHECK: vpcmpnlew %zmm1, %zmm0, %k0 ##
  %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 6, i32 -1)
  %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
; CHECK: vpcmpordw %zmm1, %zmm0, %k0 ##
  %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 7, i32 -1)
  %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
  ret <8 x i32> %vec7
}

define <8 x i32> @test_mask_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
; CHECK-LABEL: test_mask_cmp_w_512
; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} ##
  %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 0, i32 %mask)
  %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
; CHECK: vpcmpltw %zmm1, %zmm0, %k0 {%k1} ##
  %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 1, i32 %mask)
  %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
; CHECK: vpcmplew %zmm1, %zmm0, %k0 {%k1} ##
  %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 2, i32 %mask)
  %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
; CHECK: vpcmpunordw %zmm1, %zmm0, %k0 {%k1} ##
  %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 3, i32 %mask)
  %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
; CHECK: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} ##
  %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 4, i32 %mask)
  %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
; CHECK: vpcmpnltw %zmm1, %zmm0, %k0 {%k1} ##
  %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 5, i32 %mask)
  %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
; CHECK: vpcmpnlew %zmm1, %zmm0, %k0 {%k1} ##
  %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 6, i32 %mask)
  %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
; CHECK: vpcmpordw %zmm1, %zmm0, %k0 {%k1} ##
  %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 7, i32 %mask)
  %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
  ret <8 x i32> %vec7
}

declare i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16>, <32 x i16>, i8, i32) nounwind readnone
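
; Unsigned word compares: same immediates as above, with the "u"-suffixed
; predicates (vpcmpltuw, vpcmpleuw, ...).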

define <8 x i32> @test_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1) {
; CHECK-LABEL: test_ucmp_w_512
; CHECK: vpcmpequw %zmm1, %zmm0, %k0 ##
  %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 0, i32 -1)
  %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
; CHECK: vpcmpltuw %zmm1, %zmm0, %k0 ##
  %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 1, i32 -1)
  %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
; CHECK: vpcmpleuw %zmm1, %zmm0, %k0 ##
  %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 2, i32 -1)
  %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
; CHECK: vpcmpunorduw %zmm1, %zmm0, %k0 ##
  %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 3, i32 -1)
  %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
; CHECK: vpcmpnequw %zmm1, %zmm0, %k0 ##
  %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 4, i32 -1)
  %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
; CHECK: vpcmpnltuw %zmm1, %zmm0, %k0 ##
  %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 5, i32 -1)
  %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
; CHECK: vpcmpnleuw %zmm1, %zmm0, %k0 ##
  %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 6, i32 -1)
  %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
; CHECK: vpcmporduw %zmm1, %zmm0, %k0 ##
  %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 7, i32 -1)
  %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
  ret <8 x i32> %vec7
}

define <8 x i32> @test_mask_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
; CHECK-LABEL: test_mask_ucmp_w_512
; CHECK: vpcmpequw %zmm1, %zmm0, %k0 {%k1} ##
  %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 0, i32 %mask)
  %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
; CHECK: vpcmpltuw %zmm1, %zmm0, %k0 {%k1} ##
  %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 1, i32 %mask)
  %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
; CHECK: vpcmpleuw %zmm1, %zmm0, %k0 {%k1} ##
  %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 2, i32 %mask)
  %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
; CHECK: vpcmpunorduw %zmm1, %zmm0, %k0 {%k1} ##
  %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 3, i32 %mask)
  %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
; CHECK: vpcmpnequw %zmm1, %zmm0, %k0 {%k1} ##
  %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 4, i32 %mask)
  %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
; CHECK: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1} ##
  %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 5, i32 %mask)
  %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
; CHECK: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1} ##
  %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 6, i32 %mask)
  %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
; CHECK: vpcmporduw %zmm1, %zmm0, %k0 {%k1} ##
  %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 7, i32 %mask)
  %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
  ret <8 x i32> %vec7
}

declare i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16>, <32 x i16>, i8, i32) nounwind readnone
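
; The remaining tests cover the byte/word blend intrinsics at 128-, 256- and
; 512-bit widths. Assuming the usual vpblendm semantics (a set mask bit picks
; the element from the second source, a clear bit from the first), each call
; reduces to a plain select. A hedged sketch for the 256-bit byte case, with
; the hypothetical name @blend_b_256_generic, kept in a comment so it does
; not disturb the test:
;
;   define <32 x i8> @blend_b_256_generic(i32 %mask, <32 x i8> %a1, <32 x i8> %a2) {
;     %m   = bitcast i32 %mask to <32 x i1>      ; one mask bit per byte lane
;     %res = select <32 x i1> %m, <32 x i8> %a2, <32 x i8> %a1
;     ret <32 x i8> %res
;   }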

; CHECK-LABEL: test_x86_mask_blend_b_256
; CHECK: vpblendmb
define <32 x i8> @test_x86_mask_blend_b_256(i32 %a0, <32 x i8> %a1, <32 x i8> %a2) {
  %res = call <32 x i8> @llvm.x86.avx512.mask.blend.b.256(<32 x i8> %a1, <32 x i8> %a2, i32 %a0) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx512.mask.blend.b.256(<32 x i8>, <32 x i8>, i32) nounwind readonly

; CHECK-LABEL: test_x86_mask_blend_w_256
define <16 x i16> @test_x86_mask_blend_w_256(i16 %mask, <16 x i16> %a1, <16 x i16> %a2) {
; CHECK: vpblendmw
  %res = call <16 x i16> @llvm.x86.avx512.mask.blend.w.256(<16 x i16> %a1, <16 x i16> %a2, i16 %mask) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx512.mask.blend.w.256(<16 x i16>, <16 x i16>, i16) nounwind readonly

; CHECK-LABEL: test_x86_mask_blend_b_512
; CHECK: vpblendmb
define <64 x i8> @test_x86_mask_blend_b_512(i64 %a0, <64 x i8> %a1, <64 x i8> %a2) {
  %res = call <64 x i8> @llvm.x86.avx512.mask.blend.b.512(<64 x i8> %a1, <64 x i8> %a2, i64 %a0) ; <<64 x i8>> [#uses=1]
  ret <64 x i8> %res
}
declare <64 x i8> @llvm.x86.avx512.mask.blend.b.512(<64 x i8>, <64 x i8>, i64) nounwind readonly

; CHECK-LABEL: test_x86_mask_blend_w_512
define <32 x i16> @test_x86_mask_blend_w_512(i32 %mask, <32 x i16> %a1, <32 x i16> %a2) {
; CHECK: vpblendmw
  %res = call <32 x i16> @llvm.x86.avx512.mask.blend.w.512(<32 x i16> %a1, <32 x i16> %a2, i32 %mask) ; <<32 x i16>> [#uses=1]
  ret <32 x i16> %res
}
declare <32 x i16> @llvm.x86.avx512.mask.blend.w.512(<32 x i16>, <32 x i16>, i32) nounwind readonly

; CHECK-LABEL: test_x86_mask_blend_b_128
; CHECK: vpblendmb
define <16 x i8> @test_x86_mask_blend_b_128(i16 %a0, <16 x i8> %a1, <16 x i8> %a2) {
  %res = call <16 x i8> @llvm.x86.avx512.mask.blend.b.128(<16 x i8> %a1, <16 x i8> %a2, i16 %a0) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.avx512.mask.blend.b.128(<16 x i8>, <16 x i8>, i16) nounwind readonly

; CHECK-LABEL: test_x86_mask_blend_w_128
define <8 x i16> @test_x86_mask_blend_w_128(i8 %mask, <8 x i16> %a1, <8 x i16> %a2) {
; CHECK: vpblendmw
  %res = call <8 x i16> @llvm.x86.avx512.mask.blend.w.128(<8 x i16> %a1, <8 x i16> %a2, i8 %mask) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.avx512.mask.blend.w.128(<8 x i16>, <8 x i16>, i8) nounwind readonly