1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding| FileCheck %s 3 4 declare <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32>, <8 x i32>, i8) 5 6 define <8 x i32>@test_int_x86_avx512_pbroadcastd_256(<4 x i32> %x0, <8 x i32> %x1, i8 %mask, i32 * %y_ptr) { 7 ; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_256: 8 ; CHECK: ## BB#0: 9 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 10 ; CHECK-NEXT: vpbroadcastd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x58,0xc8] 11 ; CHECK-NEXT: vpbroadcastd %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x58,0xc0] 12 ; CHECK-NEXT: vpaddd (%rsi){1to8}, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x38,0xfe,0x0e] 13 ; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0xc1] 14 ; CHECK-NEXT: retq ## encoding: [0xc3] 15 %y_32 = load i32, i32 * %y_ptr 16 %y = insertelement <4 x i32> undef, i32 %y_32, i32 0 17 %res = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %y, <8 x i32> %x1, i8 -1) 18 %res1 = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> %x1, i8 %mask) 19 %res2 = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> zeroinitializer, i8 %mask) 20 %res3 = add <8 x i32> %res, %res1 21 %res4 = add <8 x i32> %res2, %res3 22 ret <8 x i32> %res4 23 } 24 25 declare <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32>, <4 x i32>, i8) 26 27 define <4 x i32>@test_int_x86_avx512_pbroadcastd_128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask) { 28 ; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_128: 29 ; CHECK: ## BB#0: 30 ; CHECK-NEXT: vpbroadcastd %xmm0, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x58,0xd0] 31 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 32 ; CHECK-NEXT: vpbroadcastd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x58,0xc8] 33 ; CHECK-NEXT: vpbroadcastd %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x58,0xc0] 34 ; CHECK-NEXT: vpaddd %xmm1, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc9] 35 ; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc1] 36 ; CHECK-NEXT: retq ## encoding: [0xc3] 37 %res = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1) 38 %res1 = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask) 39 %res2 = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %mask) 40 %res3 = add <4 x i32> %res, %res1 41 %res4 = add <4 x i32> %res2, %res3 42 ret <4 x i32> %res4 43 } 44 45 declare <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64>, <4 x i64>, i8) 46 47 define <4 x i64>@test_int_x86_avx512_pbroadcastq_256(<2 x i64> %x0, <4 x i64> %x1, i8 %mask) { 48 ; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_256: 49 ; CHECK: ## BB#0: 50 ; CHECK-NEXT: vpbroadcastq %xmm0, %ymm2 ## encoding: [0x62,0xf2,0xfd,0x28,0x59,0xd0] 51 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 52 ; CHECK-NEXT: vpbroadcastq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x59,0xc8] 53 ; CHECK-NEXT: vpbroadcastq %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x59,0xc0] 54 ; CHECK-NEXT: vpaddq %ymm1, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc9] 55 ; CHECK-NEXT: vpaddq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xd4,0xc1] 56 ; CHECK-NEXT: retq ## encoding: [0xc3] 57 %res = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> %x1,i8 -1) 58 %res1 = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> %x1,i8 %mask) 59 %res2 = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> zeroinitializer,i8 %mask) 60 %res3 = add <4 x i64> %res, %res1 61 %res4 = add <4 x i64> %res2, %res3 62 ret <4 x i64> %res4 63 } 64 65 declare <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64>, <2 x i64>, i8) 66 67 define <2 x i64>@test_int_x86_avx512_pbroadcastq_128(<2 x i64> %x0, <2 x i64> %x1, i8 %mask) { 68 ; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_128: 69 ; CHECK: ## BB#0: 70 ; CHECK-NEXT: vpbroadcastq %xmm0, %xmm2 ## encoding: [0x62,0xf2,0xfd,0x08,0x59,0xd0] 71 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 72 ; CHECK-NEXT: vpbroadcastq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x59,0xc8] 73 ; CHECK-NEXT: vpbroadcastq %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x59,0xc0] 74 ; CHECK-NEXT: vpaddq %xmm1, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xc9] 75 ; CHECK-NEXT: vpaddq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xd4,0xc1] 76 ; CHECK-NEXT: retq ## encoding: [0xc3] 77 %res = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> %x1,i8 -1) 78 %res1 = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> %x1,i8 %mask) 79 %res2 = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> zeroinitializer,i8 %mask) 80 %res3 = add <2 x i64> %res, %res1 81 %res4 = add <2 x i64> %res2, %res3 82 ret <2 x i64> %res4 83 } 84 85 declare <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double>, <4 x double>, i8) nounwind readonly 86 87 define <4 x double> @test_x86_vbroadcast_sd_pd_256(<2 x double> %a0, <4 x double> %a1, i8 %mask ) { 88 ; CHECK-LABEL: test_x86_vbroadcast_sd_pd_256: 89 ; CHECK: ## BB#0: 90 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm2 ## encoding: [0x62,0xf2,0xfd,0x28,0x19,0xd0] 91 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 92 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x19,0xc8] 93 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x19,0xc0] 94 ; CHECK-NEXT: vaddpd %ymm1, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc9] 95 ; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x58,0xc1] 96 ; CHECK-NEXT: retq ## encoding: [0xc3] 97 %res = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> zeroinitializer, i8 -1) 98 %res1 = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> %a1, i8 %mask) 99 %res2 = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> zeroinitializer, i8 %mask) 100 %res3 = fadd <4 x double> %res, %res1 101 %res4 = fadd <4 x double> %res2, %res3 102 ret <4 x double> %res4 103 } 104 105 declare <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float>, <8 x float>, i8) nounwind readonly 106 107 define <8 x float> @test_x86_vbroadcast_ss_ps_256(<4 x float> %a0, <8 x float> %a1, i8 %mask ) { 108 ; CHECK-LABEL: test_x86_vbroadcast_ss_ps_256: 109 ; CHECK: ## BB#0: 110 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x18,0xd0] 111 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 112 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x18,0xc8] 113 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x18,0xc0] 114 ; CHECK-NEXT: vaddps %ymm1, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc9] 115 ; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc1] 116 ; CHECK-NEXT: retq ## encoding: [0xc3] 117 %res = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> zeroinitializer, i8 -1) 118 %res1 = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> %a1, i8 %mask) 119 %res2 = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> zeroinitializer, i8 %mask) 120 %res3 = fadd <8 x float> %res, %res1 121 %res4 = fadd <8 x float> %res2, %res3 122 ret <8 x float> %res4 123 } 124 125 declare <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float>, <4 x float>, i8) nounwind readonly 126 127 define <4 x float> @test_x86_vbroadcast_ss_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask ) { 128 ; CHECK-LABEL: test_x86_vbroadcast_ss_ps_128: 129 ; CHECK: ## BB#0: 130 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x18,0xd0] 131 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 132 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x18,0xc8] 133 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x18,0xc0] 134 ; CHECK-NEXT: vaddps %xmm1, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc9] 135 ; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x58,0xc1] 136 ; CHECK-NEXT: retq ## encoding: [0xc3] 137 %res = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 -1) 138 %res1 = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> %a1, i8 %mask) 139 %res2 = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 %mask) 140 %res3 = fadd <4 x float> %res, %res1 141 %res4 = fadd <4 x float> %res2, %res3 142 ret <4 x float> %res4 143 } 144 145 declare <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float>, <4 x float>, i8) 146 147 define <4 x float>@test_int_x86_avx512_mask_movsldup_128(<4 x float> %x0, <4 x float> %x1, i8 %x2) { 148 ; CHECK-LABEL: test_int_x86_avx512_mask_movsldup_128: 149 ; CHECK: ## BB#0: 150 ; CHECK-NEXT: vmovsldup %xmm0, %xmm2 ## encoding: [0x62,0xf1,0x7e,0x08,0x12,0xd0] 151 ; CHECK-NEXT: ## xmm2 = xmm0[0,0,2,2] 152 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 153 ; CHECK-NEXT: vmovsldup %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x12,0xc8] 154 ; CHECK-NEXT: ## xmm1 {%k1} = xmm0[0,0,2,2] 155 ; CHECK-NEXT: vmovsldup %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x12,0xc0] 156 ; CHECK-NEXT: ## xmm0 {%k1} {z} = xmm0[0,0,2,2] 157 ; CHECK-NEXT: vaddps %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xca] 158 ; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x58,0xc1] 159 ; CHECK-NEXT: retq ## encoding: [0xc3] 160 %res = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> %x1, i8 %x2) 161 %res1 = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> %x1, i8 -1) 162 %res2 = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> zeroinitializer, i8 %x2) 163 %res3 = fadd <4 x float> %res, %res1 164 %res4 = fadd <4 x float> %res2, %res3 165 ret <4 x float> %res4 166 } 167 168 declare <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float>, <8 x float>, i8) 169 170 define <8 x float>@test_int_x86_avx512_mask_movsldup_256(<8 x float> %x0, <8 x float> %x1, i8 %x2) { 171 ; CHECK-LABEL: test_int_x86_avx512_mask_movsldup_256: 172 ; CHECK: ## BB#0: 173 ; CHECK-NEXT: vmovsldup %ymm0, %ymm2 ## encoding: [0x62,0xf1,0x7e,0x28,0x12,0xd0] 174 ; CHECK-NEXT: ## ymm2 = ymm0[0,0,2,2,4,4,6,6] 175 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 176 ; CHECK-NEXT: vmovsldup %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x12,0xc8] 177 ; CHECK-NEXT: ## ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6] 178 ; CHECK-NEXT: vmovsldup %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xa9,0x12,0xc0] 179 ; CHECK-NEXT: ## ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6] 180 ; CHECK-NEXT: vaddps %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xca] 181 ; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc1] 182 ; CHECK-NEXT: retq ## encoding: [0xc3] 183 %res = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> %x1, i8 %x2) 184 %res1 = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> %x1, i8 -1) 185 %res2 = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> zeroinitializer, i8 %x2) 186 %res3 = fadd <8 x float> %res, %res1 187 %res4 = fadd <8 x float> %res2, %res3 188 ret <8 x float> %res4 189 } 190 191 declare <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float>, <4 x float>, i8) 192 193 define <4 x float>@test_int_x86_avx512_mask_movshdup_128(<4 x float> %x0, <4 x float> %x1, i8 %x2) { 194 ; CHECK-LABEL: test_int_x86_avx512_mask_movshdup_128: 195 ; CHECK: ## BB#0: 196 ; CHECK-NEXT: vmovshdup %xmm0, %xmm2 ## encoding: [0x62,0xf1,0x7e,0x08,0x16,0xd0] 197 ; CHECK-NEXT: ## xmm2 = xmm0[1,1,3,3] 198 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 199 ; CHECK-NEXT: vmovshdup %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x16,0xc8] 200 ; CHECK-NEXT: ## xmm1 {%k1} = xmm0[1,1,3,3] 201 ; CHECK-NEXT: vmovshdup %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x16,0xc0] 202 ; CHECK-NEXT: ## xmm0 {%k1} {z} = xmm0[1,1,3,3] 203 ; CHECK-NEXT: vaddps %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xca] 204 ; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x58,0xc1] 205 ; CHECK-NEXT: retq ## encoding: [0xc3] 206 %res = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> %x1, i8 %x2) 207 %res1 = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> %x1, i8 -1) 208 %res2 = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> zeroinitializer, i8 %x2) 209 %res3 = fadd <4 x float> %res, %res1 210 %res4 = fadd <4 x float> %res2, %res3 211 ret <4 x float> %res4 212 } 213 214 declare <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float>, <8 x float>, i8) 215 216 define <8 x float>@test_int_x86_avx512_mask_movshdup_256(<8 x float> %x0, <8 x float> %x1, i8 %x2) { 217 ; CHECK-LABEL: test_int_x86_avx512_mask_movshdup_256: 218 ; CHECK: ## BB#0: 219 ; CHECK-NEXT: vmovshdup %ymm0, %ymm2 ## encoding: [0x62,0xf1,0x7e,0x28,0x16,0xd0] 220 ; CHECK-NEXT: ## ymm2 = ymm0[1,1,3,3,5,5,7,7] 221 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 222 ; CHECK-NEXT: vmovshdup %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x16,0xc8] 223 ; CHECK-NEXT: ## ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7] 224 ; CHECK-NEXT: vmovshdup %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xa9,0x16,0xc0] 225 ; CHECK-NEXT: ## ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7] 226 ; CHECK-NEXT: vaddps %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xca] 227 ; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc1] 228 ; CHECK-NEXT: retq ## encoding: [0xc3] 229 %res = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> %x1, i8 %x2) 230 %res1 = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> %x1, i8 -1) 231 %res2 = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> zeroinitializer, i8 %x2) 232 %res3 = fadd <8 x float> %res, %res1 233 %res4 = fadd <8 x float> %res2, %res3 234 ret <8 x float> %res4 235 } 236 declare <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double>, <2 x double>, i8) 237 238 define <2 x double>@test_int_x86_avx512_mask_movddup_128(<2 x double> %x0, <2 x double> %x1, i8 %x2) { 239 ; CHECK-LABEL: test_int_x86_avx512_mask_movddup_128: 240 ; CHECK: ## BB#0: 241 ; CHECK-NEXT: vmovddup %xmm0, %xmm2 ## encoding: [0x62,0xf1,0xff,0x08,0x12,0xd0] 242 ; CHECK-NEXT: ## xmm2 = xmm0[0,0] 243 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 244 ; CHECK-NEXT: vmovddup %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x12,0xc8] 245 ; CHECK-NEXT: ## xmm1 {%k1} = xmm0[0,0] 246 ; CHECK-NEXT: vmovddup %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x12,0xc0] 247 ; CHECK-NEXT: ## xmm0 {%k1} {z} = xmm0[0,0] 248 ; CHECK-NEXT: vaddpd %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xca] 249 ; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x58,0xc1] 250 ; CHECK-NEXT: retq ## encoding: [0xc3] 251 %res = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> %x1, i8 %x2) 252 %res1 = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> %x1, i8 -1) 253 %res2 = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> zeroinitializer, i8 %x2) 254 %res3 = fadd <2 x double> %res, %res1 255 %res4 = fadd <2 x double> %res2, %res3 256 ret <2 x double> %res4 257 } 258 259 declare <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double>, <4 x double>, i8) 260 261 define <4 x double>@test_int_x86_avx512_mask_movddup_256(<4 x double> %x0, <4 x double> %x1, i8 %x2) { 262 ; CHECK-LABEL: test_int_x86_avx512_mask_movddup_256: 263 ; CHECK: ## BB#0: 264 ; CHECK-NEXT: vmovddup %ymm0, %ymm2 ## encoding: [0x62,0xf1,0xff,0x28,0x12,0xd0] 265 ; CHECK-NEXT: ## ymm2 = ymm0[0,0,2,2] 266 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 267 ; CHECK-NEXT: vmovddup %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x29,0x12,0xc8] 268 ; CHECK-NEXT: ## ymm1 {%k1} = ymm0[0,0,2,2] 269 ; CHECK-NEXT: vmovddup %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0xa9,0x12,0xc0] 270 ; CHECK-NEXT: ## ymm0 {%k1} {z} = ymm0[0,0,2,2] 271 ; CHECK-NEXT: vaddpd %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xca] 272 ; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x58,0xc1] 273 ; CHECK-NEXT: retq ## encoding: [0xc3] 274 %res = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> %x1, i8 %x2) 275 %res1 = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> %x1, i8 -1) 276 %res2 = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> zeroinitializer, i8 %x2) 277 %res3 = fadd <4 x double> %res, %res1 278 %res4 = fadd <4 x double> %res2, %res3 279 ret <4 x double> %res4 280 } 281 282 declare <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double>, i32, <4 x double>, i8) 283 284 define <4 x double>@test_int_x86_avx512_mask_vpermil_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) { 285 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_256: 286 ; CHECK: ## BB#0: 287 ; CHECK-NEXT: vpermilpd $6, %ymm0, %ymm2 ## encoding: [0x62,0xf3,0xfd,0x28,0x05,0xd0,0x06] 288 ; CHECK-NEXT: ## ymm2 = ymm0[0,1,3,2] 289 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 290 ; CHECK-NEXT: vpermilpd $6, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x05,0xc8,0x06] 291 ; CHECK-NEXT: ## ymm1 {%k1} = ymm0[0,1,3,2] 292 ; CHECK-NEXT: vpermilpd $6, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x05,0xc0,0x06] 293 ; CHECK-NEXT: ## ymm0 {%k1} {z} = ymm0[0,1,3,2] 294 ; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0] 295 ; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc0] 296 ; CHECK-NEXT: retq ## encoding: [0xc3] 297 %res = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 %x3) 298 %res1 = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> zeroinitializer, i8 %x3) 299 %res2 = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 -1) 300 %res3 = fadd <4 x double> %res, %res1 301 %res4 = fadd <4 x double> %res2, %res3 302 ret <4 x double> %res4 303 } 304 305 declare <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double>, i32, <2 x double>, i8) 306 307 define <2 x double>@test_int_x86_avx512_mask_vpermil_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) { 308 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_128: 309 ; CHECK: ## BB#0: 310 ; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm2 ## encoding: [0x62,0xf3,0xfd,0x08,0x05,0xd0,0x01] 311 ; CHECK-NEXT: ## xmm2 = xmm0[1,0] 312 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 313 ; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x05,0xc8,0x01] 314 ; CHECK-NEXT: ## xmm1 {%k1} = xmm0[1,0] 315 ; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0x89,0x05,0xc0,0x01] 316 ; CHECK-NEXT: ## xmm0 {%k1} {z} = xmm0[1,0] 317 ; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0] 318 ; CHECK-NEXT: vaddpd %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x58,0xc2] 319 ; CHECK-NEXT: retq ## encoding: [0xc3] 320 %res = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> %x2, i8 %x3) 321 %res1 = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> zeroinitializer, i8 %x3) 322 %res2 = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> %x2, i8 -1) 323 %res3 = fadd <2 x double> %res, %res1 324 %res4 = fadd <2 x double> %res3, %res2 325 ret <2 x double> %res4 326 } 327 328 declare <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float>, i32, <8 x float>, i8) 329 330 define <8 x float>@test_int_x86_avx512_mask_vpermil_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) { 331 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_256: 332 ; CHECK: ## BB#0: 333 ; CHECK-NEXT: vpermilps $22, %ymm0, %ymm2 ## encoding: [0x62,0xf3,0x7d,0x28,0x04,0xd0,0x16] 334 ; CHECK-NEXT: ## ymm2 = ymm0[2,1,1,0,6,5,5,4] 335 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 336 ; CHECK-NEXT: vpermilps $22, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x04,0xc8,0x16] 337 ; CHECK-NEXT: ## ymm1 {%k1} = ymm0[2,1,1,0,6,5,5,4] 338 ; CHECK-NEXT: vpermilps $22, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x04,0xc0,0x16] 339 ; CHECK-NEXT: ## ymm0 {%k1} {z} = ymm0[2,1,1,0,6,5,5,4] 340 ; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0] 341 ; CHECK-NEXT: vaddps %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc2] 342 ; CHECK-NEXT: retq ## encoding: [0xc3] 343 %res = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 %x3) 344 %res1 = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> zeroinitializer, i8 %x3) 345 %res2 = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 -1) 346 %res3 = fadd <8 x float> %res, %res1 347 %res4 = fadd <8 x float> %res3, %res2 348 ret <8 x float> %res4 349 } 350 351 declare <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float>, i32, <4 x float>, i8) 352 353 define <4 x float>@test_int_x86_avx512_mask_vpermil_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) { 354 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_128: 355 ; CHECK: ## BB#0: 356 ; CHECK-NEXT: vpermilps $22, %xmm0, %xmm2 ## encoding: [0x62,0xf3,0x7d,0x08,0x04,0xd0,0x16] 357 ; CHECK-NEXT: ## xmm2 = xmm0[2,1,1,0] 358 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 359 ; CHECK-NEXT: vpermilps $22, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x04,0xc8,0x16] 360 ; CHECK-NEXT: ## xmm1 {%k1} = xmm0[2,1,1,0] 361 ; CHECK-NEXT: vpermilps $22, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0x89,0x04,0xc0,0x16] 362 ; CHECK-NEXT: ## xmm0 {%k1} {z} = xmm0[2,1,1,0] 363 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0] 364 ; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc0] 365 ; CHECK-NEXT: retq ## encoding: [0xc3] 366 %res = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 %x3) 367 %res1 = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> zeroinitializer, i8 %x3) 368 %res2 = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 -1) 369 %res3 = fadd <4 x float> %res, %res1 370 %res4 = fadd <4 x float> %res2, %res3 371 ret <4 x float> %res4 372 } 373 374 declare <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double>, i32, <4 x double>, i8) 375 376 define <4 x double>@test_int_x86_avx512_mask_perm_df_256(<4 x double> %x0, i32 %x1, <4 x double> %x2, i8 %x3) { 377 ; CHECK-LABEL: test_int_x86_avx512_mask_perm_df_256: 378 ; CHECK: ## BB#0: 379 ; CHECK-NEXT: vpermpd $3, %ymm0, %ymm2 ## encoding: [0x62,0xf3,0xfd,0x28,0x01,0xd0,0x03] 380 ; CHECK-NEXT: ## ymm2 = ymm0[3,0,0,0] 381 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 382 ; CHECK-NEXT: vpermpd $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x01,0xc8,0x03] 383 ; CHECK-NEXT: ## ymm1 {%k1} = ymm0[3,0,0,0] 384 ; CHECK-NEXT: vpermpd $3, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x01,0xc0,0x03] 385 ; CHECK-NEXT: ## ymm0 {%k1} {z} = ymm0[3,0,0,0] 386 ; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0] 387 ; CHECK-NEXT: vaddpd %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x58,0xc2] 388 ; CHECK-NEXT: retq ## encoding: [0xc3] 389 %res = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i32 3, <4 x double> %x2, i8 %x3) 390 %res1 = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i32 3, <4 x double> zeroinitializer, i8 %x3) 391 %res2 = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i32 3, <4 x double> %x2, i8 -1) 392 %res3 = fadd <4 x double> %res, %res1 393 %res4 = fadd <4 x double> %res3, %res2 394 ret <4 x double> %res4 395 } 396 397 declare <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64>, i32, <4 x i64>, i8) 398 399 define <4 x i64>@test_int_x86_avx512_mask_perm_di_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) { 400 ; CHECK-LABEL: test_int_x86_avx512_mask_perm_di_256: 401 ; CHECK: ## BB#0: 402 ; CHECK-NEXT: vpermq $3, %ymm0, %ymm2 ## encoding: [0x62,0xf3,0xfd,0x28,0x00,0xd0,0x03] 403 ; CHECK-NEXT: ## ymm2 = ymm0[3,0,0,0] 404 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 405 ; CHECK-NEXT: vpermq $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x00,0xc8,0x03] 406 ; CHECK-NEXT: ## ymm1 {%k1} = ymm0[3,0,0,0] 407 ; CHECK-NEXT: vpermq $3, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x00,0xc0,0x03] 408 ; CHECK-NEXT: ## ymm0 {%k1} {z} = ymm0[3,0,0,0] 409 ; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] 410 ; CHECK-NEXT: vpaddq %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xd4,0xc2] 411 ; CHECK-NEXT: retq ## encoding: [0xc3] 412 %res = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3) 413 %res1 = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i32 3, <4 x i64> zeroinitializer, i8 %x3) 414 %res2 = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 -1) 415 %res3 = add <4 x i64> %res, %res1 416 %res4 = add <4 x i64> %res3, %res2 417 ret <4 x i64> %res4 418 } 419 420 declare void @llvm.x86.avx512.mask.store.pd.128(i8*, <2 x double>, i8) 421 422 define void@test_int_x86_avx512_mask_store_pd_128(i8* %ptr1, i8* %ptr2, <2 x double> %x1, i8 %x2) { 423 ; CHECK-LABEL: test_int_x86_avx512_mask_store_pd_128: 424 ; CHECK: ## BB#0: 425 ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] 426 ; CHECK-NEXT: vmovapd %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x29,0x07] 427 ; CHECK-NEXT: vmovapd %xmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x08,0x29,0x06] 428 ; CHECK-NEXT: retq ## encoding: [0xc3] 429 call void @llvm.x86.avx512.mask.store.pd.128(i8* %ptr1, <2 x double> %x1, i8 %x2) 430 call void @llvm.x86.avx512.mask.store.pd.128(i8* %ptr2, <2 x double> %x1, i8 -1) 431 ret void 432 } 433 434 declare void @llvm.x86.avx512.mask.store.pd.256(i8*, <4 x double>, i8) 435 436 define void@test_int_x86_avx512_mask_store_pd_256(i8* %ptr1, i8* %ptr2, <4 x double> %x1, i8 %x2) { 437 ; CHECK-LABEL: test_int_x86_avx512_mask_store_pd_256: 438 ; CHECK: ## BB#0: 439 ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] 440 ; CHECK-NEXT: vmovapd %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x29,0x07] 441 ; CHECK-NEXT: vmovapd %ymm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x28,0x29,0x06] 442 ; CHECK-NEXT: retq ## encoding: [0xc3] 443 call void @llvm.x86.avx512.mask.store.pd.256(i8* %ptr1, <4 x double> %x1, i8 %x2) 444 call void @llvm.x86.avx512.mask.store.pd.256(i8* %ptr2, <4 x double> %x1, i8 -1) 445 ret void 446 } 447 448 declare void @llvm.x86.avx512.mask.storeu.pd.128(i8*, <2 x double>, i8) 449 450 define void@test_int_x86_avx512_mask_storeu_pd_128(i8* %ptr1, i8* %ptr2, <2 x double> %x1, i8 %x2) { 451 ; CHECK-LABEL: test_int_x86_avx512_mask_storeu_pd_128: 452 ; CHECK: ## BB#0: 453 ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] 454 ; CHECK-NEXT: vmovupd %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x11,0x07] 455 ; CHECK-NEXT: vmovupd %xmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x08,0x11,0x06] 456 ; CHECK-NEXT: retq ## encoding: [0xc3] 457 call void @llvm.x86.avx512.mask.storeu.pd.128(i8* %ptr1, <2 x double> %x1, i8 %x2) 458 call void @llvm.x86.avx512.mask.storeu.pd.128(i8* %ptr2, <2 x double> %x1, i8 -1) 459 ret void 460 } 461 462 declare void @llvm.x86.avx512.mask.storeu.pd.256(i8*, <4 x double>, i8) 463 464 define void@test_int_x86_avx512_mask_storeu_pd_256(i8* %ptr1, i8* %ptr2, <4 x double> %x1, i8 %x2) { 465 ; CHECK-LABEL: test_int_x86_avx512_mask_storeu_pd_256: 466 ; CHECK: ## BB#0: 467 ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] 468 ; CHECK-NEXT: vmovupd %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x11,0x07] 469 ; CHECK-NEXT: vmovupd %ymm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x28,0x11,0x06] 470 ; CHECK-NEXT: retq ## encoding: [0xc3] 471 call void @llvm.x86.avx512.mask.storeu.pd.256(i8* %ptr1, <4 x double> %x1, i8 %x2) 472 call void @llvm.x86.avx512.mask.storeu.pd.256(i8* %ptr2, <4 x double> %x1, i8 -1) 473 ret void 474 } 475 476 declare void @llvm.x86.avx512.mask.store.ps.128(i8*, <4 x float>, i8) 477 478 define void@test_int_x86_avx512_mask_store_ps_128(i8* %ptr1, i8* %ptr2, <4 x float> %x1, i8 %x2) { 479 ; CHECK-LABEL: test_int_x86_avx512_mask_store_ps_128: 480 ; CHECK: ## BB#0: 481 ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] 482 ; CHECK-NEXT: vmovaps %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x29,0x07] 483 ; CHECK-NEXT: vmovaps %xmm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x08,0x29,0x06] 484 ; CHECK-NEXT: retq ## encoding: [0xc3] 485 call void @llvm.x86.avx512.mask.store.ps.128(i8* %ptr1, <4 x float> %x1, i8 %x2) 486 call void @llvm.x86.avx512.mask.store.ps.128(i8* %ptr2, <4 x float> %x1, i8 -1) 487 ret void 488 } 489 490 declare void @llvm.x86.avx512.mask.store.ps.256(i8*, <8 x float>, i8) 491 492 define void@test_int_x86_avx512_mask_store_ps_256(i8* %ptr1, i8* %ptr2, <8 x float> %x1, i8 %x2) { 493 ; CHECK-LABEL: test_int_x86_avx512_mask_store_ps_256: 494 ; CHECK: ## BB#0: 495 ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] 496 ; CHECK-NEXT: vmovaps %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x29,0x07] 497 ; CHECK-NEXT: vmovaps %ymm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x28,0x29,0x06] 498 ; CHECK-NEXT: retq ## encoding: [0xc3] 499 call void @llvm.x86.avx512.mask.store.ps.256(i8* %ptr1, <8 x float> %x1, i8 %x2) 500 call void @llvm.x86.avx512.mask.store.ps.256(i8* %ptr2, <8 x float> %x1, i8 -1) 501 ret void 502 } 503 504 declare void @llvm.x86.avx512.mask.storeu.ps.128(i8*, <4 x float>, i8) 505 506 define void@test_int_x86_avx512_mask_storeu_ps_128(i8* %ptr1, i8* %ptr2, <4 x float> %x1, i8 %x2) { 507 ; CHECK-LABEL: test_int_x86_avx512_mask_storeu_ps_128: 508 ; CHECK: ## BB#0: 509 ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] 510 ; CHECK-NEXT: vmovups %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x11,0x07] 511 ; CHECK-NEXT: vmovups %xmm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x08,0x11,0x06] 512 ; CHECK-NEXT: retq ## encoding: [0xc3] 513 call void @llvm.x86.avx512.mask.storeu.ps.128(i8* %ptr1, <4 x float> %x1, i8 %x2) 514 call void @llvm.x86.avx512.mask.storeu.ps.128(i8* %ptr2, <4 x float> %x1, i8 -1) 515 ret void 516 } 517 518 declare void @llvm.x86.avx512.mask.storeu.ps.256(i8*, <8 x float>, i8) 519 520 define void@test_int_x86_avx512_mask_storeu_ps_256(i8* %ptr1, i8* %ptr2, <8 x float> %x1, i8 %x2) { 521 ; CHECK-LABEL: test_int_x86_avx512_mask_storeu_ps_256: 522 ; CHECK: ## BB#0: 523 ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] 524 ; CHECK-NEXT: vmovups %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x11,0x07] 525 ; CHECK-NEXT: vmovups %ymm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x28,0x11,0x06] 526 ; CHECK-NEXT: retq ## encoding: [0xc3] 527 call void @llvm.x86.avx512.mask.storeu.ps.256(i8* %ptr1, <8 x float> %x1, i8 %x2) 528 call void @llvm.x86.avx512.mask.storeu.ps.256(i8* %ptr2, <8 x float> %x1, i8 -1) 529 ret void 530 } 531 532 declare void @llvm.x86.avx512.mask.storeu.q.128(i8*, <2 x i64>, i8) 533 534 define void@test_int_x86_avx512_mask_storeu_q_128(i8* %ptr1, i8* %ptr2, <2 x i64> %x1, i8 %x2) { 535 ; CHECK-LABEL: test_int_x86_avx512_mask_storeu_q_128: 536 ; CHECK: ## BB#0: 537 ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] 538 ; CHECK-NEXT: vmovdqu64 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfe,0x09,0x7f,0x07] 539 ; CHECK-NEXT: vmovdqu64 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0xfe,0x08,0x7f,0x06] 540 ; CHECK-NEXT: retq ## encoding: [0xc3] 541 call void @llvm.x86.avx512.mask.storeu.q.128(i8* %ptr1, <2 x i64> %x1, i8 %x2) 542 call void @llvm.x86.avx512.mask.storeu.q.128(i8* %ptr2, <2 x i64> %x1, i8 -1) 543 ret void 544 } 545 546 declare void @llvm.x86.avx512.mask.storeu.q.256(i8*, <4 x i64>, i8) 547 548 define void@test_int_x86_avx512_mask_storeu_q_256(i8* %ptr1, i8* %ptr2, <4 x i64> %x1, i8 %x2) { 549 ; CHECK-LABEL: test_int_x86_avx512_mask_storeu_q_256: 550 ; CHECK: ## BB#0: 551 ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] 552 ; CHECK-NEXT: vmovdqu64 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfe,0x29,0x7f,0x07] 553 ; CHECK-NEXT: vmovdqu64 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0xfe,0x28,0x7f,0x06] 554 ; CHECK-NEXT: retq ## encoding: [0xc3] 555 call void @llvm.x86.avx512.mask.storeu.q.256(i8* %ptr1, <4 x i64> %x1, i8 %x2) 556 call void @llvm.x86.avx512.mask.storeu.q.256(i8* %ptr2, <4 x i64> %x1, i8 -1) 557 ret void 558 } 559 560 declare void @llvm.x86.avx512.mask.storeu.d.128(i8*, <4 x i32>, i8) 561 562 define void@test_int_x86_avx512_mask_storeu_d_128(i8* %ptr1, i8* %ptr2, <4 x i32> %x1, i8 %x2) { 563 ; CHECK-LABEL: test_int_x86_avx512_mask_storeu_d_128: 564 ; CHECK: ## BB#0: 565 ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] 566 ; CHECK-NEXT: vmovdqu32 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x7f,0x07] 567 ; CHECK-NEXT: vmovdqu32 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0x7e,0x08,0x7f,0x06] 568 ; CHECK-NEXT: retq ## encoding: [0xc3] 569 call void @llvm.x86.avx512.mask.storeu.d.128(i8* %ptr1, <4 x i32> %x1, i8 %x2) 570 call void @llvm.x86.avx512.mask.storeu.d.128(i8* %ptr2, <4 x i32> %x1, i8 -1) 571 ret void 572 } 573 574 declare void @llvm.x86.avx512.mask.storeu.d.256(i8*, <8 x i32>, i8) 575 576 define void@test_int_x86_avx512_mask_storeu_d_256(i8* %ptr1, i8* %ptr2, <8 x i32> %x1, i8 %x2) { 577 ; CHECK-LABEL: test_int_x86_avx512_mask_storeu_d_256: 578 ; CHECK: ## BB#0: 579 ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] 580 ; CHECK-NEXT: vmovdqu32 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x7f,0x07] 581 ; CHECK-NEXT: vmovdqu32 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0x7e,0x28,0x7f,0x06] 582 ; CHECK-NEXT: retq ## encoding: [0xc3] 583 call void @llvm.x86.avx512.mask.storeu.d.256(i8* %ptr1, <8 x i32> %x1, i8 %x2) 584 call void @llvm.x86.avx512.mask.storeu.d.256(i8* %ptr2, <8 x i32> %x1, i8 -1) 585 ret void 586 } 587 588 declare void @llvm.x86.avx512.mask.store.q.128(i8*, <2 x i64>, i8) 589 590 define void@test_int_x86_avx512_mask_store_q_128(i8* %ptr1, i8* %ptr2, <2 x i64> %x1, i8 %x2) { 591 ; CHECK-LABEL: test_int_x86_avx512_mask_store_q_128: 592 ; CHECK: ## BB#0: 593 ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] 594 ; CHECK-NEXT: vmovdqa64 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x7f,0x07] 595 ; CHECK-NEXT: vmovdqa64 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x08,0x7f,0x06] 596 ; CHECK-NEXT: retq ## encoding: [0xc3] 597 call void @llvm.x86.avx512.mask.store.q.128(i8* %ptr1, <2 x i64> %x1, i8 %x2) 598 call void @llvm.x86.avx512.mask.store.q.128(i8* %ptr2, <2 x i64> %x1, i8 -1) 599 ret void 600 } 601 602 declare void @llvm.x86.avx512.mask.store.q.256(i8*, <4 x i64>, i8) 603 604 define void@test_int_x86_avx512_mask_store_q_256(i8* %ptr1, i8* %ptr2, <4 x i64> %x1, i8 %x2) { 605 ; CHECK-LABEL: test_int_x86_avx512_mask_store_q_256: 606 ; CHECK: ## BB#0: 607 ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] 608 ; CHECK-NEXT: vmovdqa64 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x7f,0x07] 609 ; CHECK-NEXT: vmovdqa64 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x28,0x7f,0x06] 610 ; CHECK-NEXT: retq ## encoding: [0xc3] 611 call void @llvm.x86.avx512.mask.store.q.256(i8* %ptr1, <4 x i64> %x1, i8 %x2) 612 call void @llvm.x86.avx512.mask.store.q.256(i8* %ptr2, <4 x i64> %x1, i8 -1) 613 ret void 614 } 615 616 declare void @llvm.x86.avx512.mask.store.d.128(i8*, <4 x i32>, i8) 617 618 define void@test_int_x86_avx512_mask_store_d_128(i8* %ptr1, i8* %ptr2, <4 x i32> %x1, i8 %x2) { 619 ; CHECK-LABEL: test_int_x86_avx512_mask_store_d_128: 620 ; CHECK: ## BB#0: 621 ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] 622 ; CHECK-NEXT: vmovdqa32 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x7f,0x07] 623 ; CHECK-NEXT: vmovdqa32 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0x7d,0x08,0x7f,0x06] 624 ; CHECK-NEXT: retq ## encoding: [0xc3] 625 call void @llvm.x86.avx512.mask.store.d.128(i8* %ptr1, <4 x i32> %x1, i8 %x2) 626 call void @llvm.x86.avx512.mask.store.d.128(i8* %ptr2, <4 x i32> %x1, i8 -1) 627 ret void 628 } 629 630 declare void @llvm.x86.avx512.mask.store.d.256(i8*, <8 x i32>, i8) 631 632 define void@test_int_x86_avx512_mask_store_d_256(i8* %ptr1, i8* %ptr2, <8 x i32> %x1, i8 %x2) { 633 ; CHECK-LABEL: test_int_x86_avx512_mask_store_d_256: 634 ; CHECK: ## BB#0: 635 ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] 636 ; CHECK-NEXT: vmovdqa32 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x7f,0x07] 637 ; CHECK-NEXT: vmovdqa32 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0x7d,0x28,0x7f,0x06] 638 ; CHECK-NEXT: retq ## encoding: [0xc3] 639 call void @llvm.x86.avx512.mask.store.d.256(i8* %ptr1, <8 x i32> %x1, i8 %x2) 640 call void @llvm.x86.avx512.mask.store.d.256(i8* %ptr2, <8 x i32> %x1, i8 -1) 641 ret void 642 } 643 644 define <8 x float> @test_mask_load_aligned_ps_256(<8 x float> %data, i8* %ptr, i8 %mask) { 645 ; CHECK-LABEL: test_mask_load_aligned_ps_256: 646 ; CHECK: ## BB#0: 647 ; CHECK-NEXT: vmovaps (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0x07] 648 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 649 ; CHECK-NEXT: vmovaps (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x28,0x07] 650 ; CHECK-NEXT: vmovaps (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x28,0x0f] 651 ; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0] 652 ; CHECK-NEXT: retq ## encoding: [0xc3] 653 %res = call <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 -1) 654 %res1 = call <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8* %ptr, <8 x float> %res, i8 %mask) 655 %res2 = call <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 %mask) 656 %res4 = fadd <8 x float> %res2, %res1 657 ret <8 x float> %res4 658 } 659 660 declare <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8*, <8 x float>, i8) 661 662 define <8 x float> @test_mask_load_unaligned_ps_256(<8 x float> %data, i8* %ptr, i8 %mask) { 663 ; CHECK-LABEL: test_mask_load_unaligned_ps_256: 664 ; CHECK: ## BB#0: 665 ; CHECK-NEXT: vmovups (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x10,0x07] 666 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 667 ; CHECK-NEXT: vmovups (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x10,0x07] 668 ; CHECK-NEXT: vmovups (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x10,0x0f] 669 ; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0] 670 ; CHECK-NEXT: retq ## encoding: [0xc3] 671 %res = call <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 -1) 672 %res1 = call <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8* %ptr, <8 x float> %res, i8 %mask) 673 %res2 = call <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 %mask) 674 %res4 = fadd <8 x float> %res2, %res1 675 ret <8 x float> %res4 676 } 677 678 declare <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8*, <8 x float>, i8) 679 680 define <4 x double> @test_mask_load_aligned_pd_256(<4 x double> %data, i8* %ptr, i8 %mask) { 681 ; CHECK-LABEL: test_mask_load_aligned_pd_256: 682 ; CHECK: ## BB#0: 683 ; CHECK-NEXT: vmovapd (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x28,0x07] 684 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 685 ; CHECK-NEXT: vmovapd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x28,0x07] 686 ; CHECK-NEXT: vmovapd (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x28,0x0f] 687 ; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0] 688 ; CHECK-NEXT: retq ## encoding: [0xc3] 689 %res = call <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 -1) 690 %res1 = call <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8* %ptr, <4 x double> %res, i8 %mask) 691 %res2 = call <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 %mask) 692 %res4 = fadd <4 x double> %res2, %res1 693 ret <4 x double> %res4 694 } 695 696 declare <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8*, <4 x double>, i8) 697 698 define <4 x double> @test_mask_load_unaligned_pd_256(<4 x double> %data, i8* %ptr, i8 %mask) { 699 ; CHECK-LABEL: test_mask_load_unaligned_pd_256: 700 ; CHECK: ## BB#0: 701 ; CHECK-NEXT: vmovupd (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x10,0x07] 702 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 703 ; CHECK-NEXT: vmovupd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x10,0x07] 704 ; CHECK-NEXT: vmovupd (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x10,0x0f] 705 ; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0] 706 ; CHECK-NEXT: retq ## encoding: [0xc3] 707 %res = call <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 -1) 708 %res1 = call <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8* %ptr, <4 x double> %res, i8 %mask) 709 %res2 = call <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 %mask) 710 %res4 = fadd <4 x double> %res2, %res1 711 ret <4 x double> %res4 712 } 713 714 declare <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8*, <4 x double>, i8) 715 716 define <4 x float> @test_mask_load_aligned_ps_128(<4 x float> %data, i8* %ptr, i8 %mask) { 717 ; CHECK-LABEL: test_mask_load_aligned_ps_128: 718 ; CHECK: ## BB#0: 719 ; CHECK-NEXT: vmovaps (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0x07] 720 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 721 ; CHECK-NEXT: vmovaps (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x28,0x07] 722 ; CHECK-NEXT: vmovaps (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x28,0x0f] 723 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0] 724 ; CHECK-NEXT: retq ## encoding: [0xc3] 725 %res = call <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 -1) 726 %res1 = call <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8* %ptr, <4 x float> %res, i8 %mask) 727 %res2 = call <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 %mask) 728 %res4 = fadd <4 x float> %res2, %res1 729 ret <4 x float> %res4 730 } 731 732 declare <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8*, <4 x float>, i8) 733 734 define <4 x float> @test_mask_load_unaligned_ps_128(<4 x float> %data, i8* %ptr, i8 %mask) { 735 ; CHECK-LABEL: test_mask_load_unaligned_ps_128: 736 ; CHECK: ## BB#0: 737 ; CHECK-NEXT: vmovups (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x10,0x07] 738 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 739 ; CHECK-NEXT: vmovups (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x10,0x07] 740 ; CHECK-NEXT: vmovups (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x10,0x0f] 741 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0] 742 ; CHECK-NEXT: retq ## encoding: [0xc3] 743 %res = call <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 -1) 744 %res1 = call <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8* %ptr, <4 x float> %res, i8 %mask) 745 %res2 = call <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 %mask) 746 %res4 = fadd <4 x float> %res2, %res1 747 ret <4 x float> %res4 748 } 749 750 declare <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8*, <4 x float>, i8) 751 752 define <2 x double> @test_mask_load_aligned_pd_128(<2 x double> %data, i8* %ptr, i8 %mask) { 753 ; CHECK-LABEL: test_mask_load_aligned_pd_128: 754 ; CHECK: ## BB#0: 755 ; CHECK-NEXT: vmovapd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x28,0x07] 756 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 757 ; CHECK-NEXT: vmovapd (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x28,0x07] 758 ; CHECK-NEXT: vmovapd (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x28,0x0f] 759 ; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0] 760 ; CHECK-NEXT: retq ## encoding: [0xc3] 761 %res = call <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 -1) 762 %res1 = call <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8* %ptr, <2 x double> %res, i8 %mask) 763 %res2 = call <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 %mask) 764 %res4 = fadd <2 x double> %res2, %res1 765 ret <2 x double> %res4 766 } 767 768 declare <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8*, <2 x double>, i8) 769 770 define <2 x double> @test_mask_load_unaligned_pd_128(<2 x double> %data, i8* %ptr, i8 %mask) { 771 ; CHECK-LABEL: test_mask_load_unaligned_pd_128: 772 ; CHECK: ## BB#0: 773 ; CHECK-NEXT: vmovupd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x10,0x07] 774 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 775 ; CHECK-NEXT: vmovupd (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x10,0x07] 776 ; CHECK-NEXT: vmovupd (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x10,0x0f] 777 ; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0] 778 ; CHECK-NEXT: retq ## encoding: [0xc3] 779 %res = call <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 -1) 780 %res1 = call <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8* %ptr, <2 x double> %res, i8 %mask) 781 %res2 = call <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 %mask) 782 %res4 = fadd <2 x double> %res2, %res1 783 ret <2 x double> %res4 784 } 785 786 declare <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8*, <2 x double>, i8) 787 788 declare <4 x i32> @llvm.x86.avx512.mask.loadu.d.128(i8*, <4 x i32>, i8) 789 790 define <4 x i32> @test_mask_load_unaligned_d_128(i8* %ptr, i8* %ptr2, <4 x i32> %data, i8 %mask) { 791 ; CHECK-LABEL: test_mask_load_unaligned_d_128: 792 ; CHECK: ## BB#0: 793 ; CHECK-NEXT: vmovdqu32 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x6f,0x07] 794 ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] 795 ; CHECK-NEXT: vmovdqu32 (%rsi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x6f,0x06] 796 ; CHECK-NEXT: vmovdqu32 (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x6f,0x0f] 797 ; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] 798 ; CHECK-NEXT: retq ## encoding: [0xc3] 799 %res = call <4 x i32> @llvm.x86.avx512.mask.loadu.d.128(i8* %ptr, <4 x i32> zeroinitializer, i8 -1) 800 %res1 = call <4 x i32> @llvm.x86.avx512.mask.loadu.d.128(i8* %ptr2, <4 x i32> %res, i8 %mask) 801 %res2 = call <4 x i32> @llvm.x86.avx512.mask.loadu.d.128(i8* %ptr, <4 x i32> zeroinitializer, i8 %mask) 802 %res4 = add <4 x i32> %res2, %res1 803 ret <4 x i32> %res4 804 } 805 806 declare <8 x i32> @llvm.x86.avx512.mask.loadu.d.256(i8*, <8 x i32>, i8) 807 808 define <8 x i32> @test_mask_load_unaligned_d_256(i8* %ptr, i8* %ptr2, <8 x i32> %data, i8 %mask) { 809 ; CHECK-LABEL: test_mask_load_unaligned_d_256: 810 ; CHECK: ## BB#0: 811 ; CHECK-NEXT: vmovdqu32 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7e,0x28,0x6f,0x07] 812 ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] 813 ; CHECK-NEXT: vmovdqu32 (%rsi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x6f,0x06] 814 ; CHECK-NEXT: vmovdqu32 (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xa9,0x6f,0x0f] 815 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] 816 ; CHECK-NEXT: retq ## encoding: [0xc3] 817 %res = call <8 x i32> @llvm.x86.avx512.mask.loadu.d.256(i8* %ptr, <8 x i32> zeroinitializer, i8 -1) 818 %res1 = call <8 x i32> @llvm.x86.avx512.mask.loadu.d.256(i8* %ptr2, <8 x i32> %res, i8 %mask) 819 %res2 = call <8 x i32> @llvm.x86.avx512.mask.loadu.d.256(i8* %ptr, <8 x i32> zeroinitializer, i8 %mask) 820 %res4 = add <8 x i32> %res2, %res1 821 ret <8 x i32> %res4 822 } 823 824 declare <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8*, <2 x i64>, i8) 825 826 define <2 x i64> @test_mask_load_unaligned_q_128(i8* %ptr, i8* %ptr2, <2 x i64> %data, i8 %mask) { 827 ; CHECK-LABEL: test_mask_load_unaligned_q_128: 828 ; CHECK: ## BB#0: 829 ; CHECK-NEXT: vmovdqu64 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfe,0x08,0x6f,0x07] 830 ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] 831 ; CHECK-NEXT: vmovdqu64 (%rsi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x09,0x6f,0x06] 832 ; CHECK-NEXT: vmovdqu64 (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0x89,0x6f,0x0f] 833 ; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] 834 ; CHECK-NEXT: retq ## encoding: [0xc3] 835 %res = call <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8* %ptr, <2 x i64> zeroinitializer, i8 -1) 836 %res1 = call <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8* %ptr2, <2 x i64> %res, i8 %mask) 837 %res2 = call <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8* %ptr, <2 x i64> zeroinitializer, i8 %mask) 838 %res4 = add <2 x i64> %res2, %res1 839 ret <2 x i64> %res4 840 } 841 842 declare <4 x i64> @llvm.x86.avx512.mask.loadu.q.256(i8*, <4 x i64>, i8) 843 844 define <4 x i64> @test_mask_load_unaligned_q_256(i8* %ptr, i8* %ptr2, <4 x i64> %data, i8 %mask) { 845 ; CHECK-LABEL: test_mask_load_unaligned_q_256: 846 ; CHECK: ## BB#0: 847 ; CHECK-NEXT: vmovdqu64 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfe,0x28,0x6f,0x07] 848 ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] 849 ; CHECK-NEXT: vmovdqu64 (%rsi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x29,0x6f,0x06] 850 ; CHECK-NEXT: vmovdqu64 (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0xa9,0x6f,0x0f] 851 ; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] 852 ; CHECK-NEXT: retq ## encoding: [0xc3] 853 %res = call <4 x i64> @llvm.x86.avx512.mask.loadu.q.256(i8* %ptr, <4 x i64> zeroinitializer, i8 -1) 854 %res1 = call <4 x i64> @llvm.x86.avx512.mask.loadu.q.256(i8* %ptr2, <4 x i64> %res, i8 %mask) 855 %res2 = call <4 x i64> @llvm.x86.avx512.mask.loadu.q.256(i8* %ptr, <4 x i64> zeroinitializer, i8 %mask) 856 %res4 = add <4 x i64> %res2, %res1 857 ret <4 x i64> %res4 858 } 859 860 declare <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8*, <4 x i32>, i8) 861 862 define <4 x i32> @test_mask_load_aligned_d_128(<4 x i32> %data, i8* %ptr, i8 %mask) { 863 ; CHECK-LABEL: test_mask_load_aligned_d_128: 864 ; CHECK: ## BB#0: 865 ; CHECK-NEXT: vmovdqa32 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6f,0x07] 866 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 867 ; CHECK-NEXT: vmovdqa32 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6f,0x07] 868 ; CHECK-NEXT: vmovdqa32 (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6f,0x0f] 869 ; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] 870 ; CHECK-NEXT: retq ## encoding: [0xc3] 871 %res = call <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8* %ptr, <4 x i32> zeroinitializer, i8 -1) 872 %res1 = call <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8* %ptr, <4 x i32> %res, i8 %mask) 873 %res2 = call <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8* %ptr, <4 x i32> zeroinitializer, i8 %mask) 874 %res4 = add <4 x i32> %res2, %res1 875 ret <4 x i32> %res4 876 } 877 878 declare <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8*, <8 x i32>, i8) 879 880 define <8 x i32> @test_mask_load_aligned_d_256(<8 x i32> %data, i8* %ptr, i8 %mask) { 881 ; CHECK-LABEL: test_mask_load_aligned_d_256: 882 ; CHECK: ## BB#0: 883 ; CHECK-NEXT: vmovdqa32 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x6f,0x07] 884 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 885 ; CHECK-NEXT: vmovdqa32 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6f,0x07] 886 ; CHECK-NEXT: vmovdqa32 (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6f,0x0f] 887 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] 888 ; CHECK-NEXT: retq ## encoding: [0xc3] 889 %res = call <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8* %ptr, <8 x i32> zeroinitializer, i8 -1) 890 %res1 = call <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8* %ptr, <8 x i32> %res, i8 %mask) 891 %res2 = call <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8* %ptr, <8 x i32> zeroinitializer, i8 %mask) 892 %res4 = add <8 x i32> %res2, %res1 893 ret <8 x i32> %res4 894 } 895 896 declare <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8*, <2 x i64>, i8) 897 898 define <2 x i64> @test_mask_load_aligned_q_128(<2 x i64> %data, i8* %ptr, i8 %mask) { 899 ; CHECK-LABEL: test_mask_load_aligned_q_128: 900 ; CHECK: ## BB#0: 901 ; CHECK-NEXT: vmovdqa64 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0x07] 902 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 903 ; CHECK-NEXT: vmovdqa64 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x6f,0x07] 904 ; CHECK-NEXT: vmovdqa64 (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x6f,0x0f] 905 ; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] 906 ; CHECK-NEXT: retq ## encoding: [0xc3] 907 %res = call <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8* %ptr, <2 x i64> zeroinitializer, i8 -1) 908 %res1 = call <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8* %ptr, <2 x i64> %res, i8 %mask) 909 %res2 = call <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8* %ptr, <2 x i64> zeroinitializer, i8 %mask) 910 %res4 = add <2 x i64> %res2, %res1 911 ret <2 x i64> %res4 912 } 913 914 declare <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8*, <4 x i64>, i8) 915 916 define <4 x i64> @test_mask_load_aligned_q_256(<4 x i64> %data, i8* %ptr, i8 %mask) { 917 ; CHECK-LABEL: test_mask_load_aligned_q_256: 918 ; CHECK: ## BB#0: 919 ; CHECK-NEXT: vmovdqa64 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0x07] 920 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 921 ; CHECK-NEXT: vmovdqa64 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x6f,0x07] 922 ; CHECK-NEXT: vmovdqa64 (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x6f,0x0f] 923 ; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] 924 ; CHECK-NEXT: retq ## encoding: [0xc3] 925 %res = call <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8* %ptr, <4 x i64> zeroinitializer, i8 -1) 926 %res1 = call <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8* %ptr, <4 x i64> %res, i8 %mask) 927 %res2 = call <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8* %ptr, <4 x i64> zeroinitializer, i8 %mask) 928 %res4 = add <4 x i64> %res2, %res1 929 ret <4 x i64> %res4 930 } 931 932 declare <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32>, i32, <4 x i32>, i8) 933 934 define <4 x i32>@test_int_x86_avx512_mask_pshuf_d_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) { 935 ; CHECK-LABEL: test_int_x86_avx512_mask_pshuf_d_128: 936 ; CHECK: ## BB#0: 937 ; CHECK-NEXT: vpshufd $3, %xmm0, %xmm2 ## encoding: [0x62,0xf1,0x7d,0x08,0x70,0xd0,0x03] 938 ; CHECK-NEXT: ## xmm2 = xmm0[3,0,0,0] 939 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 940 ; CHECK-NEXT: vpshufd $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x70,0xc8,0x03] 941 ; CHECK-NEXT: ## xmm1 {%k1} = xmm0[3,0,0,0] 942 ; CHECK-NEXT: vpshufd $3, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x70,0xc0,0x03] 943 ; CHECK-NEXT: ## xmm0 {%k1} {z} = xmm0[3,0,0,0] 944 ; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] 945 ; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc2] 946 ; CHECK-NEXT: retq ## encoding: [0xc3] 947 %res = call <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3) 948 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32> %x0, i32 3, <4 x i32> zeroinitializer, i8 %x3) 949 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 -1) 950 %res3 = add <4 x i32> %res, %res1 951 %res4 = add <4 x i32> %res3, %res2 952 ret <4 x i32> %res4 953 } 954 955 declare <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32>, i32, <8 x i32>, i8) 956 957 define <8 x i32>@test_int_x86_avx512_mask_pshuf_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) { 958 ; CHECK-LABEL: test_int_x86_avx512_mask_pshuf_d_256: 959 ; CHECK: ## BB#0: 960 ; CHECK-NEXT: vpshufd $3, %ymm0, %ymm2 ## encoding: [0x62,0xf1,0x7d,0x28,0x70,0xd0,0x03] 961 ; CHECK-NEXT: ## ymm2 = ymm0[3,0,0,0,7,4,4,4] 962 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 963 ; CHECK-NEXT: vpshufd $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x70,0xc8,0x03] 964 ; CHECK-NEXT: ## ymm1 {%k1} = ymm0[3,0,0,0,7,4,4,4] 965 ; CHECK-NEXT: vpshufd $3, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x70,0xc0,0x03] 966 ; CHECK-NEXT: ## ymm0 {%k1} {z} = ymm0[3,0,0,0,7,4,4,4] 967 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] 968 ; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0xc2] 969 ; CHECK-NEXT: retq ## encoding: [0xc3] 970 %res = call <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3) 971 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32> %x0, i32 3, <8 x i32> zeroinitializer, i8 %x3) 972 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 -1) 973 %res3 = add <8 x i32> %res, %res1 974 %res4 = add <8 x i32> %res3, %res2 975 ret <8 x i32> %res4 976 } 977 978 define i8 @test_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b) { 979 ; CHECK-LABEL: test_pcmpeq_d_256: 980 ; CHECK: ## BB#0: 981 ; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1] 982 ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 983 ; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> 984 ; CHECK-NEXT: retq ## encoding: [0xc3] 985 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1) 986 ret i8 %res 987 } 988 989 define i8 @test_mask_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) { 990 ; CHECK-LABEL: test_mask_pcmpeq_d_256: 991 ; CHECK: ## BB#0: 992 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 993 ; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1] 994 ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 995 ; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> 996 ; CHECK-NEXT: retq ## encoding: [0xc3] 997 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask) 998 ret i8 %res 999 } 1000 1001 declare i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32>, <8 x i32>, i8) 1002 1003 define i8 @test_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b) { 1004 ; CHECK-LABEL: test_pcmpeq_q_256: 1005 ; CHECK: ## BB#0: 1006 ; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x29,0xc1] 1007 ; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c] 1008 ; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c] 1009 ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 1010 ; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> 1011 ; CHECK-NEXT: retq ## encoding: [0xc3] 1012 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1) 1013 ret i8 %res 1014 } 1015 1016 define i8 @test_mask_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) { 1017 ; CHECK-LABEL: test_mask_pcmpeq_q_256: 1018 ; CHECK: ## BB#0: 1019 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1020 ; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x29,0xc1] 1021 ; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c] 1022 ; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c] 1023 ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 1024 ; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> 1025 ; CHECK-NEXT: retq ## encoding: [0xc3] 1026 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask) 1027 ret i8 %res 1028 } 1029 1030 declare i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64>, <4 x i64>, i8) 1031 1032 define i8 @test_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b) { 1033 ; CHECK-LABEL: test_pcmpgt_d_256: 1034 ; CHECK: ## BB#0: 1035 ; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x66,0xc1] 1036 ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 1037 ; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> 1038 ; CHECK-NEXT: retq ## encoding: [0xc3] 1039 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1) 1040 ret i8 %res 1041 } 1042 1043 define i8 @test_mask_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) { 1044 ; CHECK-LABEL: test_mask_pcmpgt_d_256: 1045 ; CHECK: ## BB#0: 1046 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1047 ; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x66,0xc1] 1048 ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 1049 ; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> 1050 ; CHECK-NEXT: retq ## encoding: [0xc3] 1051 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask) 1052 ret i8 %res 1053 } 1054 1055 declare i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32>, <8 x i32>, i8) 1056 1057 define i8 @test_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b) { 1058 ; CHECK-LABEL: test_pcmpgt_q_256: 1059 ; CHECK: ## BB#0: 1060 ; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x37,0xc1] 1061 ; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c] 1062 ; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c] 1063 ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 1064 ; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> 1065 ; CHECK-NEXT: retq ## encoding: [0xc3] 1066 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1) 1067 ret i8 %res 1068 } 1069 1070 define i8 @test_mask_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) { 1071 ; CHECK-LABEL: test_mask_pcmpgt_q_256: 1072 ; CHECK: ## BB#0: 1073 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1074 ; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x37,0xc1] 1075 ; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c] 1076 ; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c] 1077 ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 1078 ; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> 1079 ; CHECK-NEXT: retq ## encoding: [0xc3] 1080 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask) 1081 ret i8 %res 1082 } 1083 1084 declare i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64>, <4 x i64>, i8) 1085 1086 define i8 @test_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b) { 1087 ; CHECK-LABEL: test_pcmpeq_d_128: 1088 ; CHECK: ## BB#0: 1089 ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1] 1090 ; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c] 1091 ; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c] 1092 ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 1093 ; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> 1094 ; CHECK-NEXT: retq ## encoding: [0xc3] 1095 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1) 1096 ret i8 %res 1097 } 1098 1099 define i8 @test_mask_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { 1100 ; CHECK-LABEL: test_mask_pcmpeq_d_128: 1101 ; CHECK: ## BB#0: 1102 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1103 ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x76,0xc1] 1104 ; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c] 1105 ; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c] 1106 ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 1107 ; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> 1108 ; CHECK-NEXT: retq ## encoding: [0xc3] 1109 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask) 1110 ret i8 %res 1111 } 1112 1113 declare i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32>, <4 x i32>, i8) 1114 1115 define i8 @test_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b) { 1116 ; CHECK-LABEL: test_pcmpeq_q_128: 1117 ; CHECK: ## BB#0: 1118 ; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x29,0xc1] 1119 ; CHECK-NEXT: kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e] 1120 ; CHECK-NEXT: kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e] 1121 ; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c] 1122 ; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c] 1123 ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 1124 ; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> 1125 ; CHECK-NEXT: retq ## encoding: [0xc3] 1126 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1) 1127 ret i8 %res 1128 } 1129 1130 define i8 @test_mask_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) { 1131 ; CHECK-LABEL: test_mask_pcmpeq_q_128: 1132 ; CHECK: ## BB#0: 1133 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1134 ; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1] 1135 ; CHECK-NEXT: kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e] 1136 ; CHECK-NEXT: kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e] 1137 ; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c] 1138 ; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c] 1139 ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 1140 ; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> 1141 ; CHECK-NEXT: retq ## encoding: [0xc3] 1142 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask) 1143 ret i8 %res 1144 } 1145 1146 declare i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64>, <2 x i64>, i8) 1147 1148 define i8 @test_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b) { 1149 ; CHECK-LABEL: test_pcmpgt_d_128: 1150 ; CHECK: ## BB#0: 1151 ; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x66,0xc1] 1152 ; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c] 1153 ; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c] 1154 ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 1155 ; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> 1156 ; CHECK-NEXT: retq ## encoding: [0xc3] 1157 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1) 1158 ret i8 %res 1159 } 1160 1161 define i8 @test_mask_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { 1162 ; CHECK-LABEL: test_mask_pcmpgt_d_128: 1163 ; CHECK: ## BB#0: 1164 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1165 ; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x66,0xc1] 1166 ; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c] 1167 ; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c] 1168 ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 1169 ; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> 1170 ; CHECK-NEXT: retq ## encoding: [0xc3] 1171 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask) 1172 ret i8 %res 1173 } 1174 1175 declare i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32>, <4 x i32>, i8) 1176 1177 define i8 @test_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b) { 1178 ; CHECK-LABEL: test_pcmpgt_q_128: 1179 ; CHECK: ## BB#0: 1180 ; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x37,0xc1] 1181 ; CHECK-NEXT: kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e] 1182 ; CHECK-NEXT: kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e] 1183 ; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c] 1184 ; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c] 1185 ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 1186 ; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> 1187 ; CHECK-NEXT: retq ## encoding: [0xc3] 1188 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1) 1189 ret i8 %res 1190 } 1191 1192 define i8 @test_mask_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) { 1193 ; CHECK-LABEL: test_mask_pcmpgt_q_128: 1194 ; CHECK: ## BB#0: 1195 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1196 ; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x37,0xc1] 1197 ; CHECK-NEXT: kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e] 1198 ; CHECK-NEXT: kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e] 1199 ; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c] 1200 ; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c] 1201 ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 1202 ; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> 1203 ; CHECK-NEXT: retq ## encoding: [0xc3] 1204 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask) 1205 ret i8 %res 1206 } 1207 1208 declare i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64>, <2 x i64>, i8) 1209 1210 declare <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) 1211 1212 define <2 x double>@test_int_x86_avx512_mask_unpckh_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { 1213 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_pd_128: 1214 ; CHECK: ## BB#0: 1215 ; CHECK-NEXT: vunpckhpd %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x15,0xd9] 1216 ; CHECK-NEXT: ## xmm3 = xmm0[1],xmm1[1] 1217 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1218 ; CHECK-NEXT: vunpckhpd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x15,0xd1] 1219 ; CHECK-NEXT: ## xmm2 {%k1} = xmm0[1],xmm1[1] 1220 ; CHECK-NEXT: vaddpd %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0x58,0xc3] 1221 ; CHECK-NEXT: retq ## encoding: [0xc3] 1222 %res = call <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) 1223 %res1 = call <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) 1224 %res2 = fadd <2 x double> %res, %res1 1225 ret <2 x double> %res2 1226 } 1227 1228 declare <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) 1229 1230 define <4 x double>@test_int_x86_avx512_mask_unpckh_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { 1231 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_pd_256: 1232 ; CHECK: ## BB#0: 1233 ; CHECK-NEXT: vunpckhpd %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x15,0xd9] 1234 ; CHECK-NEXT: ## ymm3 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] 1235 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1236 ; CHECK-NEXT: vunpckhpd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x15,0xd1] 1237 ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] 1238 ; CHECK-NEXT: vaddpd %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc3] 1239 ; CHECK-NEXT: retq ## encoding: [0xc3] 1240 %res = call <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) 1241 %res1 = call <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) 1242 %res2 = fadd <4 x double> %res, %res1 1243 ret <4 x double> %res2 1244 } 1245 1246 declare <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 1247 1248 define <4 x float>@test_int_x86_avx512_mask_unpckh_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { 1249 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_ps_128: 1250 ; CHECK: ## BB#0: 1251 ; CHECK-NEXT: vunpckhps %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x15,0xd9] 1252 ; CHECK-NEXT: ## xmm3 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1253 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1254 ; CHECK-NEXT: vunpckhps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x15,0xd1] 1255 ; CHECK-NEXT: ## xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1256 ; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc3] 1257 ; CHECK-NEXT: retq ## encoding: [0xc3] 1258 %res = call <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) 1259 %res1 = call <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) 1260 %res2 = fadd <4 x float> %res, %res1 1261 ret <4 x float> %res2 1262 } 1263 1264 declare <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 1265 1266 define <8 x float>@test_int_x86_avx512_mask_unpckh_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { 1267 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_ps_256: 1268 ; CHECK: ## BB#0: 1269 ; CHECK-NEXT: vunpckhps %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x15,0xd9] 1270 ; CHECK-NEXT: ## ymm3 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] 1271 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1272 ; CHECK-NEXT: vunpckhps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x15,0xd1] 1273 ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] 1274 ; CHECK-NEXT: vaddps %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc3] 1275 ; CHECK-NEXT: retq ## encoding: [0xc3] 1276 %res = call <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) 1277 %res1 = call <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) 1278 %res2 = fadd <8 x float> %res, %res1 1279 ret <8 x float> %res2 1280 } 1281 1282 declare <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) 1283 1284 define <2 x double>@test_int_x86_avx512_mask_unpckl_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { 1285 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_pd_128: 1286 ; CHECK: ## BB#0: 1287 ; CHECK-NEXT: vunpcklpd %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x14,0xd9] 1288 ; CHECK-NEXT: ## xmm3 = xmm0[0],xmm1[0] 1289 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1290 ; CHECK-NEXT: vunpcklpd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x14,0xd1] 1291 ; CHECK-NEXT: ## xmm2 {%k1} = xmm0[0],xmm1[0] 1292 ; CHECK-NEXT: vaddpd %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0x58,0xc3] 1293 ; CHECK-NEXT: retq ## encoding: [0xc3] 1294 %res = call <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) 1295 %res1 = call <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) 1296 %res2 = fadd <2 x double> %res, %res1 1297 ret <2 x double> %res2 1298 } 1299 1300 declare <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) 1301 1302 define <4 x double>@test_int_x86_avx512_mask_unpckl_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { 1303 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_pd_256: 1304 ; CHECK: ## BB#0: 1305 ; CHECK-NEXT: vunpcklpd %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x14,0xd9] 1306 ; CHECK-NEXT: ## ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] 1307 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1308 ; CHECK-NEXT: vunpcklpd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x14,0xd1] 1309 ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] 1310 ; CHECK-NEXT: vaddpd %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc3] 1311 ; CHECK-NEXT: retq ## encoding: [0xc3] 1312 %res = call <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) 1313 %res1 = call <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) 1314 %res2 = fadd <4 x double> %res, %res1 1315 ret <4 x double> %res2 1316 } 1317 1318 declare <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 1319 1320 define <4 x float>@test_int_x86_avx512_mask_unpckl_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { 1321 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_ps_128: 1322 ; CHECK: ## BB#0: 1323 ; CHECK-NEXT: vunpcklps %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x14,0xd9] 1324 ; CHECK-NEXT: ## xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1325 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1326 ; CHECK-NEXT: vunpcklps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x14,0xd1] 1327 ; CHECK-NEXT: ## xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1328 ; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc3] 1329 ; CHECK-NEXT: retq ## encoding: [0xc3] 1330 %res = call <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) 1331 %res1 = call <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) 1332 %res2 = fadd <4 x float> %res, %res1 1333 ret <4 x float> %res2 1334 } 1335 1336 declare <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 1337 1338 define <8 x float>@test_int_x86_avx512_mask_unpckl_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { 1339 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_ps_256: 1340 ; CHECK: ## BB#0: 1341 ; CHECK-NEXT: vunpcklps %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x14,0xd9] 1342 ; CHECK-NEXT: ## ymm3 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] 1343 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1344 ; CHECK-NEXT: vunpcklps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x14,0xd1] 1345 ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] 1346 ; CHECK-NEXT: vaddps %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc3] 1347 ; CHECK-NEXT: retq ## encoding: [0xc3] 1348 %res = call <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) 1349 %res1 = call <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) 1350 %res2 = fadd <8 x float> %res, %res1 1351 ret <8 x float> %res2 1352 } 1353 1354 declare <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 1355 1356 define <4 x i32>@test_int_x86_avx512_mask_punpckhd_q_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { 1357 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhd_q_128: 1358 ; CHECK: ## BB#0: 1359 ; CHECK-NEXT: vpunpckhdq %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7d,0x08,0x6a,0xd9] 1360 ; CHECK-NEXT: ## xmm3 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1361 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1362 ; CHECK-NEXT: vpunpckhdq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6a,0xd1] 1363 ; CHECK-NEXT: ## xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1364 ; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc3] 1365 ; CHECK-NEXT: retq ## encoding: [0xc3] 1366 %res = call <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) 1367 %res1 = call <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) 1368 %res2 = add <4 x i32> %res, %res1 1369 ret <4 x i32> %res2 1370 } 1371 1372 declare <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 1373 1374 define <4 x i32>@test_int_x86_avx512_mask_punpckld_q_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { 1375 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckld_q_128: 1376 ; CHECK: ## BB#0: 1377 ; CHECK-NEXT: vpunpckldq %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7d,0x08,0x62,0xd9] 1378 ; CHECK-NEXT: ## xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1379 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1380 ; CHECK-NEXT: vpunpckldq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x62,0xd1] 1381 ; CHECK-NEXT: ## xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1382 ; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc3] 1383 ; CHECK-NEXT: retq ## encoding: [0xc3] 1384 %res = call <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) 1385 %res1 = call <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) 1386 %res2 = add <4 x i32> %res, %res1 1387 ret <4 x i32> %res2 1388 } 1389 1390 declare <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 1391 1392 define <8 x i32>@test_int_x86_avx512_mask_punpckhd_q_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { 1393 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhd_q_256: 1394 ; CHECK: ## BB#0: 1395 ; CHECK-NEXT: vpunpckhdq %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7d,0x28,0x6a,0xd9] 1396 ; CHECK-NEXT: ## ymm3 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] 1397 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1398 ; CHECK-NEXT: vpunpckhdq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6a,0xd1] 1399 ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] 1400 ; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc3] 1401 ; CHECK-NEXT: retq ## encoding: [0xc3] 1402 %res = call <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 1403 %res1 = call <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 1404 %res2 = add <8 x i32> %res, %res1 1405 ret <8 x i32> %res2 1406 } 1407 1408 declare <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 1409 1410 define <8 x i32>@test_int_x86_avx512_mask_punpckld_q_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { 1411 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckld_q_256: 1412 ; CHECK: ## BB#0: 1413 ; CHECK-NEXT: vpunpckldq %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7d,0x28,0x62,0xd9] 1414 ; CHECK-NEXT: ## ymm3 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] 1415 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1416 ; CHECK-NEXT: vpunpckldq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x62,0xd1] 1417 ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] 1418 ; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc3] 1419 ; CHECK-NEXT: retq ## encoding: [0xc3] 1420 %res = call <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 1421 %res1 = call <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 1422 %res2 = add <8 x i32> %res, %res1 1423 ret <8 x i32> %res2 1424 } 1425 1426 declare <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 1427 1428 define <2 x i64>@test_int_x86_avx512_mask_punpckhqd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { 1429 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhqd_q_128: 1430 ; CHECK: ## BB#0: 1431 ; CHECK-NEXT: vpunpckhqdq %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x6d,0xd9] 1432 ; CHECK-NEXT: ## xmm3 = xmm0[1],xmm1[1] 1433 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1434 ; CHECK-NEXT: vpunpckhqdq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x6d,0xd1] 1435 ; CHECK-NEXT: ## xmm2 = xmm0[1],xmm1[1] 1436 ; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xc3] 1437 ; CHECK-NEXT: retq ## encoding: [0xc3] 1438 %res = call <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) 1439 %res1 = call <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) 1440 %res2 = add <2 x i64> %res, %res1 1441 ret <2 x i64> %res2 1442 } 1443 1444 declare <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 1445 1446 define <2 x i64>@test_int_x86_avx512_mask_punpcklqd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { 1447 ; CHECK-LABEL: test_int_x86_avx512_mask_punpcklqd_q_128: 1448 ; CHECK: ## BB#0: 1449 ; CHECK-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x6c,0xd9] 1450 ; CHECK-NEXT: ## xmm3 = xmm0[0],xmm1[0] 1451 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1452 ; CHECK-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x6c,0xd1] 1453 ; CHECK-NEXT: ## xmm2 = xmm0[0],xmm1[0] 1454 ; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xc3] 1455 ; CHECK-NEXT: retq ## encoding: [0xc3] 1456 %res = call <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) 1457 %res1 = call <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) 1458 %res2 = add <2 x i64> %res, %res1 1459 ret <2 x i64> %res2 1460 } 1461 1462 declare <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 1463 1464 define <4 x i64>@test_int_x86_avx512_mask_punpcklqd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { 1465 ; CHECK-LABEL: test_int_x86_avx512_mask_punpcklqd_q_256: 1466 ; CHECK: ## BB#0: 1467 ; CHECK-NEXT: vpunpcklqdq %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x6c,0xd9] 1468 ; CHECK-NEXT: ## ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] 1469 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1470 ; CHECK-NEXT: vpunpcklqdq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x6c,0xd1] 1471 ; CHECK-NEXT: ## ymm2 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] 1472 ; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc3] 1473 ; CHECK-NEXT: retq ## encoding: [0xc3] 1474 %res = call <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) 1475 %res1 = call <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1) 1476 %res2 = add <4 x i64> %res, %res1 1477 ret <4 x i64> %res2 1478 } 1479 1480 declare <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 1481 1482 define <4 x i64>@test_int_x86_avx512_mask_punpckhqd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { 1483 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhqd_q_256: 1484 ; CHECK: ## BB#0: 1485 ; CHECK-NEXT: vpunpckhqdq %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x6d,0xd9] 1486 ; CHECK-NEXT: ## ymm3 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] 1487 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1488 ; CHECK-NEXT: vpunpckhqdq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x6d,0xd1] 1489 ; CHECK-NEXT: ## ymm2 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] 1490 ; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc3] 1491 ; CHECK-NEXT: retq ## encoding: [0xc3] 1492 %res = call <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) 1493 %res1 = call <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1) 1494 %res2 = add <4 x i64> %res, %res1 1495 ret <4 x i64> %res2 1496 } 1497 1498 define <4 x i32> @test_mask_and_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { 1499 ; CHECK-LABEL: test_mask_and_epi32_rr_128: 1500 ; CHECK: ## BB#0: 1501 ; CHECK-NEXT: vpandd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdb,0xc1] 1502 ; CHECK-NEXT: retq ## encoding: [0xc3] 1503 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 1504 ret <4 x i32> %res 1505 } 1506 1507 define <4 x i32> @test_mask_and_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) { 1508 ; CHECK-LABEL: test_mask_and_epi32_rrk_128: 1509 ; CHECK: ## BB#0: 1510 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1511 ; CHECK-NEXT: vpandd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdb,0xd1] 1512 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 1513 ; CHECK-NEXT: retq ## encoding: [0xc3] 1514 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 1515 ret <4 x i32> %res 1516 } 1517 1518 define <4 x i32> @test_mask_and_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { 1519 ; CHECK-LABEL: test_mask_and_epi32_rrkz_128: 1520 ; CHECK: ## BB#0: 1521 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1522 ; CHECK-NEXT: vpandd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdb,0xc1] 1523 ; CHECK-NEXT: retq ## encoding: [0xc3] 1524 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 1525 ret <4 x i32> %res 1526 } 1527 1528 define <4 x i32> @test_mask_and_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { 1529 ; CHECK-LABEL: test_mask_and_epi32_rm_128: 1530 ; CHECK: ## BB#0: 1531 ; CHECK-NEXT: vpandd (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdb,0x07] 1532 ; CHECK-NEXT: retq ## encoding: [0xc3] 1533 %b = load <4 x i32>, <4 x i32>* %ptr_b 1534 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 1535 ret <4 x i32> %res 1536 } 1537 1538 define <4 x i32> @test_mask_and_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) { 1539 ; CHECK-LABEL: test_mask_and_epi32_rmk_128: 1540 ; CHECK: ## BB#0: 1541 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1542 ; CHECK-NEXT: vpandd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdb,0x0f] 1543 ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1544 ; CHECK-NEXT: retq ## encoding: [0xc3] 1545 %b = load <4 x i32>, <4 x i32>* %ptr_b 1546 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 1547 ret <4 x i32> %res 1548 } 1549 1550 define <4 x i32> @test_mask_and_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { 1551 ; CHECK-LABEL: test_mask_and_epi32_rmkz_128: 1552 ; CHECK: ## BB#0: 1553 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1554 ; CHECK-NEXT: vpandd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdb,0x07] 1555 ; CHECK-NEXT: retq ## encoding: [0xc3] 1556 %b = load <4 x i32>, <4 x i32>* %ptr_b 1557 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 1558 ret <4 x i32> %res 1559 } 1560 1561 define <4 x i32> @test_mask_and_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) { 1562 ; CHECK-LABEL: test_mask_and_epi32_rmb_128: 1563 ; CHECK: ## BB#0: 1564 ; CHECK-NEXT: vpandd (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xdb,0x07] 1565 ; CHECK-NEXT: retq ## encoding: [0xc3] 1566 %q = load i32, i32* %ptr_b 1567 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 1568 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 1569 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 1570 ret <4 x i32> %res 1571 } 1572 1573 define <4 x i32> @test_mask_and_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) { 1574 ; CHECK-LABEL: test_mask_and_epi32_rmbk_128: 1575 ; CHECK: ## BB#0: 1576 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1577 ; CHECK-NEXT: vpandd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xdb,0x0f] 1578 ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1579 ; CHECK-NEXT: retq ## encoding: [0xc3] 1580 %q = load i32, i32* %ptr_b 1581 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 1582 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 1583 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 1584 ret <4 x i32> %res 1585 } 1586 1587 define <4 x i32> @test_mask_and_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { 1588 ; CHECK-LABEL: test_mask_and_epi32_rmbkz_128: 1589 ; CHECK: ## BB#0: 1590 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1591 ; CHECK-NEXT: vpandd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xdb,0x07] 1592 ; CHECK-NEXT: retq ## encoding: [0xc3] 1593 %q = load i32, i32* %ptr_b 1594 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 1595 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 1596 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 1597 ret <4 x i32> %res 1598 } 1599 1600 declare <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 1601 1602 define <8 x i32> @test_mask_and_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { 1603 ; CHECK-LABEL: test_mask_and_epi32_rr_256: 1604 ; CHECK: ## BB#0: 1605 ; CHECK-NEXT: vpandd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdb,0xc1] 1606 ; CHECK-NEXT: retq ## encoding: [0xc3] 1607 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 1608 ret <8 x i32> %res 1609 } 1610 1611 define <8 x i32> @test_mask_and_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) { 1612 ; CHECK-LABEL: test_mask_and_epi32_rrk_256: 1613 ; CHECK: ## BB#0: 1614 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1615 ; CHECK-NEXT: vpandd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdb,0xd1] 1616 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 1617 ; CHECK-NEXT: retq ## encoding: [0xc3] 1618 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 1619 ret <8 x i32> %res 1620 } 1621 1622 define <8 x i32> @test_mask_and_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) { 1623 ; CHECK-LABEL: test_mask_and_epi32_rrkz_256: 1624 ; CHECK: ## BB#0: 1625 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1626 ; CHECK-NEXT: vpandd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0xc1] 1627 ; CHECK-NEXT: retq ## encoding: [0xc3] 1628 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 1629 ret <8 x i32> %res 1630 } 1631 1632 define <8 x i32> @test_mask_and_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { 1633 ; CHECK-LABEL: test_mask_and_epi32_rm_256: 1634 ; CHECK: ## BB#0: 1635 ; CHECK-NEXT: vpandd (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdb,0x07] 1636 ; CHECK-NEXT: retq ## encoding: [0xc3] 1637 %b = load <8 x i32>, <8 x i32>* %ptr_b 1638 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 1639 ret <8 x i32> %res 1640 } 1641 1642 define <8 x i32> @test_mask_and_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) { 1643 ; CHECK-LABEL: test_mask_and_epi32_rmk_256: 1644 ; CHECK: ## BB#0: 1645 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1646 ; CHECK-NEXT: vpandd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdb,0x0f] 1647 ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1648 ; CHECK-NEXT: retq ## encoding: [0xc3] 1649 %b = load <8 x i32>, <8 x i32>* %ptr_b 1650 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 1651 ret <8 x i32> %res 1652 } 1653 1654 define <8 x i32> @test_mask_and_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) { 1655 ; CHECK-LABEL: test_mask_and_epi32_rmkz_256: 1656 ; CHECK: ## BB#0: 1657 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1658 ; CHECK-NEXT: vpandd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0x07] 1659 ; CHECK-NEXT: retq ## encoding: [0xc3] 1660 %b = load <8 x i32>, <8 x i32>* %ptr_b 1661 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 1662 ret <8 x i32> %res 1663 } 1664 1665 define <8 x i32> @test_mask_and_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) { 1666 ; CHECK-LABEL: test_mask_and_epi32_rmb_256: 1667 ; CHECK: ## BB#0: 1668 ; CHECK-NEXT: vpandd (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xdb,0x07] 1669 ; CHECK-NEXT: retq ## encoding: [0xc3] 1670 %q = load i32, i32* %ptr_b 1671 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 1672 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 1673 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 1674 ret <8 x i32> %res 1675 } 1676 1677 define <8 x i32> @test_mask_and_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) { 1678 ; CHECK-LABEL: test_mask_and_epi32_rmbk_256: 1679 ; CHECK: ## BB#0: 1680 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1681 ; CHECK-NEXT: vpandd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xdb,0x0f] 1682 ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1683 ; CHECK-NEXT: retq ## encoding: [0xc3] 1684 %q = load i32, i32* %ptr_b 1685 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 1686 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 1687 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 1688 ret <8 x i32> %res 1689 } 1690 1691 define <8 x i32> @test_mask_and_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) { 1692 ; CHECK-LABEL: test_mask_and_epi32_rmbkz_256: 1693 ; CHECK: ## BB#0: 1694 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1695 ; CHECK-NEXT: vpandd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xdb,0x07] 1696 ; CHECK-NEXT: retq ## encoding: [0xc3] 1697 %q = load i32, i32* %ptr_b 1698 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 1699 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 1700 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 1701 ret <8 x i32> %res 1702 } 1703 1704 declare <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 1705 1706 define <4 x i32> @test_mask_or_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { 1707 ; CHECK-LABEL: test_mask_or_epi32_rr_128: 1708 ; CHECK: ## BB#0: 1709 ; CHECK-NEXT: vpord %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xeb,0xc1] 1710 ; CHECK-NEXT: retq ## encoding: [0xc3] 1711 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 1712 ret <4 x i32> %res 1713 } 1714 1715 define <4 x i32> @test_mask_or_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) { 1716 ; CHECK-LABEL: test_mask_or_epi32_rrk_128: 1717 ; CHECK: ## BB#0: 1718 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1719 ; CHECK-NEXT: vpord %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xeb,0xd1] 1720 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 1721 ; CHECK-NEXT: retq ## encoding: [0xc3] 1722 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 1723 ret <4 x i32> %res 1724 } 1725 1726 define <4 x i32> @test_mask_or_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { 1727 ; CHECK-LABEL: test_mask_or_epi32_rrkz_128: 1728 ; CHECK: ## BB#0: 1729 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1730 ; CHECK-NEXT: vpord %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xeb,0xc1] 1731 ; CHECK-NEXT: retq ## encoding: [0xc3] 1732 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 1733 ret <4 x i32> %res 1734 } 1735 1736 define <4 x i32> @test_mask_or_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { 1737 ; CHECK-LABEL: test_mask_or_epi32_rm_128: 1738 ; CHECK: ## BB#0: 1739 ; CHECK-NEXT: vpord (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xeb,0x07] 1740 ; CHECK-NEXT: retq ## encoding: [0xc3] 1741 %b = load <4 x i32>, <4 x i32>* %ptr_b 1742 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 1743 ret <4 x i32> %res 1744 } 1745 1746 define <4 x i32> @test_mask_or_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) { 1747 ; CHECK-LABEL: test_mask_or_epi32_rmk_128: 1748 ; CHECK: ## BB#0: 1749 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1750 ; CHECK-NEXT: vpord (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xeb,0x0f] 1751 ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1752 ; CHECK-NEXT: retq ## encoding: [0xc3] 1753 %b = load <4 x i32>, <4 x i32>* %ptr_b 1754 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 1755 ret <4 x i32> %res 1756 } 1757 1758 define <4 x i32> @test_mask_or_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { 1759 ; CHECK-LABEL: test_mask_or_epi32_rmkz_128: 1760 ; CHECK: ## BB#0: 1761 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1762 ; CHECK-NEXT: vpord (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xeb,0x07] 1763 ; CHECK-NEXT: retq ## encoding: [0xc3] 1764 %b = load <4 x i32>, <4 x i32>* %ptr_b 1765 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 1766 ret <4 x i32> %res 1767 } 1768 1769 define <4 x i32> @test_mask_or_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) { 1770 ; CHECK-LABEL: test_mask_or_epi32_rmb_128: 1771 ; CHECK: ## BB#0: 1772 ; CHECK-NEXT: vpord (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xeb,0x07] 1773 ; CHECK-NEXT: retq ## encoding: [0xc3] 1774 %q = load i32, i32* %ptr_b 1775 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 1776 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 1777 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 1778 ret <4 x i32> %res 1779 } 1780 1781 define <4 x i32> @test_mask_or_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) { 1782 ; CHECK-LABEL: test_mask_or_epi32_rmbk_128: 1783 ; CHECK: ## BB#0: 1784 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1785 ; CHECK-NEXT: vpord (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xeb,0x0f] 1786 ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1787 ; CHECK-NEXT: retq ## encoding: [0xc3] 1788 %q = load i32, i32* %ptr_b 1789 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 1790 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 1791 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 1792 ret <4 x i32> %res 1793 } 1794 1795 define <4 x i32> @test_mask_or_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { 1796 ; CHECK-LABEL: test_mask_or_epi32_rmbkz_128: 1797 ; CHECK: ## BB#0: 1798 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1799 ; CHECK-NEXT: vpord (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xeb,0x07] 1800 ; CHECK-NEXT: retq ## encoding: [0xc3] 1801 %q = load i32, i32* %ptr_b 1802 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 1803 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 1804 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 1805 ret <4 x i32> %res 1806 } 1807 1808 declare <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 1809 1810 define <8 x i32> @test_mask_or_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { 1811 ; CHECK-LABEL: test_mask_or_epi32_rr_256: 1812 ; CHECK: ## BB#0: 1813 ; CHECK-NEXT: vpord %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xeb,0xc1] 1814 ; CHECK-NEXT: retq ## encoding: [0xc3] 1815 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 1816 ret <8 x i32> %res 1817 } 1818 1819 define <8 x i32> @test_mask_or_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) { 1820 ; CHECK-LABEL: test_mask_or_epi32_rrk_256: 1821 ; CHECK: ## BB#0: 1822 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1823 ; CHECK-NEXT: vpord %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xeb,0xd1] 1824 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 1825 ; CHECK-NEXT: retq ## encoding: [0xc3] 1826 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 1827 ret <8 x i32> %res 1828 } 1829 1830 define <8 x i32> @test_mask_or_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) { 1831 ; CHECK-LABEL: test_mask_or_epi32_rrkz_256: 1832 ; CHECK: ## BB#0: 1833 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1834 ; CHECK-NEXT: vpord %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0xc1] 1835 ; CHECK-NEXT: retq ## encoding: [0xc3] 1836 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 1837 ret <8 x i32> %res 1838 } 1839 1840 define <8 x i32> @test_mask_or_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { 1841 ; CHECK-LABEL: test_mask_or_epi32_rm_256: 1842 ; CHECK: ## BB#0: 1843 ; CHECK-NEXT: vpord (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xeb,0x07] 1844 ; CHECK-NEXT: retq ## encoding: [0xc3] 1845 %b = load <8 x i32>, <8 x i32>* %ptr_b 1846 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 1847 ret <8 x i32> %res 1848 } 1849 1850 define <8 x i32> @test_mask_or_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) { 1851 ; CHECK-LABEL: test_mask_or_epi32_rmk_256: 1852 ; CHECK: ## BB#0: 1853 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1854 ; CHECK-NEXT: vpord (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xeb,0x0f] 1855 ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1856 ; CHECK-NEXT: retq ## encoding: [0xc3] 1857 %b = load <8 x i32>, <8 x i32>* %ptr_b 1858 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 1859 ret <8 x i32> %res 1860 } 1861 1862 define <8 x i32> @test_mask_or_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) { 1863 ; CHECK-LABEL: test_mask_or_epi32_rmkz_256: 1864 ; CHECK: ## BB#0: 1865 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1866 ; CHECK-NEXT: vpord (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0x07] 1867 ; CHECK-NEXT: retq ## encoding: [0xc3] 1868 %b = load <8 x i32>, <8 x i32>* %ptr_b 1869 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 1870 ret <8 x i32> %res 1871 } 1872 1873 define <8 x i32> @test_mask_or_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) { 1874 ; CHECK-LABEL: test_mask_or_epi32_rmb_256: 1875 ; CHECK: ## BB#0: 1876 ; CHECK-NEXT: vpord (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xeb,0x07] 1877 ; CHECK-NEXT: retq ## encoding: [0xc3] 1878 %q = load i32, i32* %ptr_b 1879 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 1880 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 1881 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 1882 ret <8 x i32> %res 1883 } 1884 1885 define <8 x i32> @test_mask_or_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) { 1886 ; CHECK-LABEL: test_mask_or_epi32_rmbk_256: 1887 ; CHECK: ## BB#0: 1888 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1889 ; CHECK-NEXT: vpord (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xeb,0x0f] 1890 ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1891 ; CHECK-NEXT: retq ## encoding: [0xc3] 1892 %q = load i32, i32* %ptr_b 1893 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 1894 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 1895 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 1896 ret <8 x i32> %res 1897 } 1898 1899 define <8 x i32> @test_mask_or_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) { 1900 ; CHECK-LABEL: test_mask_or_epi32_rmbkz_256: 1901 ; CHECK: ## BB#0: 1902 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1903 ; CHECK-NEXT: vpord (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xeb,0x07] 1904 ; CHECK-NEXT: retq ## encoding: [0xc3] 1905 %q = load i32, i32* %ptr_b 1906 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 1907 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 1908 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 1909 ret <8 x i32> %res 1910 } 1911 1912 declare <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 1913 1914 define <4 x i32> @test_mask_xor_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { 1915 ; CHECK-LABEL: test_mask_xor_epi32_rr_128: 1916 ; CHECK: ## BB#0: 1917 ; CHECK-NEXT: vpxord %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xef,0xc1] 1918 ; CHECK-NEXT: retq ## encoding: [0xc3] 1919 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 1920 ret <4 x i32> %res 1921 } 1922 1923 define <4 x i32> @test_mask_xor_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) { 1924 ; CHECK-LABEL: test_mask_xor_epi32_rrk_128: 1925 ; CHECK: ## BB#0: 1926 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1927 ; CHECK-NEXT: vpxord %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xef,0xd1] 1928 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 1929 ; CHECK-NEXT: retq ## encoding: [0xc3] 1930 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 1931 ret <4 x i32> %res 1932 } 1933 1934 define <4 x i32> @test_mask_xor_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { 1935 ; CHECK-LABEL: test_mask_xor_epi32_rrkz_128: 1936 ; CHECK: ## BB#0: 1937 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1938 ; CHECK-NEXT: vpxord %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xef,0xc1] 1939 ; CHECK-NEXT: retq ## encoding: [0xc3] 1940 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 1941 ret <4 x i32> %res 1942 } 1943 1944 define <4 x i32> @test_mask_xor_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { 1945 ; CHECK-LABEL: test_mask_xor_epi32_rm_128: 1946 ; CHECK: ## BB#0: 1947 ; CHECK-NEXT: vpxord (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xef,0x07] 1948 ; CHECK-NEXT: retq ## encoding: [0xc3] 1949 %b = load <4 x i32>, <4 x i32>* %ptr_b 1950 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 1951 ret <4 x i32> %res 1952 } 1953 1954 define <4 x i32> @test_mask_xor_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) { 1955 ; CHECK-LABEL: test_mask_xor_epi32_rmk_128: 1956 ; CHECK: ## BB#0: 1957 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1958 ; CHECK-NEXT: vpxord (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xef,0x0f] 1959 ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1960 ; CHECK-NEXT: retq ## encoding: [0xc3] 1961 %b = load <4 x i32>, <4 x i32>* %ptr_b 1962 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 1963 ret <4 x i32> %res 1964 } 1965 1966 define <4 x i32> @test_mask_xor_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { 1967 ; CHECK-LABEL: test_mask_xor_epi32_rmkz_128: 1968 ; CHECK: ## BB#0: 1969 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1970 ; CHECK-NEXT: vpxord (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xef,0x07] 1971 ; CHECK-NEXT: retq ## encoding: [0xc3] 1972 %b = load <4 x i32>, <4 x i32>* %ptr_b 1973 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 1974 ret <4 x i32> %res 1975 } 1976 1977 define <4 x i32> @test_mask_xor_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) { 1978 ; CHECK-LABEL: test_mask_xor_epi32_rmb_128: 1979 ; CHECK: ## BB#0: 1980 ; CHECK-NEXT: vpxord (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xef,0x07] 1981 ; CHECK-NEXT: retq ## encoding: [0xc3] 1982 %q = load i32, i32* %ptr_b 1983 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 1984 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 1985 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 1986 ret <4 x i32> %res 1987 } 1988 1989 define <4 x i32> @test_mask_xor_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) { 1990 ; CHECK-LABEL: test_mask_xor_epi32_rmbk_128: 1991 ; CHECK: ## BB#0: 1992 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1993 ; CHECK-NEXT: vpxord (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xef,0x0f] 1994 ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1995 ; CHECK-NEXT: retq ## encoding: [0xc3] 1996 %q = load i32, i32* %ptr_b 1997 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 1998 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 1999 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 2000 ret <4 x i32> %res 2001 } 2002 2003 define <4 x i32> @test_mask_xor_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { 2004 ; CHECK-LABEL: test_mask_xor_epi32_rmbkz_128: 2005 ; CHECK: ## BB#0: 2006 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2007 ; CHECK-NEXT: vpxord (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xef,0x07] 2008 ; CHECK-NEXT: retq ## encoding: [0xc3] 2009 %q = load i32, i32* %ptr_b 2010 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 2011 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 2012 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 2013 ret <4 x i32> %res 2014 } 2015 2016 declare <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 2017 2018 define <8 x i32> @test_mask_xor_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { 2019 ; CHECK-LABEL: test_mask_xor_epi32_rr_256: 2020 ; CHECK: ## BB#0: 2021 ; CHECK-NEXT: vpxord %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xef,0xc1] 2022 ; CHECK-NEXT: retq ## encoding: [0xc3] 2023 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 2024 ret <8 x i32> %res 2025 } 2026 2027 define <8 x i32> @test_mask_xor_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) { 2028 ; CHECK-LABEL: test_mask_xor_epi32_rrk_256: 2029 ; CHECK: ## BB#0: 2030 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2031 ; CHECK-NEXT: vpxord %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xef,0xd1] 2032 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 2033 ; CHECK-NEXT: retq ## encoding: [0xc3] 2034 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 2035 ret <8 x i32> %res 2036 } 2037 2038 define <8 x i32> @test_mask_xor_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) { 2039 ; CHECK-LABEL: test_mask_xor_epi32_rrkz_256: 2040 ; CHECK: ## BB#0: 2041 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2042 ; CHECK-NEXT: vpxord %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xef,0xc1] 2043 ; CHECK-NEXT: retq ## encoding: [0xc3] 2044 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 2045 ret <8 x i32> %res 2046 } 2047 2048 define <8 x i32> @test_mask_xor_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { 2049 ; CHECK-LABEL: test_mask_xor_epi32_rm_256: 2050 ; CHECK: ## BB#0: 2051 ; CHECK-NEXT: vpxord (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xef,0x07] 2052 ; CHECK-NEXT: retq ## encoding: [0xc3] 2053 %b = load <8 x i32>, <8 x i32>* %ptr_b 2054 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 2055 ret <8 x i32> %res 2056 } 2057 2058 define <8 x i32> @test_mask_xor_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) { 2059 ; CHECK-LABEL: test_mask_xor_epi32_rmk_256: 2060 ; CHECK: ## BB#0: 2061 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2062 ; CHECK-NEXT: vpxord (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xef,0x0f] 2063 ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 2064 ; CHECK-NEXT: retq ## encoding: [0xc3] 2065 %b = load <8 x i32>, <8 x i32>* %ptr_b 2066 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 2067 ret <8 x i32> %res 2068 } 2069 2070 define <8 x i32> @test_mask_xor_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) { 2071 ; CHECK-LABEL: test_mask_xor_epi32_rmkz_256: 2072 ; CHECK: ## BB#0: 2073 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2074 ; CHECK-NEXT: vpxord (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xef,0x07] 2075 ; CHECK-NEXT: retq ## encoding: [0xc3] 2076 %b = load <8 x i32>, <8 x i32>* %ptr_b 2077 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 2078 ret <8 x i32> %res 2079 } 2080 2081 define <8 x i32> @test_mask_xor_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) { 2082 ; CHECK-LABEL: test_mask_xor_epi32_rmb_256: 2083 ; CHECK: ## BB#0: 2084 ; CHECK-NEXT: vpxord (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xef,0x07] 2085 ; CHECK-NEXT: retq ## encoding: [0xc3] 2086 %q = load i32, i32* %ptr_b 2087 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 2088 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 2089 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 2090 ret <8 x i32> %res 2091 } 2092 2093 define <8 x i32> @test_mask_xor_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) { 2094 ; CHECK-LABEL: test_mask_xor_epi32_rmbk_256: 2095 ; CHECK: ## BB#0: 2096 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2097 ; CHECK-NEXT: vpxord (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xef,0x0f] 2098 ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 2099 ; CHECK-NEXT: retq ## encoding: [0xc3] 2100 %q = load i32, i32* %ptr_b 2101 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 2102 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 2103 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 2104 ret <8 x i32> %res 2105 } 2106 2107 define <8 x i32> @test_mask_xor_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) { 2108 ; CHECK-LABEL: test_mask_xor_epi32_rmbkz_256: 2109 ; CHECK: ## BB#0: 2110 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2111 ; CHECK-NEXT: vpxord (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xef,0x07] 2112 ; CHECK-NEXT: retq ## encoding: [0xc3] 2113 %q = load i32, i32* %ptr_b 2114 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 2115 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 2116 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 2117 ret <8 x i32> %res 2118 } 2119 2120 declare <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 2121 2122 define <4 x i32> @test_mask_andnot_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { 2123 ; CHECK-LABEL: test_mask_andnot_epi32_rr_128: 2124 ; CHECK: ## BB#0: 2125 ; CHECK-NEXT: vpandnd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdf,0xc1] 2126 ; CHECK-NEXT: retq ## encoding: [0xc3] 2127 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 2128 ret <4 x i32> %res 2129 } 2130 2131 define <4 x i32> @test_mask_andnot_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) { 2132 ; CHECK-LABEL: test_mask_andnot_epi32_rrk_128: 2133 ; CHECK: ## BB#0: 2134 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2135 ; CHECK-NEXT: vpandnd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdf,0xd1] 2136 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 2137 ; CHECK-NEXT: retq ## encoding: [0xc3] 2138 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 2139 ret <4 x i32> %res 2140 } 2141 2142 define <4 x i32> @test_mask_andnot_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { 2143 ; CHECK-LABEL: test_mask_andnot_epi32_rrkz_128: 2144 ; CHECK: ## BB#0: 2145 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2146 ; CHECK-NEXT: vpandnd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdf,0xc1] 2147 ; CHECK-NEXT: retq ## encoding: [0xc3] 2148 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 2149 ret <4 x i32> %res 2150 } 2151 2152 define <4 x i32> @test_mask_andnot_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { 2153 ; CHECK-LABEL: test_mask_andnot_epi32_rm_128: 2154 ; CHECK: ## BB#0: 2155 ; CHECK-NEXT: vpandnd (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdf,0x07] 2156 ; CHECK-NEXT: retq ## encoding: [0xc3] 2157 %b = load <4 x i32>, <4 x i32>* %ptr_b 2158 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 2159 ret <4 x i32> %res 2160 } 2161 2162 define <4 x i32> @test_mask_andnot_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) { 2163 ; CHECK-LABEL: test_mask_andnot_epi32_rmk_128: 2164 ; CHECK: ## BB#0: 2165 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2166 ; CHECK-NEXT: vpandnd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdf,0x0f] 2167 ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 2168 ; CHECK-NEXT: retq ## encoding: [0xc3] 2169 %b = load <4 x i32>, <4 x i32>* %ptr_b 2170 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 2171 ret <4 x i32> %res 2172 } 2173 2174 define <4 x i32> @test_mask_andnot_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { 2175 ; CHECK-LABEL: test_mask_andnot_epi32_rmkz_128: 2176 ; CHECK: ## BB#0: 2177 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2178 ; CHECK-NEXT: vpandnd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdf,0x07] 2179 ; CHECK-NEXT: retq ## encoding: [0xc3] 2180 %b = load <4 x i32>, <4 x i32>* %ptr_b 2181 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 2182 ret <4 x i32> %res 2183 } 2184 2185 define <4 x i32> @test_mask_andnot_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) { 2186 ; CHECK-LABEL: test_mask_andnot_epi32_rmb_128: 2187 ; CHECK: ## BB#0: 2188 ; CHECK-NEXT: vpandnd (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xdf,0x07] 2189 ; CHECK-NEXT: retq ## encoding: [0xc3] 2190 %q = load i32, i32* %ptr_b 2191 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 2192 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 2193 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 2194 ret <4 x i32> %res 2195 } 2196 2197 define <4 x i32> @test_mask_andnot_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) { 2198 ; CHECK-LABEL: test_mask_andnot_epi32_rmbk_128: 2199 ; CHECK: ## BB#0: 2200 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2201 ; CHECK-NEXT: vpandnd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xdf,0x0f] 2202 ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 2203 ; CHECK-NEXT: retq ## encoding: [0xc3] 2204 %q = load i32, i32* %ptr_b 2205 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 2206 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 2207 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 2208 ret <4 x i32> %res 2209 } 2210 2211 define <4 x i32> @test_mask_andnot_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { 2212 ; CHECK-LABEL: test_mask_andnot_epi32_rmbkz_128: 2213 ; CHECK: ## BB#0: 2214 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2215 ; CHECK-NEXT: vpandnd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xdf,0x07] 2216 ; CHECK-NEXT: retq ## encoding: [0xc3] 2217 %q = load i32, i32* %ptr_b 2218 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 2219 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 2220 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 2221 ret <4 x i32> %res 2222 } 2223 2224 declare <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 2225 2226 define <8 x i32> @test_mask_andnot_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { 2227 ; CHECK-LABEL: test_mask_andnot_epi32_rr_256: 2228 ; CHECK: ## BB#0: 2229 ; CHECK-NEXT: vpandnd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdf,0xc1] 2230 ; CHECK-NEXT: retq ## encoding: [0xc3] 2231 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 2232 ret <8 x i32> %res 2233 } 2234 2235 define <8 x i32> @test_mask_andnot_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) { 2236 ; CHECK-LABEL: test_mask_andnot_epi32_rrk_256: 2237 ; CHECK: ## BB#0: 2238 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2239 ; CHECK-NEXT: vpandnd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdf,0xd1] 2240 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 2241 ; CHECK-NEXT: retq ## encoding: [0xc3] 2242 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 2243 ret <8 x i32> %res 2244 } 2245 2246 define <8 x i32> @test_mask_andnot_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) { 2247 ; CHECK-LABEL: test_mask_andnot_epi32_rrkz_256: 2248 ; CHECK: ## BB#0: 2249 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2250 ; CHECK-NEXT: vpandnd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdf,0xc1] 2251 ; CHECK-NEXT: retq ## encoding: [0xc3] 2252 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 2253 ret <8 x i32> %res 2254 } 2255 2256 define <8 x i32> @test_mask_andnot_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { 2257 ; CHECK-LABEL: test_mask_andnot_epi32_rm_256: 2258 ; CHECK: ## BB#0: 2259 ; CHECK-NEXT: vpandnd (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdf,0x07] 2260 ; CHECK-NEXT: retq ## encoding: [0xc3] 2261 %b = load <8 x i32>, <8 x i32>* %ptr_b 2262 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 2263 ret <8 x i32> %res 2264 } 2265 2266 define <8 x i32> @test_mask_andnot_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) { 2267 ; CHECK-LABEL: test_mask_andnot_epi32_rmk_256: 2268 ; CHECK: ## BB#0: 2269 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2270 ; CHECK-NEXT: vpandnd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdf,0x0f] 2271 ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 2272 ; CHECK-NEXT: retq ## encoding: [0xc3] 2273 %b = load <8 x i32>, <8 x i32>* %ptr_b 2274 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 2275 ret <8 x i32> %res 2276 } 2277 2278 define <8 x i32> @test_mask_andnot_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) { 2279 ; CHECK-LABEL: test_mask_andnot_epi32_rmkz_256: 2280 ; CHECK: ## BB#0: 2281 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2282 ; CHECK-NEXT: vpandnd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdf,0x07] 2283 ; CHECK-NEXT: retq ## encoding: [0xc3] 2284 %b = load <8 x i32>, <8 x i32>* %ptr_b 2285 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 2286 ret <8 x i32> %res 2287 } 2288 2289 define <8 x i32> @test_mask_andnot_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) { 2290 ; CHECK-LABEL: test_mask_andnot_epi32_rmb_256: 2291 ; CHECK: ## BB#0: 2292 ; CHECK-NEXT: vpandnd (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xdf,0x07] 2293 ; CHECK-NEXT: retq ## encoding: [0xc3] 2294 %q = load i32, i32* %ptr_b 2295 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 2296 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 2297 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 2298 ret <8 x i32> %res 2299 } 2300 2301 define <8 x i32> @test_mask_andnot_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) { 2302 ; CHECK-LABEL: test_mask_andnot_epi32_rmbk_256: 2303 ; CHECK: ## BB#0: 2304 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2305 ; CHECK-NEXT: vpandnd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xdf,0x0f] 2306 ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 2307 ; CHECK-NEXT: retq ## encoding: [0xc3] 2308 %q = load i32, i32* %ptr_b 2309 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 2310 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 2311 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 2312 ret <8 x i32> %res 2313 } 2314 2315 define <8 x i32> @test_mask_andnot_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) { 2316 ; CHECK-LABEL: test_mask_andnot_epi32_rmbkz_256: 2317 ; CHECK: ## BB#0: 2318 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2319 ; CHECK-NEXT: vpandnd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xdf,0x07] 2320 ; CHECK-NEXT: retq ## encoding: [0xc3] 2321 %q = load i32, i32* %ptr_b 2322 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 2323 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 2324 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 2325 ret <8 x i32> %res 2326 } 2327 2328 declare <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 2329 2330 define <2 x i64> @test_mask_andnot_epi64_rr_128(<2 x i64> %a, <2 x i64> %b) { 2331 ; CHECK-LABEL: test_mask_andnot_epi64_rr_128: 2332 ; CHECK: ## BB#0: 2333 ; CHECK-NEXT: vpandnq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xdf,0xc1] 2334 ; CHECK-NEXT: retq ## encoding: [0xc3] 2335 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1) 2336 ret <2 x i64> %res 2337 } 2338 2339 define <2 x i64> @test_mask_andnot_epi64_rrk_128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) { 2340 ; CHECK-LABEL: test_mask_andnot_epi64_rrk_128: 2341 ; CHECK: ## BB#0: 2342 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2343 ; CHECK-NEXT: vpandnq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xdf,0xd1] 2344 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 2345 ; CHECK-NEXT: retq ## encoding: [0xc3] 2346 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) 2347 ret <2 x i64> %res 2348 } 2349 2350 define <2 x i64> @test_mask_andnot_epi64_rrkz_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) { 2351 ; CHECK-LABEL: test_mask_andnot_epi64_rrkz_128: 2352 ; CHECK: ## BB#0: 2353 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2354 ; CHECK-NEXT: vpandnq %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xdf,0xc1] 2355 ; CHECK-NEXT: retq ## encoding: [0xc3] 2356 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask) 2357 ret <2 x i64> %res 2358 } 2359 2360 define <2 x i64> @test_mask_andnot_epi64_rm_128(<2 x i64> %a, <2 x i64>* %ptr_b) { 2361 ; CHECK-LABEL: test_mask_andnot_epi64_rm_128: 2362 ; CHECK: ## BB#0: 2363 ; CHECK-NEXT: vpandnq (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xdf,0x07] 2364 ; CHECK-NEXT: retq ## encoding: [0xc3] 2365 %b = load <2 x i64>, <2 x i64>* %ptr_b 2366 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1) 2367 ret <2 x i64> %res 2368 } 2369 2370 define <2 x i64> @test_mask_andnot_epi64_rmk_128(<2 x i64> %a, <2 x i64>* %ptr_b, <2 x i64> %passThru, i8 %mask) { 2371 ; CHECK-LABEL: test_mask_andnot_epi64_rmk_128: 2372 ; CHECK: ## BB#0: 2373 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2374 ; CHECK-NEXT: vpandnq (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xdf,0x0f] 2375 ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 2376 ; CHECK-NEXT: retq ## encoding: [0xc3] 2377 %b = load <2 x i64>, <2 x i64>* %ptr_b 2378 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) 2379 ret <2 x i64> %res 2380 } 2381 2382 define <2 x i64> @test_mask_andnot_epi64_rmkz_128(<2 x i64> %a, <2 x i64>* %ptr_b, i8 %mask) { 2383 ; CHECK-LABEL: test_mask_andnot_epi64_rmkz_128: 2384 ; CHECK: ## BB#0: 2385 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2386 ; CHECK-NEXT: vpandnq (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xdf,0x07] 2387 ; CHECK-NEXT: retq ## encoding: [0xc3] 2388 %b = load <2 x i64>, <2 x i64>* %ptr_b 2389 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask) 2390 ret <2 x i64> %res 2391 } 2392 2393 define <2 x i64> @test_mask_andnot_epi64_rmb_128(<2 x i64> %a, i64* %ptr_b) { 2394 ; CHECK-LABEL: test_mask_andnot_epi64_rmb_128: 2395 ; CHECK: ## BB#0: 2396 ; CHECK-NEXT: vpandnq (%rdi){1to2}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x18,0xdf,0x07] 2397 ; CHECK-NEXT: retq ## encoding: [0xc3] 2398 %q = load i64, i64* %ptr_b 2399 %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0 2400 %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer 2401 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1) 2402 ret <2 x i64> %res 2403 } 2404 2405 define <2 x i64> @test_mask_andnot_epi64_rmbk_128(<2 x i64> %a, i64* %ptr_b, <2 x i64> %passThru, i8 %mask) { 2406 ; CHECK-LABEL: test_mask_andnot_epi64_rmbk_128: 2407 ; CHECK: ## BB#0: 2408 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2409 ; CHECK-NEXT: vpandnq (%rdi){1to2}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x19,0xdf,0x0f] 2410 ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 2411 ; CHECK-NEXT: retq ## encoding: [0xc3] 2412 %q = load i64, i64* %ptr_b 2413 %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0 2414 %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer 2415 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) 2416 ret <2 x i64> %res 2417 } 2418 2419 define <2 x i64> @test_mask_andnot_epi64_rmbkz_128(<2 x i64> %a, i64* %ptr_b, i8 %mask) { 2420 ; CHECK-LABEL: test_mask_andnot_epi64_rmbkz_128: 2421 ; CHECK: ## BB#0: 2422 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2423 ; CHECK-NEXT: vpandnq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0xdf,0x07] 2424 ; CHECK-NEXT: retq ## encoding: [0xc3] 2425 %q = load i64, i64* %ptr_b 2426 %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0 2427 %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer 2428 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask) 2429 ret <2 x i64> %res 2430 } 2431 2432 declare <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 2433 2434 define <4 x i64> @test_mask_andnot_epi64_rr_256(<4 x i64> %a, <4 x i64> %b) { 2435 ; CHECK-LABEL: test_mask_andnot_epi64_rr_256: 2436 ; CHECK: ## BB#0: 2437 ; CHECK-NEXT: vpandnq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xdf,0xc1] 2438 ; CHECK-NEXT: retq ## encoding: [0xc3] 2439 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1) 2440 ret <4 x i64> %res 2441 } 2442 2443 define <4 x i64> @test_mask_andnot_epi64_rrk_256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) { 2444 ; CHECK-LABEL: test_mask_andnot_epi64_rrk_256: 2445 ; CHECK: ## BB#0: 2446 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2447 ; CHECK-NEXT: vpandnq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xdf,0xd1] 2448 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 2449 ; CHECK-NEXT: retq ## encoding: [0xc3] 2450 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) 2451 ret <4 x i64> %res 2452 } 2453 2454 define <4 x i64> @test_mask_andnot_epi64_rrkz_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) { 2455 ; CHECK-LABEL: test_mask_andnot_epi64_rrkz_256: 2456 ; CHECK: ## BB#0: 2457 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2458 ; CHECK-NEXT: vpandnq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0xc1] 2459 ; CHECK-NEXT: retq ## encoding: [0xc3] 2460 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask) 2461 ret <4 x i64> %res 2462 } 2463 2464 define <4 x i64> @test_mask_andnot_epi64_rm_256(<4 x i64> %a, <4 x i64>* %ptr_b) { 2465 ; CHECK-LABEL: test_mask_andnot_epi64_rm_256: 2466 ; CHECK: ## BB#0: 2467 ; CHECK-NEXT: vpandnq (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xdf,0x07] 2468 ; CHECK-NEXT: retq ## encoding: [0xc3] 2469 %b = load <4 x i64>, <4 x i64>* %ptr_b 2470 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1) 2471 ret <4 x i64> %res 2472 } 2473 2474 define <4 x i64> @test_mask_andnot_epi64_rmk_256(<4 x i64> %a, <4 x i64>* %ptr_b, <4 x i64> %passThru, i8 %mask) { 2475 ; CHECK-LABEL: test_mask_andnot_epi64_rmk_256: 2476 ; CHECK: ## BB#0: 2477 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2478 ; CHECK-NEXT: vpandnq (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xdf,0x0f] 2479 ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 2480 ; CHECK-NEXT: retq ## encoding: [0xc3] 2481 %b = load <4 x i64>, <4 x i64>* %ptr_b 2482 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) 2483 ret <4 x i64> %res 2484 } 2485 2486 define <4 x i64> @test_mask_andnot_epi64_rmkz_256(<4 x i64> %a, <4 x i64>* %ptr_b, i8 %mask) { 2487 ; CHECK-LABEL: test_mask_andnot_epi64_rmkz_256: 2488 ; CHECK: ## BB#0: 2489 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2490 ; CHECK-NEXT: vpandnq (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0x07] 2491 ; CHECK-NEXT: retq ## encoding: [0xc3] 2492 %b = load <4 x i64>, <4 x i64>* %ptr_b 2493 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask) 2494 ret <4 x i64> %res 2495 } 2496 2497 define <4 x i64> @test_mask_andnot_epi64_rmb_256(<4 x i64> %a, i64* %ptr_b) { 2498 ; CHECK-LABEL: test_mask_andnot_epi64_rmb_256: 2499 ; CHECK: ## BB#0: 2500 ; CHECK-NEXT: vpandnq (%rdi){1to4}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x38,0xdf,0x07] 2501 ; CHECK-NEXT: retq ## encoding: [0xc3] 2502 %q = load i64, i64* %ptr_b 2503 %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0 2504 %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer 2505 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1) 2506 ret <4 x i64> %res 2507 } 2508 2509 define <4 x i64> @test_mask_andnot_epi64_rmbk_256(<4 x i64> %a, i64* %ptr_b, <4 x i64> %passThru, i8 %mask) { 2510 ; CHECK-LABEL: test_mask_andnot_epi64_rmbk_256: 2511 ; CHECK: ## BB#0: 2512 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2513 ; CHECK-NEXT: vpandnq (%rdi){1to4}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x39,0xdf,0x0f] 2514 ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 2515 ; CHECK-NEXT: retq ## encoding: [0xc3] 2516 %q = load i64, i64* %ptr_b 2517 %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0 2518 %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer 2519 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) 2520 ret <4 x i64> %res 2521 } 2522 2523 define <4 x i64> @test_mask_andnot_epi64_rmbkz_256(<4 x i64> %a, i64* %ptr_b, i8 %mask) { 2524 ; CHECK-LABEL: test_mask_andnot_epi64_rmbkz_256: 2525 ; CHECK: ## BB#0: 2526 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2527 ; CHECK-NEXT: vpandnq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0xdf,0x07] 2528 ; CHECK-NEXT: retq ## encoding: [0xc3] 2529 %q = load i64, i64* %ptr_b 2530 %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0 2531 %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer 2532 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask) 2533 ret <4 x i64> %res 2534 } 2535 2536 declare <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 2537