; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi2 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi2 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

define <32 x i16> @test_mask_expand_load_w_512(i8* %addr, <32 x i16> %data, i32 %mask) {
; X86-LABEL: test_mask_expand_load_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandw (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandw (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(i8* %addr, <32 x i16> %data, i32 %mask)
  ret <32 x i16> %res
}

define <32 x i16> @test_maskz_expand_load_w_512(i8* %addr, i32 %mask) {
; X86-LABEL: test_maskz_expand_load_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandw (%eax), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandw (%rdi), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(i8* %addr, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %res
}

declare <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(i8* %addr, <32 x i16> %data, i32 %mask)

define <32 x i16> @test_expand_load_w_512(i8* %addr, <32 x i16> %data) {
; X86-LABEL: test_expand_load_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X86-NEXT:    vpexpandw (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X64-NEXT:    vpexpandw (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(i8* %addr, <32 x i16> %data, i32 -1)
  ret <32 x i16> %res
}

define <64 x i8> @test_mask_expand_load_b_512(i8* %addr, <64 x i8> %data, i64 %mask) {
; X86-LABEL: test_mask_expand_load_b_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandb (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandb (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(i8* %addr, <64 x i8> %data, i64 %mask)
  ret <64 x i8> %res
}

define <64 x i8> @test_maskz_expand_load_b_512(i8* %addr, i64 %mask) {
; X86-LABEL: test_maskz_expand_load_b_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandb (%eax), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandb (%rdi), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(i8* %addr, <64 x i8> zeroinitializer, i64 %mask)
  ret <64 x i8> %res
}

declare <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(i8* %addr, <64 x i8> %data, i64 %mask)

define <64 x i8> @test_expand_load_b_512(i8* %addr, <64 x i8> %data) {
; X86-LABEL: test_expand_load_b_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
; X86-NEXT:    vpexpandb (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
; X64-NEXT:    vpexpandb (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(i8* %addr, <64 x i8> %data, i64 -1)
  ret <64 x i8> %res
}

define void @test_mask_compress_store_w_512(i8* %addr, <32 x i16> %data, i32 %mask) {
; X86-LABEL: test_mask_compress_store_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpcompressw %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpcompressw %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.w.512(i8* %addr, <32 x i16> %data, i32 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.compress.store.w.512(i8* %addr, <32 x i16> %data, i32 %mask)

define <32 x i16> @test_mask_compress_w_512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_mask_compress_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0xc1]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0xc1]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask)
  ret <32 x i16> %res
}

define <32 x i16> @test_maskz_compress_w_512(<32 x i16> %data, i32 %mask) {
; X86-LABEL: test_maskz_compress_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x63,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x63,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %res
}

define <32 x i16> @test_compress_w_512(<32 x i16> %data) {
; CHECK-LABEL: test_compress_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> undef, i32 -1)
  ret <32 x i16> %res
}

declare <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> %src0, i32 %mask)

define void @test_compress_store_w_512(i8* %addr, <32 x i16> %data) {
; X86-LABEL: test_compress_store_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X86-NEXT:    vpcompressw %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X64-NEXT:    vpcompressw %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.w.512(i8* %addr, <32 x i16> %data, i32 -1)
  ret void
}

define void @test_mask_compress_store_b_512(i8* %addr, <64 x i8> %data, i64 %mask) {
; X86-LABEL: test_mask_compress_store_b_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpcompressb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT:    vpcompressb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.b.512(i8* %addr, <64 x i8> %data, i64 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.compress.store.b.512(i8* %addr, <64 x i8> %data, i64 %mask)

define <64 x i8> @test_mask_compress_b_512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask) {
; X86-LABEL: test_mask_compress_b_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    kunpckdq %k0, %k1, %k1 # encoding: [0xc4,0xe1,0xf4,0x4b,0xc8]
; X86-NEXT:    vpcompressb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0xc1]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0xc1]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask)
  ret <64 x i8> %res
}

define <64 x i8> @test_maskz_compress_b_512(<64 x i8> %data, i64 %mask) {
; X86-LABEL: test_maskz_compress_b_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    kunpckdq %k0, %k1, %k1 # encoding: [0xc4,0xe1,0xf4,0x4b,0xc8]
; X86-NEXT:    vpcompressb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x63,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x63,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> zeroinitializer, i64 %mask)
  ret <64 x i8> %res
}

define <64 x i8> @test_compress_b_512(<64 x i8> %data) {
; CHECK-LABEL: test_compress_b_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> undef, i64 -1)
  ret <64 x i8> %res
}

declare <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> %src0, i64 %mask)

define void @test_compress_store_b_512(i8* %addr, <64 x i8> %data) {
; X86-LABEL: test_compress_store_b_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
; X86-NEXT:    vpcompressb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
; X64-NEXT:    vpcompressb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.b.512(i8* %addr, <64 x i8> %data, i64 -1)
  ret void
}

define <16 x i32>@test_int_x86_avx512_mask_vpshld_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_512:
; X86:       # %bb.0:
; X86-NEXT:    vpshldd $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x71,0xd9,0x16]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshldd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x71,0xd1,0x16]
; X86-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_512:
; X64:       # %bb.0:
; X64-NEXT:    vpshldd $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x71,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x71,0xd1,0x16]
; X64-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.vpshld.d.512(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 %x4)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpshld.d.512(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 -1)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}
declare <16 x i32> @llvm.x86.avx512.mask.vpshld.d.512(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)

define <8 x i64>@test_int_x86_avx512_mask_vpshld_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_512:
; X86:       # %bb.0:
; X86-NEXT:    vpshldq $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x71,0xd9,0x16]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshldq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x71,0xd1,0x16]
; X86-NEXT:    vpaddq %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_512:
; X64:       # %bb.0:
; X64-NEXT:    vpshldq $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x71,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x71,0xd1,0x16]
; X64-NEXT:    vpaddq %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.vpshld.q.512(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 %x4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.vpshld.q.512(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 -1)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}
declare <8 x i64> @llvm.x86.avx512.mask.vpshld.q.512(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)

define <32 x i16>@test_int_x86_avx512_mask_vpshld_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_512:
; X86:       # %bb.0:
; X86-NEXT:    vpshldw $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x70,0xd9,0x16]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshldw $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x70,0xd1,0x16]
; X86-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_512:
; X64:       # %bb.0:
; X64-NEXT:    vpshldw $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x70,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldw $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x70,0xd1,0x16]
; X64-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.vpshld.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 22, <32 x i16> %x3, i32 %x4)
  %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpshld.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 22, <32 x i16> %x3, i32 -1)
  %res2 = add <32 x i16> %res, %res1
  ret <32 x i16> %res2
}
declare <32 x i16> @llvm.x86.avx512.mask.vpshld.w.512(<32 x i16>, <32 x i16>, i32, <32 x i16>, i32)

define <16 x i32>@test_int_x86_avx512_mask_vpshrd_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_512:
; X86:       # %bb.0:
; X86-NEXT:    vpshrdd $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x73,0xd9,0x16]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshrdd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x73,0xd1,0x16]
; X86-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_512:
; X64:       # %bb.0:
; X64-NEXT:    vpshrdd $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x73,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x73,0xd1,0x16]
; X64-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.vpshrd.d.512(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 %x4)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpshrd.d.512(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 -1)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}
declare <16 x i32> @llvm.x86.avx512.mask.vpshrd.d.512(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)

define <8 x i64>@test_int_x86_avx512_mask_vpshrd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_512:
; X86:       # %bb.0:
; X86-NEXT:    vpshrdq $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x73,0xd9,0x16]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshrdq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x73,0xd1,0x16]
; X86-NEXT:    vpaddq %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_512:
; X64:       # %bb.0:
; X64-NEXT:    vpshrdq $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x73,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x73,0xd1,0x16]
; X64-NEXT:    vpaddq %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.vpshrd.q.512(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 %x4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.vpshrd.q.512(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 -1)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}
declare <8 x i64> @llvm.x86.avx512.mask.vpshrd.q.512(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)

define <32 x i16>@test_int_x86_avx512_mask_vpshrd_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_512:
; X86:       # %bb.0:
; X86-NEXT:    vpshrdw $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x72,0xd9,0x16]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshrdw $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x72,0xd1,0x16]
; X86-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_512:
; X64:       # %bb.0:
; X64-NEXT:    vpshrdw $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x72,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdw $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x72,0xd1,0x16]
; X64-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.vpshrd.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 22, <32 x i16> %x3, i32 %x4)
  %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpshrd.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 22, <32 x i16> %x3, i32 -1)
  %res2 = add <32 x i16> %res, %res1
  ret <32 x i16> %res2
}
declare <32 x i16> @llvm.x86.avx512.mask.vpshrd.w.512(<32 x i16>, <32 x i16>, i32, <32 x i16>, i32)