1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi2,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86 3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi2,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64 4 5 define <8 x i16> @test_expand_w_128(<8 x i16> %data) { 6 ; CHECK-LABEL: test_expand_w_128: 7 ; CHECK: # %bb.0: 8 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 9 %res = call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> undef, i8 -1) 10 ret <8 x i16> %res 11 } 12 13 define <8 x i16> @test_mask_expand_w_128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) { 14 ; X86-LABEL: test_mask_expand_w_128: 15 ; X86: # %bb.0: 16 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 17 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 18 ; X86-NEXT: vpexpandw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0xc8] 19 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 20 ; X86-NEXT: retl # encoding: [0xc3] 21 ; 22 ; X64-LABEL: test_mask_expand_w_128: 23 ; X64: # %bb.0: 24 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 25 ; X64-NEXT: vpexpandw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0xc8] 26 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 27 ; X64-NEXT: retq # encoding: [0xc3] 28 %res = call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) 29 ret <8 x i16> %res 30 } 31 32 define <8 x i16> @test_maskz_expand_w_128(<8 x i16> %data, i8 %mask) { 33 ; X86-LABEL: test_maskz_expand_w_128: 34 ; X86: # %bb.0: 35 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 36 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 37 ; X86-NEXT: vpexpandw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0xc0] 38 ; X86-NEXT: retl # encoding: [0xc3] 39 ; 40 ; X64-LABEL: test_maskz_expand_w_128: 41 ; X64: # %bb.0: 42 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 43 ; X64-NEXT: vpexpandw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0xc0] 44 ; X64-NEXT: retq # encoding: [0xc3] 45 %res = call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> zeroinitializer, i8 %mask) 46 ret <8 x i16> %res 47 } 48 49 declare <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> %src0, i8 %mask) 50 51 define <16 x i8> @test_expand_b_128(<16 x i8> %data) { 52 ; CHECK-LABEL: test_expand_b_128: 53 ; CHECK: # %bb.0: 54 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 55 %res = call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> undef, i16 -1) 56 ret <16 x i8> %res 57 } 58 59 define <16 x i8> @test_mask_expand_b_128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) { 60 ; X86-LABEL: test_mask_expand_b_128: 61 ; X86: # %bb.0: 62 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 63 ; X86-NEXT: vpexpandb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0xc8] 64 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 65 ; X86-NEXT: retl # encoding: [0xc3] 66 ; 67 ; X64-LABEL: test_mask_expand_b_128: 68 ; X64: # %bb.0: 69 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 70 ; X64-NEXT: vpexpandb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0xc8] 71 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 72 ; X64-NEXT: retq # encoding: [0xc3] 73 %res = call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) 74 ret <16 x i8> %res 75 } 76 77 define <16 x i8> @test_maskz_expand_b_128(<16 x i8> %data, i16 %mask) { 78 ; X86-LABEL: test_maskz_expand_b_128: 79 ; X86: # %bb.0: 80 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 81 ; X86-NEXT: vpexpandb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0xc0] 82 ; X86-NEXT: retl # encoding: [0xc3] 83 ; 84 ; X64-LABEL: test_maskz_expand_b_128: 85 ; X64: # %bb.0: 86 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 87 ; X64-NEXT: vpexpandb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0xc0] 88 ; X64-NEXT: retq # encoding: [0xc3] 89 %res = call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> zeroinitializer, i16 %mask) 90 ret <16 x i8> %res 91 } 92 93 declare <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> %src0, i16 %mask) 94 95 define <8 x i16> @test_mask_compress_w_128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) { 96 ; X86-LABEL: test_mask_compress_w_128: 97 ; X86: # %bb.0: 98 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 99 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 100 ; X86-NEXT: vpcompressw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0xc1] 101 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 102 ; X86-NEXT: retl # encoding: [0xc3] 103 ; 104 ; X64-LABEL: test_mask_compress_w_128: 105 ; X64: # %bb.0: 106 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 107 ; X64-NEXT: vpcompressw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0xc1] 108 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 109 ; X64-NEXT: retq # encoding: [0xc3] 110 %res = call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) 111 ret <8 x i16> %res 112 } 113 114 define <8 x i16> @test_maskz_compress_w_128(<8 x i16> %data, i8 %mask) { 115 ; X86-LABEL: test_maskz_compress_w_128: 116 ; X86: # %bb.0: 117 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 118 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 119 ; X86-NEXT: vpcompressw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x63,0xc0] 120 ; X86-NEXT: retl # encoding: [0xc3] 121 ; 122 ; X64-LABEL: test_maskz_compress_w_128: 123 ; X64: # %bb.0: 124 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 125 ; X64-NEXT: vpcompressw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x63,0xc0] 126 ; X64-NEXT: retq # encoding: [0xc3] 127 %res = call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> zeroinitializer, i8 %mask) 128 ret <8 x i16> %res 129 } 130 131 define <8 x i16> @test_compress_w_128(<8 x i16> %data) { 132 ; CHECK-LABEL: test_compress_w_128: 133 ; CHECK: # %bb.0: 134 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 135 %res = call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> undef, i8 -1) 136 ret <8 x i16> %res 137 } 138 139 declare <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> %src0, i8 %mask) 140 141 define <16 x i8> @test_mask_compress_b_128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) { 142 ; X86-LABEL: test_mask_compress_b_128: 143 ; X86: # %bb.0: 144 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 145 ; X86-NEXT: vpcompressb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0xc1] 146 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 147 ; X86-NEXT: retl # encoding: [0xc3] 148 ; 149 ; X64-LABEL: test_mask_compress_b_128: 150 ; X64: # %bb.0: 151 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 152 ; X64-NEXT: vpcompressb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0xc1] 153 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 154 ; X64-NEXT: retq # encoding: [0xc3] 155 %res = call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) 156 ret <16 x i8> %res 157 } 158 159 define <16 x i8> @test_maskz_compress_b_128(<16 x i8> %data, i16 %mask) { 160 ; X86-LABEL: test_maskz_compress_b_128: 161 ; X86: # %bb.0: 162 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 163 ; X86-NEXT: vpcompressb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x63,0xc0] 164 ; X86-NEXT: retl # encoding: [0xc3] 165 ; 166 ; X64-LABEL: test_maskz_compress_b_128: 167 ; X64: # %bb.0: 168 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 169 ; X64-NEXT: vpcompressb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x63,0xc0] 170 ; X64-NEXT: retq # encoding: [0xc3] 171 %res = call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> zeroinitializer, i16 %mask) 172 ret <16 x i8> %res 173 } 174 175 define <16 x i8> @test_compress_b_128(<16 x i8> %data) { 176 ; CHECK-LABEL: test_compress_b_128: 177 ; CHECK: # %bb.0: 178 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 179 %res = call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> undef, i16 -1) 180 ret <16 x i8> %res 181 } 182 183 declare <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> %src0, i16 %mask) 184 185 define <16 x i16> @test_expand_w_256(<16 x i16> %data) { 186 ; CHECK-LABEL: test_expand_w_256: 187 ; CHECK: # %bb.0: 188 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 189 %res = call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> undef, i16 -1) 190 ret <16 x i16> %res 191 } 192 193 define <16 x i16> @test_mask_expand_w_256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) { 194 ; X86-LABEL: test_mask_expand_w_256: 195 ; X86: # %bb.0: 196 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 197 ; X86-NEXT: vpexpandw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0xc8] 198 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 199 ; X86-NEXT: retl # encoding: [0xc3] 200 ; 201 ; X64-LABEL: test_mask_expand_w_256: 202 ; X64: # %bb.0: 203 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 204 ; X64-NEXT: vpexpandw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0xc8] 205 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 206 ; X64-NEXT: retq # encoding: [0xc3] 207 %res = call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) 208 ret <16 x i16> %res 209 } 210 211 define <16 x i16> @test_maskz_expand_w_256(<16 x i16> %data, i16 %mask) { 212 ; X86-LABEL: test_maskz_expand_w_256: 213 ; X86: # %bb.0: 214 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 215 ; X86-NEXT: vpexpandw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0xc0] 216 ; X86-NEXT: retl # encoding: [0xc3] 217 ; 218 ; X64-LABEL: test_maskz_expand_w_256: 219 ; X64: # %bb.0: 220 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 221 ; X64-NEXT: vpexpandw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0xc0] 222 ; X64-NEXT: retq # encoding: [0xc3] 223 %res = call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> zeroinitializer, i16 %mask) 224 ret <16 x i16> %res 225 } 226 227 declare <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> %src0, i16 %mask) 228 229 define <32 x i8> @test_expand_b_256(<32 x i8> %data) { 230 ; CHECK-LABEL: test_expand_b_256: 231 ; CHECK: # %bb.0: 232 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 233 %res = call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> undef, i32 -1) 234 ret <32 x i8> %res 235 } 236 237 define <32 x i8> @test_mask_expand_b_256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) { 238 ; X86-LABEL: test_mask_expand_b_256: 239 ; X86: # %bb.0: 240 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 241 ; X86-NEXT: vpexpandb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0xc8] 242 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 243 ; X86-NEXT: retl # encoding: [0xc3] 244 ; 245 ; X64-LABEL: test_mask_expand_b_256: 246 ; X64: # %bb.0: 247 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 248 ; X64-NEXT: vpexpandb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0xc8] 249 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 250 ; X64-NEXT: retq # encoding: [0xc3] 251 %res = call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) 252 ret <32 x i8> %res 253 } 254 255 define <32 x i8> @test_maskz_expand_b_256(<32 x i8> %data, i32 %mask) { 256 ; X86-LABEL: test_maskz_expand_b_256: 257 ; X86: # %bb.0: 258 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 259 ; X86-NEXT: vpexpandb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0xc0] 260 ; X86-NEXT: retl # encoding: [0xc3] 261 ; 262 ; X64-LABEL: test_maskz_expand_b_256: 263 ; X64: # %bb.0: 264 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 265 ; X64-NEXT: vpexpandb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0xc0] 266 ; X64-NEXT: retq # encoding: [0xc3] 267 %res = call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> zeroinitializer, i32 %mask) 268 ret <32 x i8> %res 269 } 270 271 declare <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> %src0, i32 %mask) 272 273 define <16 x i16> @test_mask_compress_w_256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) { 274 ; X86-LABEL: test_mask_compress_w_256: 275 ; X86: # %bb.0: 276 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 277 ; X86-NEXT: vpcompressw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0xc1] 278 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 279 ; X86-NEXT: retl # encoding: [0xc3] 280 ; 281 ; X64-LABEL: test_mask_compress_w_256: 282 ; X64: # %bb.0: 283 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 284 ; X64-NEXT: vpcompressw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0xc1] 285 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 286 ; X64-NEXT: retq # encoding: [0xc3] 287 %res = call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) 288 ret <16 x i16> %res 289 } 290 291 define <16 x i16> @test_maskz_compress_w_256(<16 x i16> %data, i16 %mask) { 292 ; X86-LABEL: test_maskz_compress_w_256: 293 ; X86: # %bb.0: 294 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 295 ; X86-NEXT: vpcompressw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x63,0xc0] 296 ; X86-NEXT: retl # encoding: [0xc3] 297 ; 298 ; X64-LABEL: test_maskz_compress_w_256: 299 ; X64: # %bb.0: 300 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 301 ; X64-NEXT: vpcompressw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x63,0xc0] 302 ; X64-NEXT: retq # encoding: [0xc3] 303 %res = call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> zeroinitializer, i16 %mask) 304 ret <16 x i16> %res 305 } 306 307 define <16 x i16> @test_compress_w_256(<16 x i16> %data) { 308 ; CHECK-LABEL: test_compress_w_256: 309 ; CHECK: # %bb.0: 310 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 311 %res = call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> undef, i16 -1) 312 ret <16 x i16> %res 313 } 314 315 declare <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> %src0, i16 %mask) 316 317 define <32 x i8> @test_mask_compress_b_256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) { 318 ; X86-LABEL: test_mask_compress_b_256: 319 ; X86: # %bb.0: 320 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 321 ; X86-NEXT: vpcompressb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0xc1] 322 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 323 ; X86-NEXT: retl # encoding: [0xc3] 324 ; 325 ; X64-LABEL: test_mask_compress_b_256: 326 ; X64: # %bb.0: 327 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 328 ; X64-NEXT: vpcompressb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0xc1] 329 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 330 ; X64-NEXT: retq # encoding: [0xc3] 331 %res = call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) 332 ret <32 x i8> %res 333 } 334 335 define <32 x i8> @test_maskz_compress_b_256(<32 x i8> %data, i32 %mask) { 336 ; X86-LABEL: test_maskz_compress_b_256: 337 ; X86: # %bb.0: 338 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 339 ; X86-NEXT: vpcompressb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x63,0xc0] 340 ; X86-NEXT: retl # encoding: [0xc3] 341 ; 342 ; X64-LABEL: test_maskz_compress_b_256: 343 ; X64: # %bb.0: 344 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 345 ; X64-NEXT: vpcompressb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x63,0xc0] 346 ; X64-NEXT: retq # encoding: [0xc3] 347 %res = call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> zeroinitializer, i32 %mask) 348 ret <32 x i8> %res 349 } 350 351 define <32 x i8> @test_compress_b_256(<32 x i8> %data) { 352 ; CHECK-LABEL: test_compress_b_256: 353 ; CHECK: # %bb.0: 354 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 355 %res = call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> undef, i32 -1) 356 ret <32 x i8> %res 357 } 358 359 declare <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> %src0, i32 %mask) 360 361 define <4 x i32>@test_int_x86_avx512_mask_vpshld_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) { 362 ; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_128: 363 ; X86: # %bb.0: 364 ; X86-NEXT: vpshldd $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xd9,0x16] 365 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 366 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 367 ; X86-NEXT: vpshldd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x71,0xd1,0x16] 368 ; X86-NEXT: vpshldd $22, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x71,0xc1,0x16] 369 ; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] 370 ; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] 371 ; X86-NEXT: retl # encoding: [0xc3] 372 ; 373 ; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_128: 374 ; X64: # %bb.0: 375 ; X64-NEXT: vpshldd $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xd9,0x16] 376 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 377 ; X64-NEXT: vpshldd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x71,0xd1,0x16] 378 ; X64-NEXT: vpshldd $22, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x71,0xc1,0x16] 379 ; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] 380 ; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] 381 ; X64-NEXT: retq # encoding: [0xc3] 382 %1 = call <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22) 383 %2 = bitcast i8 %x4 to <8 x i1> 384 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 385 %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x3 386 %4 = call <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22) 387 %5 = call <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22) 388 %6 = bitcast i8 %x4 to <8 x i1> 389 %extract = shufflevector <8 x i1> %6, <8 x i1> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 390 %7 = select <4 x i1> %extract, <4 x i32> %5, <4 x i32> zeroinitializer 391 %res3 = add <4 x i32> %3, %4 392 %res4 = add <4 x i32> %res3, %7 393 ret <4 x i32> %res4 394 } 395 declare <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32>, <4 x i32>, i32) 396 397 define <8 x i32>@test_int_x86_avx512_mask_vpshld_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) { 398 ; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_256: 399 ; X86: # %bb.0: 400 ; X86-NEXT: vpshldd $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0x7d,0x28,0x71,0xd9,0x16] 401 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 402 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 403 ; X86-NEXT: vpshldd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x71,0xd1,0x16] 404 ; X86-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3] 405 ; X86-NEXT: retl # encoding: [0xc3] 406 ; 407 ; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_256: 408 ; X64: # %bb.0: 409 ; X64-NEXT: vpshldd $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0x7d,0x28,0x71,0xd9,0x16] 410 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 411 ; X64-NEXT: vpshldd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x71,0xd1,0x16] 412 ; X64-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3] 413 ; X64-NEXT: retq # encoding: [0xc3] 414 %1 = call <8 x i32> @llvm.x86.avx512.vpshld.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22) 415 %2 = bitcast i8 %x4 to <8 x i1> 416 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x3 417 %4 = call <8 x i32> @llvm.x86.avx512.vpshld.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22) 418 %res2 = add <8 x i32> %3, %4 419 ret <8 x i32> %res2 420 } 421 declare <8 x i32> @llvm.x86.avx512.vpshld.d.256(<8 x i32>, <8 x i32>, i32) 422 423 define <2 x i64>@test_int_x86_avx512_mask_vpshld_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) { 424 ; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_128: 425 ; X86: # %bb.0: 426 ; X86-NEXT: vpshldq $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x71,0xd9,0x16] 427 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 428 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 429 ; X86-NEXT: vpshldq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x71,0xd1,0x16] 430 ; X86-NEXT: vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3] 431 ; X86-NEXT: retl # encoding: [0xc3] 432 ; 433 ; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_128: 434 ; X64: # %bb.0: 435 ; X64-NEXT: vpshldq $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x71,0xd9,0x16] 436 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 437 ; X64-NEXT: vpshldq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x71,0xd1,0x16] 438 ; X64-NEXT: vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3] 439 ; X64-NEXT: retq # encoding: [0xc3] 440 %1 = call <2 x i64> @llvm.x86.avx512.vpshld.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22) 441 %2 = bitcast i8 %x4 to <8 x i1> 442 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1> 443 %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x3 444 %4 = call <2 x i64> @llvm.x86.avx512.vpshld.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22) 445 %res2 = add <2 x i64> %3, %4 446 ret <2 x i64> %res2 447 } 448 declare <2 x i64> @llvm.x86.avx512.vpshld.q.128(<2 x i64>, <2 x i64>, i32) 449 450 define <4 x i64>@test_int_x86_avx512_mask_vpshld_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) { 451 ; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_256: 452 ; X86: # %bb.0: 453 ; X86-NEXT: vpshldq $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x71,0xd9,0x16] 454 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 455 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 456 ; X86-NEXT: vpshldq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x71,0xd1,0x16] 457 ; X86-NEXT: vpaddq %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3] 458 ; X86-NEXT: retl # encoding: [0xc3] 459 ; 460 ; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_256: 461 ; X64: # %bb.0: 462 ; X64-NEXT: vpshldq $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x71,0xd9,0x16] 463 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 464 ; X64-NEXT: vpshldq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x71,0xd1,0x16] 465 ; X64-NEXT: vpaddq %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3] 466 ; X64-NEXT: retq # encoding: [0xc3] 467 %1 = call <4 x i64> @llvm.x86.avx512.vpshld.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22) 468 %2 = bitcast i8 %x4 to <8 x i1> 469 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 470 %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x3 471 %4 = call <4 x i64> @llvm.x86.avx512.vpshld.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22) 472 %res2 = add <4 x i64> %3, %4 473 ret <4 x i64> %res2 474 } 475 declare <4 x i64> @llvm.x86.avx512.vpshld.q.256(<4 x i64>, <4 x i64>, i32) 476 477 define <8 x i16>@test_int_x86_avx512_mask_vpshld_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x3, i8 %x4) { 478 ; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_128: 479 ; X86: # %bb.0: 480 ; X86-NEXT: vpshldw $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xd9,0x16] 481 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 482 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 483 ; X86-NEXT: vpshldw $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xd1,0x16] 484 ; X86-NEXT: vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] 485 ; X86-NEXT: retl # encoding: [0xc3] 486 ; 487 ; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_128: 488 ; X64: # %bb.0: 489 ; X64-NEXT: vpshldw $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xd9,0x16] 490 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 491 ; X64-NEXT: vpshldw $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xd1,0x16] 492 ; X64-NEXT: vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] 493 ; X64-NEXT: retq # encoding: [0xc3] 494 %1 = call <8 x i16> @llvm.x86.avx512.vpshld.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 22) 495 %2 = bitcast i8 %x4 to <8 x i1> 496 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x3 497 %4 = call <8 x i16> @llvm.x86.avx512.vpshld.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 22) 498 %res2 = add <8 x i16> %3, %4 499 ret <8 x i16> %res2 500 } 501 declare <8 x i16> @llvm.x86.avx512.vpshld.w.128(<8 x i16>, <8 x i16>, i32) 502 503 define <16 x i16>@test_int_x86_avx512_mask_vpshld_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x3, i16 %x4) { 504 ; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_256: 505 ; X86: # %bb.0: 506 ; X86-NEXT: vpshldw $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xd9,0x16] 507 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 508 ; X86-NEXT: vpshldw $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xd1,0x16] 509 ; X86-NEXT: vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3] 510 ; X86-NEXT: retl # encoding: [0xc3] 511 ; 512 ; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_256: 513 ; X64: # %bb.0: 514 ; X64-NEXT: vpshldw $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xd9,0x16] 515 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 516 ; X64-NEXT: vpshldw $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xd1,0x16] 517 ; X64-NEXT: vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3] 518 ; X64-NEXT: retq # encoding: [0xc3] 519 %1 = call <16 x i16> @llvm.x86.avx512.vpshld.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 22) 520 %2 = bitcast i16 %x4 to <16 x i1> 521 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x3 522 %4 = call <16 x i16> @llvm.x86.avx512.vpshld.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 22) 523 %res2 = add <16 x i16> %3, %4 524 ret <16 x i16> %res2 525 } 526 declare <16 x i16> @llvm.x86.avx512.vpshld.w.256(<16 x i16>, <16 x i16>, i32) 527 528 define <4 x i32>@test_int_x86_avx512_mask_vpshrd_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) { 529 ; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_128: 530 ; X86: # %bb.0: 531 ; X86-NEXT: vpshrdd $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xd9,0x16] 532 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 533 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 534 ; X86-NEXT: vpshrdd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x73,0xd1,0x16] 535 ; X86-NEXT: vpshrdd $22, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x73,0xc1,0x16] 536 ; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] 537 ; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] 538 ; X86-NEXT: retl # encoding: [0xc3] 539 ; 540 ; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_128: 541 ; X64: # %bb.0: 542 ; X64-NEXT: vpshrdd $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xd9,0x16] 543 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 544 ; X64-NEXT: vpshrdd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x73,0xd1,0x16] 545 ; X64-NEXT: vpshrdd $22, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x73,0xc1,0x16] 546 ; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] 547 ; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] 548 ; X64-NEXT: retq # encoding: [0xc3] 549 %1 = call <4 x i32> @llvm.x86.avx512.vpshrd.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22) 550 %2 = bitcast i8 %x4 to <8 x i1> 551 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 552 %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x3 553 %4 = call <4 x i32> @llvm.x86.avx512.vpshrd.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22) 554 %5 = call <4 x i32> @llvm.x86.avx512.vpshrd.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22) 555 %6 = bitcast i8 %x4 to <8 x i1> 556 %extract = shufflevector <8 x i1> %6, <8 x i1> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 557 %7 = select <4 x i1> %extract, <4 x i32> %5, <4 x i32> zeroinitializer 558 %res3 = add <4 x i32> %3, %4 559 %res4 = add <4 x i32> %res3, %7 560 ret <4 x i32> %res4 561 } 562 declare <4 x i32> @llvm.x86.avx512.vpshrd.d.128(<4 x i32>, <4 x i32>, i32) 563 564 define <8 x i32>@test_int_x86_avx512_mask_vpshrd_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) { 565 ; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_256: 566 ; X86: # %bb.0: 567 ; X86-NEXT: vpshrdd $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0x7d,0x28,0x73,0xd9,0x16] 568 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 569 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 570 ; X86-NEXT: vpshrdd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x73,0xd1,0x16] 571 ; X86-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3] 572 ; X86-NEXT: retl # encoding: [0xc3] 573 ; 574 ; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_256: 575 ; X64: # %bb.0: 576 ; X64-NEXT: vpshrdd $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0x7d,0x28,0x73,0xd9,0x16] 577 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 578 ; X64-NEXT: vpshrdd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x73,0xd1,0x16] 579 ; X64-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3] 580 ; X64-NEXT: retq # encoding: [0xc3] 581 %1 = call <8 x i32> @llvm.x86.avx512.vpshrd.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22) 582 %2 = bitcast i8 %x4 to <8 x i1> 583 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x3 584 %4 = call <8 x i32> @llvm.x86.avx512.vpshrd.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22) 585 %res2 = add <8 x i32> %3, %4 586 ret <8 x i32> %res2 587 } 588 declare <8 x i32> @llvm.x86.avx512.vpshrd.d.256(<8 x i32>, <8 x i32>, i32) 589 590 define <2 x i64>@test_int_x86_avx512_mask_vpshrd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) { 591 ; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_128: 592 ; X86: # %bb.0: 593 ; X86-NEXT: vpshrdq $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x73,0xd9,0x16] 594 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 595 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 596 ; X86-NEXT: vpshrdq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x73,0xd1,0x16] 597 ; X86-NEXT: vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3] 598 ; X86-NEXT: retl # encoding: [0xc3] 599 ; 600 ; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_128: 601 ; X64: # %bb.0: 602 ; X64-NEXT: vpshrdq $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x73,0xd9,0x16] 603 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 604 ; X64-NEXT: vpshrdq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x73,0xd1,0x16] 605 ; X64-NEXT: vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3] 606 ; X64-NEXT: retq # encoding: [0xc3] 607 %1 = call <2 x i64> @llvm.x86.avx512.vpshrd.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22) 608 %2 = bitcast i8 %x4 to <8 x i1> 609 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1> 610 %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x3 611 %4 = call <2 x i64> @llvm.x86.avx512.vpshrd.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22) 612 %res2 = add <2 x i64> %3, %4 613 ret <2 x i64> %res2 614 } 615 declare <2 x i64> @llvm.x86.avx512.vpshrd.q.128(<2 x i64>, <2 x i64>, i32) 616 617 define <4 x i64>@test_int_x86_avx512_mask_vpshrd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) { 618 ; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_256: 619 ; X86: # %bb.0: 620 ; X86-NEXT: vpshrdq $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x73,0xd9,0x16] 621 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 622 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 623 ; X86-NEXT: vpshrdq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x73,0xd1,0x16] 624 ; X86-NEXT: vpaddq %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3] 625 ; X86-NEXT: retl # encoding: [0xc3] 626 ; 627 ; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_256: 628 ; X64: # %bb.0: 629 ; X64-NEXT: vpshrdq $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x73,0xd9,0x16] 630 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 631 ; X64-NEXT: vpshrdq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x73,0xd1,0x16] 632 ; X64-NEXT: vpaddq %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3] 633 ; X64-NEXT: retq # encoding: [0xc3] 634 %1 = call <4 x i64> @llvm.x86.avx512.vpshrd.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22) 635 %2 = bitcast i8 %x4 to <8 x i1> 636 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 637 %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x3 638 %4 = call <4 x i64> @llvm.x86.avx512.vpshrd.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22) 639 %res2 = add <4 x i64> %3, %4 640 ret <4 x i64> %res2 641 } 642 declare <4 x i64> @llvm.x86.avx512.vpshrd.q.256(<4 x i64>, <4 x i64>, i32) 643 644 define <8 x i16>@test_int_x86_avx512_mask_vpshrd_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x3, i8 %x4) { 645 ; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_128: 646 ; X86: # %bb.0: 647 ; X86-NEXT: vpshrdw $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xd9,0x16] 648 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 649 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 650 ; X86-NEXT: vpshrdw $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xd1,0x16] 651 ; X86-NEXT: vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] 652 ; X86-NEXT: retl # encoding: [0xc3] 653 ; 654 ; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_128: 655 ; X64: # %bb.0: 656 ; X64-NEXT: vpshrdw $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xd9,0x16] 657 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 658 ; X64-NEXT: vpshrdw $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xd1,0x16] 659 ; X64-NEXT: vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] 660 ; X64-NEXT: retq # encoding: [0xc3] 661 %1 = call <8 x i16> @llvm.x86.avx512.vpshrd.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 22) 662 %2 = bitcast i8 %x4 to <8 x i1> 663 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x3 664 %4 = call <8 x i16> @llvm.x86.avx512.vpshrd.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 22) 665 %res2 = add <8 x i16> %3, %4 666 ret <8 x i16> %res2 667 } 668 declare <8 x i16> @llvm.x86.avx512.vpshrd.w.128(<8 x i16>, <8 x i16>, i32) 669 670 define <16 x i16>@test_int_x86_avx512_mask_vpshrd_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x3, i16 %x4) { 671 ; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_256: 672 ; X86: # %bb.0: 673 ; X86-NEXT: vpshrdw $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xd9,0x16] 674 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 675 ; X86-NEXT: vpshrdw $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xd1,0x16] 676 ; X86-NEXT: vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3] 677 ; X86-NEXT: retl # encoding: [0xc3] 678 ; 679 ; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_256: 680 ; X64: # %bb.0: 681 ; X64-NEXT: vpshrdw $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xd9,0x16] 682 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 683 ; X64-NEXT: vpshrdw $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xd1,0x16] 684 ; X64-NEXT: vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3] 685 ; X64-NEXT: retq # encoding: [0xc3] 686 %1 = call <16 x i16> @llvm.x86.avx512.vpshrd.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 22) 687 %2 = bitcast i16 %x4 to <16 x i1> 688 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x3 689 %4 = call <16 x i16> @llvm.x86.avx512.vpshrd.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 22) 690 %res2 = add <16 x i16> %3, %4 691 ret <16 x i16> %res2 692 } 693 declare <16 x i16> @llvm.x86.avx512.vpshrd.w.256(<16 x i16>, <16 x i16>, i32) 694 695 declare <8 x i32> @llvm.x86.avx512.mask.vpshrdv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 696 declare <8 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 697 698 define <8 x i32>@test_int_x86_avx512_mask_vpshrdv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) { 699 ; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_d_256: 700 ; X86: # %bb.0: 701 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 702 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 703 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 704 ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 705 ; X86-NEXT: vpshrdvd (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x73,0x18] 706 ; X86-NEXT: vmovdqa %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe0] 707 ; X86-NEXT: vpshrdvd %ymm2, %ymm1, %ymm4 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x73,0xe2] 708 ; X86-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x73,0xc2] 709 ; X86-NEXT: vpaddd %ymm4, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc4] 710 ; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] 711 ; X86-NEXT: retl # encoding: [0xc3] 712 ; 713 ; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_d_256: 714 ; X64: # %bb.0: 715 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 716 ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 717 ; X64-NEXT: vpshrdvd (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x73,0x1f] 718 ; X64-NEXT: vmovdqa %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe0] 719 ; X64-NEXT: vpshrdvd %ymm2, %ymm1, %ymm4 # encoding: [0x62,0xf2,0x75,0x28,0x73,0xe2] 720 ; X64-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x73,0xc2] 721 ; X64-NEXT: vpaddd %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfe,0xc0] 722 ; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] 723 ; X64-NEXT: retq # encoding: [0xc3] 724 %x2 = load <8 x i32>, <8 x i32>* %x2p 725 %res = call <8 x i32> @llvm.x86.avx512.mask.vpshrdv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 726 %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpshrdv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8 -1) 727 %res2 = call <8 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8 %x3) 728 %res3 = add <8 x i32> %res, %res1 729 %res4 = add <8 x i32> %res2, %res3 730 ret <8 x i32> %res4 731 } 732 733 declare <4 x i32> @llvm.x86.avx512.mask.vpshrdv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 734 declare <4 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 735 736 define <4 x i32>@test_int_x86_avx512_mask_vpshrdv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) { 737 ; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_d_128: 738 ; X86: # %bb.0: 739 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 740 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 741 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 742 ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 743 ; X86-NEXT: vpshrdvd (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x73,0x18] 744 ; X86-NEXT: vmovdqa %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe0] 745 ; X86-NEXT: vpshrdvd %xmm2, %xmm1, %xmm4 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x73,0xe2] 746 ; X86-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x73,0xc2] 747 ; X86-NEXT: vpaddd %xmm4, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc4] 748 ; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] 749 ; X86-NEXT: retl # encoding: [0xc3] 750 ; 751 ; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_d_128: 752 ; X64: # %bb.0: 753 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 754 ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 755 ; X64-NEXT: vpshrdvd (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x73,0x1f] 756 ; X64-NEXT: vmovdqa %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe0] 757 ; X64-NEXT: vpshrdvd %xmm2, %xmm1, %xmm4 # encoding: [0x62,0xf2,0x75,0x08,0x73,0xe2] 758 ; X64-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x73,0xc2] 759 ; X64-NEXT: vpaddd %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfe,0xc0] 760 ; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] 761 ; X64-NEXT: retq # encoding: [0xc3] 762 %x2 = load <4 x i32>, <4 x i32>* %x2p 763 %res = call <4 x i32> @llvm.x86.avx512.mask.vpshrdv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) 764 %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpshrdv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 -1) 765 %res2 = call <4 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 %x3) 766 %res3 = add <4 x i32> %res, %res1 767 %res4 = add <4 x i32> %res2, %res3 768 ret <4 x i32> %res4 769 } 770 771 declare <4 x i64> @llvm.x86.avx512.mask.vpshrdv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 772 declare <4 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 773 774 define <4 x i64>@test_int_x86_avx512_mask_vpshrdv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64>* %x2p, <4 x i64> %x4, i8 %x3) { 775 ; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_q_256: 776 ; X86: # %bb.0: 777 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 778 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 779 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 780 ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 781 ; X86-NEXT: vpshrdvq (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x73,0x18] 782 ; X86-NEXT: vmovdqa %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe0] 783 ; X86-NEXT: vpshrdvq %ymm2, %ymm1, %ymm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x73,0xe2] 784 ; X86-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x73,0xc2] 785 ; X86-NEXT: vpaddq %ymm4, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc4] 786 ; X86-NEXT: vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0] 787 ; X86-NEXT: retl # encoding: [0xc3] 788 ; 789 ; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_q_256: 790 ; X64: # %bb.0: 791 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 792 ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 793 ; X64-NEXT: vpshrdvq (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x73,0x1f] 794 ; X64-NEXT: vmovdqa %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe0] 795 ; X64-NEXT: vpshrdvq %ymm2, %ymm1, %ymm4 # encoding: [0x62,0xf2,0xf5,0x28,0x73,0xe2] 796 ; X64-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x73,0xc2] 797 ; X64-NEXT: vpaddq %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xd4,0xc0] 798 ; X64-NEXT: vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0] 799 ; X64-NEXT: retq # encoding: [0xc3] 800 %x2 = load <4 x i64>, <4 x i64>* %x2p 801 %res = call <4 x i64> @llvm.x86.avx512.mask.vpshrdv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) 802 %res1 = call <4 x i64> @llvm.x86.avx512.mask.vpshrdv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x4, i8 -1) 803 %res2 = call <4 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x4, i8 %x3) 804 %res3 = add <4 x i64> %res, %res1 805 %res4 = add <4 x i64> %res2, %res3 806 ret <4 x i64> %res4 807 } 808 809 declare <2 x i64> @llvm.x86.avx512.mask.vpshrdv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 810 declare <2 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 811 812 define <2 x i64>@test_int_x86_avx512_mask_vpshrdv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64>* %x2p, <2 x i64> %x4, i8 %x3) { 813 ; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_q_128: 814 ; X86: # %bb.0: 815 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 816 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 817 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 818 ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 819 ; X86-NEXT: vpshrdvq (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x73,0x18] 820 ; X86-NEXT: vmovdqa %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe0] 821 ; X86-NEXT: vpshrdvq %xmm2, %xmm1, %xmm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x73,0xe2] 822 ; X86-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x73,0xc2] 823 ; X86-NEXT: vpaddq %xmm4, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc4] 824 ; X86-NEXT: vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0] 825 ; X86-NEXT: retl # encoding: [0xc3] 826 ; 827 ; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_q_128: 828 ; X64: # %bb.0: 829 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 830 ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 831 ; X64-NEXT: vpshrdvq (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x73,0x1f] 832 ; X64-NEXT: vmovdqa %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe0] 833 ; X64-NEXT: vpshrdvq %xmm2, %xmm1, %xmm4 # encoding: [0x62,0xf2,0xf5,0x08,0x73,0xe2] 834 ; X64-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x73,0xc2] 835 ; X64-NEXT: vpaddq %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xd4,0xc0] 836 ; X64-NEXT: vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0] 837 ; X64-NEXT: retq # encoding: [0xc3] 838 %x2 = load <2 x i64>, <2 x i64>* %x2p 839 %res = call <2 x i64> @llvm.x86.avx512.mask.vpshrdv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) 840 %res1 = call <2 x i64> @llvm.x86.avx512.mask.vpshrdv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x4, i8 -1) 841 %res2 = call <2 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x4, i8 %x3) 842 %res3 = add <2 x i64> %res, %res1 843 %res4 = add <2 x i64> %res2, %res3 844 ret <2 x i64> %res4 845 } 846 847 declare <16 x i16> @llvm.x86.avx512.mask.vpshrdv.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 848 declare <16 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 849 850 define <16 x i16>@test_int_x86_avx512_mask_vpshrdv_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16>* %x2p, <16 x i16> %x4, i16 %x3) { 851 ; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_w_256: 852 ; X86: # %bb.0: 853 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 854 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 855 ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 856 ; X86-NEXT: vpshrdvw (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x72,0x18] 857 ; X86-NEXT: vmovdqa %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe0] 858 ; X86-NEXT: vpshrdvw %ymm2, %ymm1, %ymm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x72,0xe2] 859 ; X86-NEXT: vpshrdvw %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x72,0xc2] 860 ; X86-NEXT: vpaddw %ymm4, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc4] 861 ; X86-NEXT: vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0] 862 ; X86-NEXT: retl # encoding: [0xc3] 863 ; 864 ; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_w_256: 865 ; X64: # %bb.0: 866 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 867 ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 868 ; X64-NEXT: vpshrdvw (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x72,0x1f] 869 ; X64-NEXT: vmovdqa %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe0] 870 ; X64-NEXT: vpshrdvw %ymm2, %ymm1, %ymm4 # encoding: [0x62,0xf2,0xf5,0x28,0x72,0xe2] 871 ; X64-NEXT: vpshrdvw %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x72,0xc2] 872 ; X64-NEXT: vpaddw %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfd,0xc0] 873 ; X64-NEXT: vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0] 874 ; X64-NEXT: retq # encoding: [0xc3] 875 %x2 = load <16 x i16>, <16 x i16>* %x2p 876 %res = call <16 x i16> @llvm.x86.avx512.mask.vpshrdv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 877 %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpshrdv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x4, i16 -1) 878 %res2 = call <16 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x4, i16 %x3) 879 %res3 = add <16 x i16> %res, %res1 880 %res4 = add <16 x i16> %res2, %res3 881 ret <16 x i16> %res4 882 } 883 884 declare <8 x i16> @llvm.x86.avx512.mask.vpshrdv.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 885 declare <8 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 886 887 define <8 x i16>@test_int_x86_avx512_mask_vpshrdv_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16>* %x2p, <8 x i16> %x4, i8 %x3) { 888 ; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_w_128: 889 ; X86: # %bb.0: 890 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 891 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 892 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 893 ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 894 ; X86-NEXT: vpshrdvw (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x72,0x18] 895 ; X86-NEXT: vmovdqa %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe0] 896 ; X86-NEXT: vpshrdvw %xmm2, %xmm1, %xmm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x72,0xe2] 897 ; X86-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x72,0xc2] 898 ; X86-NEXT: vpaddw %xmm4, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc4] 899 ; X86-NEXT: vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0] 900 ; X86-NEXT: retl # encoding: [0xc3] 901 ; 902 ; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_w_128: 903 ; X64: # %bb.0: 904 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 905 ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 906 ; X64-NEXT: vpshrdvw (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x72,0x1f] 907 ; X64-NEXT: vmovdqa %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe0] 908 ; X64-NEXT: vpshrdvw %xmm2, %xmm1, %xmm4 # encoding: [0x62,0xf2,0xf5,0x08,0x72,0xe2] 909 ; X64-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x72,0xc2] 910 ; X64-NEXT: vpaddw %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfd,0xc0] 911 ; X64-NEXT: vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0] 912 ; X64-NEXT: retq # encoding: [0xc3] 913 %x2 = load <8 x i16>, <8 x i16>* %x2p 914 %res = call <8 x i16> @llvm.x86.avx512.mask.vpshrdv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 915 %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpshrdv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x4, i8 -1) 916 %res2 = call <8 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x4, i8 %x3) 917 %res3 = add <8 x i16> %res, %res1 918 %res4 = add <8 x i16> %res2, %res3 919 ret <8 x i16> %res4 920 } 921 922 declare <8 x i32> @llvm.x86.avx512.mask.vpshldv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 923 declare <8 x i32> @llvm.x86.avx512.maskz.vpshldv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 924 925 define <8 x i32>@test_int_x86_avx512_mask_vpshldv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) { 926 ; X86-LABEL: test_int_x86_avx512_mask_vpshldv_d_256: 927 ; X86: # %bb.0: 928 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 929 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 930 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 931 ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 932 ; X86-NEXT: vpshldvd (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x71,0x18] 933 ; X86-NEXT: vmovdqa %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe0] 934 ; X86-NEXT: vpshldvd %ymm2, %ymm1, %ymm4 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x71,0xe2] 935 ; X86-NEXT: vpshldvd %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x71,0xc2] 936 ; X86-NEXT: vpaddd %ymm4, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc4] 937 ; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] 938 ; X86-NEXT: retl # encoding: [0xc3] 939 ; 940 ; X64-LABEL: test_int_x86_avx512_mask_vpshldv_d_256: 941 ; X64: # %bb.0: 942 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 943 ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 944 ; X64-NEXT: vpshldvd (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x71,0x1f] 945 ; X64-NEXT: vmovdqa %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe0] 946 ; X64-NEXT: vpshldvd %ymm2, %ymm1, %ymm4 # encoding: [0x62,0xf2,0x75,0x28,0x71,0xe2] 947 ; X64-NEXT: vpshldvd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x71,0xc2] 948 ; X64-NEXT: vpaddd %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfe,0xc0] 949 ; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] 950 ; X64-NEXT: retq # encoding: [0xc3] 951 %x2 = load <8 x i32>, <8 x i32>* %x2p 952 %res = call <8 x i32> @llvm.x86.avx512.mask.vpshldv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 953 %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpshldv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8 -1) 954 %res2 = call <8 x i32> @llvm.x86.avx512.maskz.vpshldv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8 %x3) 955 %res3 = add <8 x i32> %res, %res1 956 %res4 = add <8 x i32> %res2, %res3 957 ret <8 x i32> %res4 958 } 959 960 declare <4 x i32> @llvm.x86.avx512.mask.vpshldv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 961 declare <4 x i32> @llvm.x86.avx512.maskz.vpshldv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 962 963 define <4 x i32>@test_int_x86_avx512_mask_vpshldv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) { 964 ; X86-LABEL: test_int_x86_avx512_mask_vpshldv_d_128: 965 ; X86: # %bb.0: 966 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 967 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 968 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 969 ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 970 ; X86-NEXT: vpshldvd (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x71,0x18] 971 ; X86-NEXT: vmovdqa %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe0] 972 ; X86-NEXT: vpshldvd %xmm2, %xmm1, %xmm4 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x71,0xe2] 973 ; X86-NEXT: vpshldvd %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x71,0xc2] 974 ; X86-NEXT: vpaddd %xmm4, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc4] 975 ; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] 976 ; X86-NEXT: retl # encoding: [0xc3] 977 ; 978 ; X64-LABEL: test_int_x86_avx512_mask_vpshldv_d_128: 979 ; X64: # %bb.0: 980 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 981 ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 982 ; X64-NEXT: vpshldvd (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x71,0x1f] 983 ; X64-NEXT: vmovdqa %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe0] 984 ; X64-NEXT: vpshldvd %xmm2, %xmm1, %xmm4 # encoding: [0x62,0xf2,0x75,0x08,0x71,0xe2] 985 ; X64-NEXT: vpshldvd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x71,0xc2] 986 ; X64-NEXT: vpaddd %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfe,0xc0] 987 ; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] 988 ; X64-NEXT: retq # encoding: [0xc3] 989 %x2 = load <4 x i32>, <4 x i32>* %x2p 990 %res = call <4 x i32> @llvm.x86.avx512.mask.vpshldv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) 991 %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpshldv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 -1) 992 %res2 = call <4 x i32> @llvm.x86.avx512.maskz.vpshldv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 %x3) 993 %res3 = add <4 x i32> %res, %res1 994 %res4 = add <4 x i32> %res2, %res3 995 ret <4 x i32> %res4 996 } 997 998 declare <4 x i64> @llvm.x86.avx512.mask.vpshldv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 999 declare <4 x i64> @llvm.x86.avx512.maskz.vpshldv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 1000 1001 define <4 x i64>@test_int_x86_avx512_mask_vpshldv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64>* %x2p, <4 x i64> %x4, i8 %x3) { 1002 ; X86-LABEL: test_int_x86_avx512_mask_vpshldv_q_256: 1003 ; X86: # %bb.0: 1004 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 1005 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1006 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1007 ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 1008 ; X86-NEXT: vpshldvq (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x71,0x18] 1009 ; X86-NEXT: vmovdqa %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe0] 1010 ; X86-NEXT: vpshldvq %ymm2, %ymm1, %ymm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x71,0xe2] 1011 ; X86-NEXT: vpshldvq %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x71,0xc2] 1012 ; X86-NEXT: vpaddq %ymm4, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc4] 1013 ; X86-NEXT: vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0] 1014 ; X86-NEXT: retl # encoding: [0xc3] 1015 ; 1016 ; X64-LABEL: test_int_x86_avx512_mask_vpshldv_q_256: 1017 ; X64: # %bb.0: 1018 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1019 ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 1020 ; X64-NEXT: vpshldvq (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x71,0x1f] 1021 ; X64-NEXT: vmovdqa %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe0] 1022 ; X64-NEXT: vpshldvq %ymm2, %ymm1, %ymm4 # encoding: [0x62,0xf2,0xf5,0x28,0x71,0xe2] 1023 ; X64-NEXT: vpshldvq %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x71,0xc2] 1024 ; X64-NEXT: vpaddq %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xd4,0xc0] 1025 ; X64-NEXT: vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0] 1026 ; X64-NEXT: retq # encoding: [0xc3] 1027 %x2 = load <4 x i64>, <4 x i64>* %x2p 1028 %res = call <4 x i64> @llvm.x86.avx512.mask.vpshldv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) 1029 %res1 = call <4 x i64> @llvm.x86.avx512.mask.vpshldv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x4, i8 -1) 1030 %res2 = call <4 x i64> @llvm.x86.avx512.maskz.vpshldv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x4, i8 %x3) 1031 %res3 = add <4 x i64> %res, %res1 1032 %res4 = add <4 x i64> %res2, %res3 1033 ret <4 x i64> %res4 1034 } 1035 1036 declare <2 x i64> @llvm.x86.avx512.mask.vpshldv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 1037 declare <2 x i64> @llvm.x86.avx512.maskz.vpshldv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 1038 1039 define <2 x i64>@test_int_x86_avx512_mask_vpshldv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64>* %x2p, <2 x i64> %x4, i8 %x3) { 1040 ; X86-LABEL: test_int_x86_avx512_mask_vpshldv_q_128: 1041 ; X86: # %bb.0: 1042 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 1043 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1044 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1045 ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 1046 ; X86-NEXT: vpshldvq (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x71,0x18] 1047 ; X86-NEXT: vmovdqa %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe0] 1048 ; X86-NEXT: vpshldvq %xmm2, %xmm1, %xmm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x71,0xe2] 1049 ; X86-NEXT: vpshldvq %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x71,0xc2] 1050 ; X86-NEXT: vpaddq %xmm4, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc4] 1051 ; X86-NEXT: vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0] 1052 ; X86-NEXT: retl # encoding: [0xc3] 1053 ; 1054 ; X64-LABEL: test_int_x86_avx512_mask_vpshldv_q_128: 1055 ; X64: # %bb.0: 1056 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1057 ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 1058 ; X64-NEXT: vpshldvq (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x71,0x1f] 1059 ; X64-NEXT: vmovdqa %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe0] 1060 ; X64-NEXT: vpshldvq %xmm2, %xmm1, %xmm4 # encoding: [0x62,0xf2,0xf5,0x08,0x71,0xe2] 1061 ; X64-NEXT: vpshldvq %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x71,0xc2] 1062 ; X64-NEXT: vpaddq %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xd4,0xc0] 1063 ; X64-NEXT: vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0] 1064 ; X64-NEXT: retq # encoding: [0xc3] 1065 %x2 = load <2 x i64>, <2 x i64>* %x2p 1066 %res = call <2 x i64> @llvm.x86.avx512.mask.vpshldv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) 1067 %res1 = call <2 x i64> @llvm.x86.avx512.mask.vpshldv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x4, i8 -1) 1068 %res2 = call <2 x i64> @llvm.x86.avx512.maskz.vpshldv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x4, i8 %x3) 1069 %res3 = add <2 x i64> %res, %res1 1070 %res4 = add <2 x i64> %res2, %res3 1071 ret <2 x i64> %res4 1072 } 1073 1074 declare <16 x i16> @llvm.x86.avx512.mask.vpshldv.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 1075 declare <16 x i16> @llvm.x86.avx512.maskz.vpshldv.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 1076 1077 define <16 x i16>@test_int_x86_avx512_mask_vpshldv_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16>* %x2p, <16 x i16> %x4, i16 %x3) { 1078 ; X86-LABEL: test_int_x86_avx512_mask_vpshldv_w_256: 1079 ; X86: # %bb.0: 1080 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1081 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1082 ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 1083 ; X86-NEXT: vpshldvw (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x70,0x18] 1084 ; X86-NEXT: vmovdqa %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe0] 1085 ; X86-NEXT: vpshldvw %ymm2, %ymm1, %ymm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x70,0xe2] 1086 ; X86-NEXT: vpshldvw %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x70,0xc2] 1087 ; X86-NEXT: vpaddw %ymm4, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc4] 1088 ; X86-NEXT: vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0] 1089 ; X86-NEXT: retl # encoding: [0xc3] 1090 ; 1091 ; X64-LABEL: test_int_x86_avx512_mask_vpshldv_w_256: 1092 ; X64: # %bb.0: 1093 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1094 ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 1095 ; X64-NEXT: vpshldvw (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x70,0x1f] 1096 ; X64-NEXT: vmovdqa %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe0] 1097 ; X64-NEXT: vpshldvw %ymm2, %ymm1, %ymm4 # encoding: [0x62,0xf2,0xf5,0x28,0x70,0xe2] 1098 ; X64-NEXT: vpshldvw %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x70,0xc2] 1099 ; X64-NEXT: vpaddw %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfd,0xc0] 1100 ; X64-NEXT: vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0] 1101 ; X64-NEXT: retq # encoding: [0xc3] 1102 %x2 = load <16 x i16>, <16 x i16>* %x2p 1103 %res = call <16 x i16> @llvm.x86.avx512.mask.vpshldv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 1104 %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpshldv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x4, i16 -1) 1105 %res2 = call <16 x i16> @llvm.x86.avx512.maskz.vpshldv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x4, i16 %x3) 1106 %res3 = add <16 x i16> %res, %res1 1107 %res4 = add <16 x i16> %res2, %res3 1108 ret <16 x i16> %res4 1109 } 1110 1111 declare <8 x i16> @llvm.x86.avx512.mask.vpshldv.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 1112 declare <8 x i16> @llvm.x86.avx512.maskz.vpshldv.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 1113 1114 define <8 x i16>@test_int_x86_avx512_mask_vpshldv_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16>* %x2p, <8 x i16> %x4, i8 %x3) { 1115 ; X86-LABEL: test_int_x86_avx512_mask_vpshldv_w_128: 1116 ; X86: # %bb.0: 1117 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 1118 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1119 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1120 ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 1121 ; X86-NEXT: vpshldvw (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x70,0x18] 1122 ; X86-NEXT: vmovdqa %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe0] 1123 ; X86-NEXT: vpshldvw %xmm2, %xmm1, %xmm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x70,0xe2] 1124 ; X86-NEXT: vpshldvw %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x70,0xc2] 1125 ; X86-NEXT: vpaddw %xmm4, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc4] 1126 ; X86-NEXT: vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0] 1127 ; X86-NEXT: retl # encoding: [0xc3] 1128 ; 1129 ; X64-LABEL: test_int_x86_avx512_mask_vpshldv_w_128: 1130 ; X64: # %bb.0: 1131 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1132 ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 1133 ; X64-NEXT: vpshldvw (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x70,0x1f] 1134 ; X64-NEXT: vmovdqa %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe0] 1135 ; X64-NEXT: vpshldvw %xmm2, %xmm1, %xmm4 # encoding: [0x62,0xf2,0xf5,0x08,0x70,0xe2] 1136 ; X64-NEXT: vpshldvw %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x70,0xc2] 1137 ; X64-NEXT: vpaddw %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfd,0xc0] 1138 ; X64-NEXT: vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0] 1139 ; X64-NEXT: retq # encoding: [0xc3] 1140 %x2 = load <8 x i16>, <8 x i16>* %x2p 1141 %res = call <8 x i16> @llvm.x86.avx512.mask.vpshldv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 1142 %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpshldv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x4, i8 -1) 1143 %res2 = call <8 x i16> @llvm.x86.avx512.maskz.vpshldv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x4, i8 %x3) 1144 %res3 = add <8 x i16> %res, %res1 1145 %res4 = add <8 x i16> %res2, %res3 1146 ret <8 x i16> %res4 1147 } 1148 1149