; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi2 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi2 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
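
; VPEXPANDW and VPEXPANDB take the contiguous low elements of the source
; and scatter them into the destination lanes whose mask bit is set; the
; remaining lanes keep the passthru operand (merge masking) or become zero
; (zeroing masking). A rough scalar sketch of the semantics, based on the
; documented instruction behaviour rather than anything checked here:
;   j = 0
;   for i in 0 .. N-1:
;     result[i] = mask[i] ? data[j++] : passthru[i]
; With an all-ones mask the expand is an identity copy, which is why the
; unmasked tests below compile to a bare return.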
define <32 x i16> @test_expand_w_512(<32 x i16> %data) {
; CHECK-LABEL: test_expand_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> undef, i32 -1)
  ret <32 x i16> %res
}

define <32 x i16> @test_mask_expand_w_512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_mask_expand_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0xc8]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0xc8]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask)
  ret <32 x i16> %res
}

define <32 x i16> @test_maskz_expand_w_512(<32 x i16> %data, i32 %mask) {
; X86-LABEL: test_maskz_expand_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %res
}

declare <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> %src0, i32 %mask)
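
; The byte forms need a 64-bit mask. 32-bit targets have no kmovq from a
; general-purpose register, so the X86 checks below assemble the mask from
; two 32-bit stack loads combined with kunpckdq, while X64 can move %rdi
; into %k1 directly.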
define <64 x i8> @test_expand_b_512(<64 x i8> %data) {
; CHECK-LABEL: test_expand_b_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> undef, i64 -1)
  ret <64 x i8> %res
}

define <64 x i8> @test_mask_expand_b_512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask) {
; X86-LABEL: test_mask_expand_b_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    kunpckdq %k0, %k1, %k1 # encoding: [0xc4,0xe1,0xf4,0x4b,0xc8]
; X86-NEXT:    vpexpandb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0xc8]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0xc8]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask)
  ret <64 x i8> %res
}

define <64 x i8> @test_maskz_expand_b_512(<64 x i8> %data, i64 %mask) {
; X86-LABEL: test_maskz_expand_b_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    kunpckdq %k0, %k1, %k1 # encoding: [0xc4,0xe1,0xf4,0x4b,0xc8]
; X86-NEXT:    vpexpandb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> zeroinitializer, i64 %mask)
  ret <64 x i8> %res
}

declare <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> %src0, i64 %mask)
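
; VPSHLDW/VPSHLDD/VPSHLDQ concatenate each element pair (%x0 supplying the
; high half, %x1 the low half), shift left by the immediate and keep the
; upper half: a per-element funnel shift left. As a sketch, the first call
; in each test should behave like the generic funnel-shift intrinsic
; (an assumed mapping, written informally; not part of the test):
;   %r = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %x0, <16 x i32> %x1,
;                                          <16 x i32> splat (i32 22))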
define <16 x i32>@test_int_x86_avx512_mask_vpshld_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_512:
; X86:       # %bb.0:
; X86-NEXT:    vpshldd $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x71,0xd9,0x16]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshldd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x71,0xd1,0x16]
; X86-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_512:
; X64:       # %bb.0:
; X64-NEXT:    vpshldd $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x71,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x71,0xd1,0x16]
; X64-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i32> @llvm.x86.avx512.vpshld.d.512(<16 x i32> %x0, <16 x i32> %x1, i32 22)
  %2 = bitcast i16 %x4 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x3
  %4 = call <16 x i32> @llvm.x86.avx512.vpshld.d.512(<16 x i32> %x0, <16 x i32> %x1, i32 22)
  %res2 = add <16 x i32> %3, %4
  ret <16 x i32> %res2
}
declare <16 x i32> @llvm.x86.avx512.vpshld.d.512(<16 x i32>, <16 x i32>, i32)

define <8 x i64>@test_int_x86_avx512_mask_vpshld_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_512:
; X86:       # %bb.0:
; X86-NEXT:    vpshldq $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x71,0xd9,0x16]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshldq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x71,0xd1,0x16]
; X86-NEXT:    vpaddq %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_512:
; X64:       # %bb.0:
; X64-NEXT:    vpshldq $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x71,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x71,0xd1,0x16]
; X64-NEXT:    vpaddq %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i64> @llvm.x86.avx512.vpshld.q.512(<8 x i64> %x0, <8 x i64> %x1, i32 22)
  %2 = bitcast i8 %x4 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x3
  %4 = call <8 x i64> @llvm.x86.avx512.vpshld.q.512(<8 x i64> %x0, <8 x i64> %x1, i32 22)
  %res2 = add <8 x i64> %3, %4
  ret <8 x i64> %res2
}
declare <8 x i64> @llvm.x86.avx512.vpshld.q.512(<8 x i64>, <8 x i64>, i32)

define <32 x i16>@test_int_x86_avx512_mask_vpshld_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_512:
; X86:       # %bb.0:
; X86-NEXT:    vpshldw $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x70,0xd9,0x16]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshldw $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x70,0xd1,0x16]
; X86-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_512:
; X64:       # %bb.0:
; X64-NEXT:    vpshldw $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x70,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldw $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x70,0xd1,0x16]
; X64-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.vpshld.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 22)
  %2 = bitcast i32 %x4 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x3
  %4 = call <32 x i16> @llvm.x86.avx512.vpshld.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 22)
  %res2 = add <32 x i16> %3, %4
  ret <32 x i16> %res2
}
declare <32 x i16> @llvm.x86.avx512.vpshld.w.512(<32 x i16>, <32 x i16>, i32)
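
; VPSHRDW/VPSHRDD/VPSHRDQ are the right-shift counterparts: the pair is
; concatenated with %x1 as the high half and %x0 as the low half, shifted
; right by the immediate, and the lower half is kept. Per element this is
; a funnel shift right with swapped operands, roughly fshr(%x1, %x0, 22)
; (an assumed equivalence from the instruction definition, not something
; this file verifies).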
define <16 x i32>@test_int_x86_avx512_mask_vpshrd_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_512:
; X86:       # %bb.0:
; X86-NEXT:    vpshrdd $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x73,0xd9,0x16]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshrdd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x73,0xd1,0x16]
; X86-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_512:
; X64:       # %bb.0:
; X64-NEXT:    vpshrdd $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x73,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x73,0xd1,0x16]
; X64-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i32> @llvm.x86.avx512.vpshrd.d.512(<16 x i32> %x0, <16 x i32> %x1, i32 22)
  %2 = bitcast i16 %x4 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x3
  %4 = call <16 x i32> @llvm.x86.avx512.vpshrd.d.512(<16 x i32> %x0, <16 x i32> %x1, i32 22)
  %res2 = add <16 x i32> %3, %4
  ret <16 x i32> %res2
}
declare <16 x i32> @llvm.x86.avx512.vpshrd.d.512(<16 x i32>, <16 x i32>, i32)

define <8 x i64>@test_int_x86_avx512_mask_vpshrd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_512:
; X86:       # %bb.0:
; X86-NEXT:    vpshrdq $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x73,0xd9,0x16]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshrdq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x73,0xd1,0x16]
; X86-NEXT:    vpaddq %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_512:
; X64:       # %bb.0:
; X64-NEXT:    vpshrdq $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x73,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x73,0xd1,0x16]
; X64-NEXT:    vpaddq %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i64> @llvm.x86.avx512.vpshrd.q.512(<8 x i64> %x0, <8 x i64> %x1, i32 22)
  %2 = bitcast i8 %x4 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x3
  %4 = call <8 x i64> @llvm.x86.avx512.vpshrd.q.512(<8 x i64> %x0, <8 x i64> %x1, i32 22)
  %res2 = add <8 x i64> %3, %4
  ret <8 x i64> %res2
}
declare <8 x i64> @llvm.x86.avx512.vpshrd.q.512(<8 x i64>, <8 x i64>, i32)

define <32 x i16>@test_int_x86_avx512_mask_vpshrd_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_512:
; X86:       # %bb.0:
; X86-NEXT:    vpshrdw $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x72,0xd9,0x16]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshrdw $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x72,0xd1,0x16]
; X86-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_512:
; X64:       # %bb.0:
; X64-NEXT:    vpshrdw $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x72,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdw $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x72,0xd1,0x16]
; X64-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.vpshrd.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 22)
  %2 = bitcast i32 %x4 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x3
  %4 = call <32 x i16> @llvm.x86.avx512.vpshrd.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 22)
  %res2 = add <32 x i16> %3, %4
  ret <32 x i16> %res2
}
declare <32 x i16> @llvm.x86.avx512.vpshrd.w.512(<32 x i16>, <32 x i16>, i32)
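
; The vpshrdv tests below cover the variable-count form: the destination
; operand supplies the low half of each concatenated pair, %x1 the high
; half, and every lane shifts right by its own count from %x2, taken
; modulo the element width (an assumption from the documented behaviour).
; Each test exercises the merge-masked form with a memory operand, the
; unmasked form and the zeroing form, then adds the three results so none
; of them is dead.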
declare <16 x i32> @llvm.x86.avx512.mask.vpshrdv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
declare <16 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_vpshrdv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_d_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    vpshrdvd (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x73,0x18]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X86-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm4 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x73,0xe2]
; X86-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x73,0xc2]
; X86-NEXT:    vpaddd %zmm4, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc4]
; X86-NEXT:    vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_d_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    vpshrdvd (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x73,0x1f]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X64-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm4 # encoding: [0x62,0xf2,0x75,0x48,0x73,0xe2]
; X64-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x73,0xc2]
; X64-NEXT:    vpaddd %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0x5d,0x48,0xfe,0xc0]
; X64-NEXT:    vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <16 x i32>, <16 x i32>* %x2p
  %res = call <16 x i32> @llvm.x86.avx512.mask.vpshrdv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpshrdv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 -1)
  %res2 = call <16 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 %x3)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res2, %res3
  ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.vpshrdv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
declare <8 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_vpshrdv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>* %x2p, <8 x i64> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_q_512:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    vpshrdvq (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x73,0x18]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X86-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x73,0xe2]
; X86-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x73,0xc2]
; X86-NEXT:    vpaddq %zmm4, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc4]
; X86-NEXT:    vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_q_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    vpshrdvq (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x73,0x1f]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X64-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm4 # encoding: [0x62,0xf2,0xf5,0x48,0x73,0xe2]
; X64-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x73,0xc2]
; X64-NEXT:    vpaddq %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0xdd,0x48,0xd4,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <8 x i64>, <8 x i64>* %x2p
  %res = call <8 x i64> @llvm.x86.avx512.mask.vpshrdv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.vpshrdv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x4, i8 -1)
  %res2 = call <8 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x4, i8 %x3)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res2, %res3
  ret <8 x i64> %res4
}

declare <32 x i16> @llvm.x86.avx512.mask.vpshrdv.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
declare <32 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)

define <32 x i16>@test_int_x86_avx512_mask_vpshrdv_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16>* %x2p, <32 x i16> %x4, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    vpshrdvw (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x72,0x18]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X86-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x72,0xe2]
; X86-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x72,0xc2]
; X86-NEXT:    vpaddw %zmm4, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc4]
; X86-NEXT:    vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    vpshrdvw (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x72,0x1f]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X64-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x72,0xe2]
; X64-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x72,0xc2]
; X64-NEXT:    vpaddw %zmm4, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc4]
; X64-NEXT:    vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <32 x i16>, <32 x i16>* %x2p
  %res = call <32 x i16> @llvm.x86.avx512.mask.vpshrdv.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpshrdv.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x4, i32 -1)
  %res2 = call <32 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x4, i32 %x3)
  %res3 = add <32 x i16> %res, %res1
  %res4 = add <32 x i16> %res2, %res3
  ret <32 x i16> %res4
}
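
; The vpshldv tests mirror this for the left direction: the destination
; supplies the high half, %x1 the low half, and each lane shifts left by
; its count from %x2 modulo the element width (again an assumption from
; the documented behaviour). The merge/unmasked/zeroing triple and the
; final adds follow the same pattern as the vpshrdv tests above.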
declare <16 x i32> @llvm.x86.avx512.mask.vpshldv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
declare <16 x i32> @llvm.x86.avx512.maskz.vpshldv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_vpshldv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_d_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    vpshldvd (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x71,0x18]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X86-NEXT:    vpshldvd %zmm2, %zmm1, %zmm4 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x71,0xe2]
; X86-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x71,0xc2]
; X86-NEXT:    vpaddd %zmm4, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc4]
; X86-NEXT:    vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_d_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    vpshldvd (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x71,0x1f]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X64-NEXT:    vpshldvd %zmm2, %zmm1, %zmm4 # encoding: [0x62,0xf2,0x75,0x48,0x71,0xe2]
; X64-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x71,0xc2]
; X64-NEXT:    vpaddd %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0x5d,0x48,0xfe,0xc0]
; X64-NEXT:    vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <16 x i32>, <16 x i32>* %x2p
  %res = call <16 x i32> @llvm.x86.avx512.mask.vpshldv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpshldv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 -1)
  %res2 = call <16 x i32> @llvm.x86.avx512.maskz.vpshldv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 %x3)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res2, %res3
  ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.vpshldv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
declare <8 x i64> @llvm.x86.avx512.maskz.vpshldv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_vpshldv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>* %x2p, <8 x i64> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_q_512:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    vpshldvq (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x71,0x18]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X86-NEXT:    vpshldvq %zmm2, %zmm1, %zmm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x71,0xe2]
; X86-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x71,0xc2]
; X86-NEXT:    vpaddq %zmm4, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc4]
; X86-NEXT:    vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_q_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    vpshldvq (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x71,0x1f]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X64-NEXT:    vpshldvq %zmm2, %zmm1, %zmm4 # encoding: [0x62,0xf2,0xf5,0x48,0x71,0xe2]
; X64-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x71,0xc2]
; X64-NEXT:    vpaddq %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0xdd,0x48,0xd4,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <8 x i64>, <8 x i64>* %x2p
  %res = call <8 x i64> @llvm.x86.avx512.mask.vpshldv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.vpshldv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x4, i8 -1)
  %res2 = call <8 x i64> @llvm.x86.avx512.maskz.vpshldv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x4, i8 %x3)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res2, %res3
  ret <8 x i64> %res4
}

declare <32 x i16> @llvm.x86.avx512.mask.vpshldv.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
declare <32 x i16> @llvm.x86.avx512.maskz.vpshldv.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)

define <32 x i16>@test_int_x86_avx512_mask_vpshldv_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16>* %x2p, <32 x i16> %x4, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    vpshldvw (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x70,0x18]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X86-NEXT:    vpshldvw %zmm2, %zmm1, %zmm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x70,0xe2]
; X86-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x70,0xc2]
; X86-NEXT:    vpaddw %zmm4, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc4]
; X86-NEXT:    vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    vpshldvw (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x70,0x1f]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X64-NEXT:    vpshldvw %zmm2, %zmm1, %zmm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x70,0xe2]
; X64-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x70,0xc2]
; X64-NEXT:    vpaddw %zmm4, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc4]
; X64-NEXT:    vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <32 x i16>, <32 x i16>* %x2p
  %res = call <32 x i16> @llvm.x86.avx512.mask.vpshldv.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpshldv.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x4, i32 -1)
  %res2 = call <32 x i16> @llvm.x86.avx512.maskz.vpshldv.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x4, i32 %x3)
  %res3 = add <32 x i16> %res, %res1
  %res4 = add <32 x i16> %res2, %res3
  ret <32 x i16> %res4
}