; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
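; Test-name suffixes used throughout this file: _rr = register-register,
; _rm = register-memory, _rmb = broadcast ({1to16}) memory operand; a
; trailing k adds merge-masking into a pass-through vector and kz adds
; zero-masking.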
define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_mask_packs_epi32_rr_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x6b,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
  ret <32 x i16> %1
}

define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_packs_epi32_rrk_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpackssdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rrk_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackssdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
  ret <32 x i16> %3
}

define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
; X86-LABEL: test_mask_packs_epi32_rrkz_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rrkz_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  ret <32 x i16> %3
}

define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
; X86-LABEL: test_mask_packs_epi32_rm_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackssdw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x6b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rm_512:
; X64: # %bb.0:
; X64-NEXT: vpackssdw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x6b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
  ret <32 x i16> %1
}

define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmk_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackssdw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0x08]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmk_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackssdw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0x0f]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
  ret <32 x i16> %3
}

define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmkz_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackssdw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmkz_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackssdw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  ret <32 x i16> %3
}

define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_packs_epi32_rmb_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x58,0x6b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmb_512:
; X64: # %bb.0:
; X64-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x58,0x6b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
  ret <32 x i16> %1
}

define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmbk_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x59,0x6b,0x08]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmbk_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x59,0x6b,0x0f]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
  ret <32 x i16> %3
}

define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmbkz_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xd9,0x6b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmbkz_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xd9,0x6b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  ret <32 x i16> %3
}

declare <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32>, <16 x i32>)

define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; CHECK-LABEL: test_mask_packs_epi16_rr_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x63,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
  ret <64 x i8> %1
}

define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
; X86-LABEL: test_mask_packs_epi16_rrk_512:
; X86: # %bb.0:
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpacksswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rrk_512:
; X64: # %bb.0:
; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT: vpacksswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i64 %mask to <64 x i1>
  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru
  ret <64 x i8> %3
}

define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
; X86-LABEL: test_mask_packs_epi16_rrkz_512:
; X86: # %bb.0:
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rrkz_512:
; X64: # %bb.0:
; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i64 %mask to <64 x i1>
  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer
  ret <64 x i8> %3
}

define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
; X86-LABEL: test_mask_packs_epi16_rm_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpacksswb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x63,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rm_512:
; X64: # %bb.0:
; X64-NEXT: vpacksswb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x63,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
  ret <64 x i8> %1
}

define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
; X86-LABEL: test_mask_packs_epi16_rmk_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpacksswb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0x08]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rmk_512:
; X64: # %bb.0:
; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT: vpacksswb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0x0f]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i64 %mask to <64 x i1>
  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru
  ret <64 x i8> %3
}

define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
; X86-LABEL: test_mask_packs_epi16_rmkz_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpacksswb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rmkz_512:
; X64: # %bb.0:
; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT: vpacksswb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i64 %mask to <64 x i1>
  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer
  ret <64 x i8> %3
}

declare <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16>, <32 x i16>)

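; The packus tests below mirror the packs tests above, using the
; unsigned-saturation pack instructions vpackusdw and vpackuswb.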
define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_mask_packus_epi32_rr_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x2b,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
  ret <32 x i16> %1
}

define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_packus_epi32_rrk_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpackusdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rrk_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackusdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
  ret <32 x i16> %3
}

define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
; X86-LABEL: test_mask_packus_epi32_rrkz_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rrkz_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  ret <32 x i16> %3
}

define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
; X86-LABEL: test_mask_packus_epi32_rm_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackusdw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x2b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rm_512:
; X64: # %bb.0:
; X64-NEXT: vpackusdw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x2b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
  ret <32 x i16> %1
}

define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmk_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackusdw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0x08]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmk_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackusdw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0x0f]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
  ret <32 x i16> %3
}

define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmkz_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackusdw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmkz_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackusdw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  ret <32 x i16> %3
}

define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_packus_epi32_rmb_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x58,0x2b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmb_512:
; X64: # %bb.0:
; X64-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x58,0x2b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
  ret <32 x i16> %1
}

define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmbk_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x59,0x2b,0x08]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmbk_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x59,0x2b,0x0f]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
  ret <32 x i16> %3
}

define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmbkz_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xd9,0x2b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmbkz_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xd9,0x2b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  ret <32 x i16> %3
}

declare <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32>, <16 x i32>)

define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; CHECK-LABEL: test_mask_packus_epi16_rr_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x67,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
  ret <64 x i8> %1
}

define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
; X86-LABEL: test_mask_packus_epi16_rrk_512:
; X86: # %bb.0:
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpackuswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rrk_512:
; X64: # %bb.0:
; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT: vpackuswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i64 %mask to <64 x i1>
  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru
  ret <64 x i8> %3
}

define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
; X86-LABEL: test_mask_packus_epi16_rrkz_512:
; X86: # %bb.0:
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rrkz_512:
; X64: # %bb.0:
; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i64 %mask to <64 x i1>
  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer
  ret <64 x i8> %3
}

define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
; X86-LABEL: test_mask_packus_epi16_rm_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackuswb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x67,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rm_512:
; X64: # %bb.0:
; X64-NEXT: vpackuswb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x67,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
  ret <64 x i8> %1
}

define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
; X86-LABEL: test_mask_packus_epi16_rmk_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackuswb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0x08]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rmk_512:
; X64: # %bb.0:
; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT: vpackuswb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0x0f]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i64 %mask to <64 x i1>
  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru
  ret <64 x i8> %3
}

define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
; X86-LABEL: test_mask_packus_epi16_rmkz_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackuswb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rmkz_512:
; X64: # %bb.0:
; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT: vpackuswb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i64 %mask to <64 x i1>
  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer
  ret <64 x i8> %3
}

declare <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16>, <32 x i16>)

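; Unlike the pack tests above, the saturating add/sub tests below use the
; legacy @llvm.x86.avx512.mask.* intrinsics, which take the pass-through
; vector and the scalar mask as explicit operands (i32 -1 means no masking).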
define <32 x i16> @test_mask_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; CHECK-LABEL: test_mask_adds_epi16_rr_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
  ret <32 x i16> %res
}

define <32 x i16> @test_mask_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_adds_epi16_rrk_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epi16_rrk_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
  ret <32 x i16> %res
}

define <32 x i16> @test_mask_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; X86-LABEL: test_mask_adds_epi16_rrkz_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epi16_rrkz_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %res
}

define <32 x i16> @test_mask_adds_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
; X86-LABEL: test_mask_adds_epi16_rm_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpaddsw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epi16_rm_512:
; X64: # %bb.0:
; X64-NEXT: vpaddsw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
  ret <32 x i16> %res
}

define <32 x i16> @test_mask_adds_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_adds_epi16_rmk_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpaddsw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0x08]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epi16_rmk_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpaddsw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0x0f]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
  ret <32 x i16> %res
}

define <32 x i16> @test_mask_adds_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
; X86-LABEL: test_mask_adds_epi16_rmkz_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpaddsw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epi16_rmkz_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpaddsw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %res
}

declare <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)

define <32 x i16> @test_mask_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; CHECK-LABEL: test_mask_subs_epi16_rr_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
  ret <32 x i16> %res
}

define <32 x i16> @test_mask_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_subs_epi16_rrk_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epi16_rrk_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
  ret <32 x i16> %res
}

define <32 x i16> @test_mask_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; X86-LABEL: test_mask_subs_epi16_rrkz_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epi16_rrkz_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %res
}

define <32 x i16> @test_mask_subs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
; X86-LABEL: test_mask_subs_epi16_rm_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpsubsw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epi16_rm_512:
; X64: # %bb.0:
; X64-NEXT: vpsubsw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
  ret <32 x i16> %res
}

define <32 x i16> @test_mask_subs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_subs_epi16_rmk_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpsubsw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0x08]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epi16_rmk_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpsubsw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0x0f]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
  ret <32 x i16> %res
}

define <32 x i16> @test_mask_subs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
; X86-LABEL: test_mask_subs_epi16_rmkz_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpsubsw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epi16_rmkz_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpsubsw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %res
}

declare <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)

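; The same pattern repeats for the unsigned-saturation forms vpaddusw and
; vpsubusw.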
define <32 x i16> @test_mask_adds_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; CHECK-LABEL: test_mask_adds_epu16_rr_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xdd,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
  ret <32 x i16> %res
}

define <32 x i16> @test_mask_adds_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_adds_epu16_rrk_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpaddusw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdd,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epu16_rrk_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpaddusw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdd,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
  ret <32 x i16> %res
}

define <32 x i16> @test_mask_adds_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; X86-LABEL: test_mask_adds_epu16_rrkz_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdd,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epu16_rrkz_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdd,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %res
}

define <32 x i16> @test_mask_adds_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
; X86-LABEL: test_mask_adds_epu16_rm_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpaddusw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xdd,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epu16_rm_512:
; X64: # %bb.0:
; X64-NEXT: vpaddusw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xdd,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
  ret <32 x i16> %res
}

define <32 x i16> @test_mask_adds_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_adds_epu16_rmk_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpaddusw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdd,0x08]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epu16_rmk_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpaddusw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdd,0x0f]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
  ret <32 x i16> %res
}

define <32 x i16> @test_mask_adds_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
; X86-LABEL: test_mask_adds_epu16_rmkz_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpaddusw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdd,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epu16_rmkz_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpaddusw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdd,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %res
}

declare <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)

define <32 x i16> @test_mask_subs_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; CHECK-LABEL: test_mask_subs_epu16_rr_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd9,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
  ret <32 x i16> %res
}

define <32 x i16> @test_mask_subs_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_subs_epu16_rrk_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsubusw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd9,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epu16_rrk_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsubusw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd9,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
  ret <32 x i16> %res
}

define <32 x i16> @test_mask_subs_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; X86-LABEL: test_mask_subs_epu16_rrkz_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd9,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epu16_rrkz_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd9,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %res
}

define <32 x i16> @test_mask_subs_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
; X86-LABEL: test_mask_subs_epu16_rm_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpsubusw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd9,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epu16_rm_512:
; X64: # %bb.0:
; X64-NEXT: vpsubusw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd9,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
  ret <32 x i16> %res
}

define <32 x i16> @test_mask_subs_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_subs_epu16_rmk_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpsubusw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd9,0x08]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epu16_rmk_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpsubusw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd9,0x0f]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
  ret <32 x i16> %res
}

define <32 x i16> @test_mask_subs_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
; X86-LABEL: test_mask_subs_epu16_rmkz_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpsubusw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd9,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epu16_rmkz_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpsubusw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd9,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %res
}

declare <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)

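; The permute (vpermt2w/vpermi2w), shuffle (vpshufb) and multiply-high tests
; below express masking in IR as a bitcast of the scalar mask to <32 x i1>
; (or <64 x i1>) followed by a select against the pass-through value or
; zeroinitializer.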
define <32 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
; X86: # %bb.0:
; X86-NEXT: vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
; X86-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x7d,0xda]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x7d,0xca]
; X86-NEXT: vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
; X64: # %bb.0:
; X64-NEXT: vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
; X64-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x7d,0xda]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x7d,0xca]
; X64-NEXT: vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x1
  %4 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2)
  %res2 = add <32 x i16> %3, %4
  ret <32 x i16> %res2
}

define <32 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512:
; X86: # %bb.0:
; X86-NEXT: vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
; X86-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x7d,0xda]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x7d,0xca]
; X86-NEXT: vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512:
; X64: # %bb.0:
; X64-NEXT: vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
; X64-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x7d,0xda]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x7d,0xca]
; X64-NEXT: vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  %4 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2)
  %res2 = add <32 x i16> %3, %4
  ret <32 x i16> %res2
}

declare <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>)

define <32 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
; X86: # %bb.0:
; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT: vpermt2w %zmm2, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0x7d,0xda]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpermi2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x75,0xca]
; X86-NEXT: vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
; X64: # %bb.0:
; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT: vpermt2w %zmm2, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0x7d,0xda]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpermi2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x75,0xca]
; X64-NEXT: vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x1
  %4 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2)
  %res2 = add <32 x i16> %3, %4
  ret <32 x i16> %res2
}

declare <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8>, <64 x i8>)

define <64 x i8>@test_int_x86_avx512_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pshuf_b_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vpshufb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x00,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1)
  ret <64 x i8> %res
}

define <64 x i8>@test_int_x86_avx512_pshuf_b_512_mask(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %mask) {
; X86-LABEL: test_int_x86_avx512_pshuf_b_512_mask:
; X86: # %bb.0:
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x00,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_pshuf_b_512_mask:
; X64: # %bb.0:
; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x00,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1)
  %mask.cast = bitcast i64 %mask to <64 x i1>
  %res2 = select <64 x i1> %mask.cast, <64 x i8> %res, <64 x i8> %x2
  ret <64 x i8> %res2
}

define <64 x i8>@test_int_x86_avx512_pshuf_b_512_maskz(<64 x i8> %x0, <64 x i8> %x1, i64 %mask) {
; X86-LABEL: test_int_x86_avx512_pshuf_b_512_maskz:
; X86: # %bb.0:
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpshufb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x00,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_pshuf_b_512_maskz:
; X64: # %bb.0:
; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT: vpshufb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x00,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1)
  %mask.cast = bitcast i64 %mask to <64 x i1>
  %res2 = select <64 x i1> %mask.cast, <64 x i8> %res, <64 x i8> zeroinitializer
  ret <64 x i8> %res2
}

declare <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16>, <32 x i16>)

define <32 x i16> @test_int_x86_avx512_mask_pmulhu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmulhu_w_512:
; X86: # %bb.0:
; X86-NEXT: vpmulhuw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe4,0xd9]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmulhuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe4,0xd1]
; X86-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmulhu_w_512:
; X64: # %bb.0:
; X64-NEXT: vpmulhuw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe4,0xd9]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpmulhuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe4,0xd1]
; X64-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
  %4 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1)
  %res2 = add <32 x i16> %3, %4
  ret <32 x i16> %res2
}

declare <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16>, <32 x i16>)

define <32 x i16> @test_int_x86_avx512_mask_pmulh_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmulh_w_512:
; X86: # %bb.0:
; X86-NEXT: vpmulhw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe5,0xd9]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmulhw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe5,0xd1]
; X86-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmulh_w_512:
; X64: # %bb.0:
; X64-NEXT: vpmulhw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe5,0xd9]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpmulhw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe5,0xd1]
declare <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16>, <32 x i16>)

define <32 x i16> @test_int_x86_avx512_mask_pmulhr_sw_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512:
; X86: # %bb.0:
; X86-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x0b,0xd9]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x0b,0xd1]
; X86-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512:
; X64: # %bb.0:
; X64-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x0b,0xd9]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x0b,0xd1]
; X64-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
  %4 = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1)
  %res2 = add <32 x i16> %3, %4
  ret <32 x i16> %res2
}

declare <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16>, <32 x i8>, i32)

define <32 x i8>@test_int_x86_avx512_mask_pmov_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmovwb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0xc1]
; X86-NEXT: vpmovwb %zmm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x30,0xc2]
; X86-NEXT: vpaddb %ymm2, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0xfc,0xca]
; X86-NEXT: vpmovwb %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x30,0xc0]
; X86-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfc,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpmovwb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0xc1]
; X64-NEXT: vpmovwb %zmm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x30,0xc2]
; X64-NEXT: vpaddb %ymm2, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0xfc,0xca]
; X64-NEXT: vpmovwb %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x30,0xc0]
; X64-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfc,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
  %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
  %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
  %res3 = add <32 x i8> %res0, %res1
  %res4 = add <32 x i8> %res3, %res2
  ret <32 x i8> %res4
}

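; Note: llvm.x86.avx512.mask.pmov.wb.512 truncates <32 x i16> to <32 x i8>
; (zmm source, ymm result); a mask of -1 covers every lane and selects the
; plain vpmovwb encoding. The .mem.512 variants below store the truncated
; result straight to memory.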
declare void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16>, i32)

define void @test_int_x86_avx512_mask_pmov_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpmovwb %zmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x48,0x30,0x00]
; X86-NEXT: vpmovwb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpmovwb %zmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x48,0x30,0x07]
; X64-NEXT: vpmovwb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
  call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
  ret void
}

declare <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16>, <32 x i8>, i32)

define <32 x i8>@test_int_x86_avx512_mask_pmovs_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmovswb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x20,0xc1]
; X86-NEXT: vpmovswb %zmm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x20,0xc2]
; X86-NEXT: vpaddb %ymm2, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0xfc,0xca]
; X86-NEXT: vpmovswb %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x20,0xc0]
; X86-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfc,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpmovswb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x20,0xc1]
; X64-NEXT: vpmovswb %zmm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x20,0xc2]
; X64-NEXT: vpaddb %ymm2, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0xfc,0xca]
; X64-NEXT: vpmovswb %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x20,0xc0]
; X64-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfc,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
  %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
  %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
  %res3 = add <32 x i8> %res0, %res1
  %res4 = add <32 x i8> %res3, %res2
  ret <32 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16>, i32)

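; Note: vpmovswb and vpmovuswb are the signed- and unsigned-saturating forms
; of the same word-to-byte truncation. Each .mem test calls the intrinsic
; with mask -1 and then with the live mask, producing one unmasked and one
; {%k1} store; vzeroupper precedes the return because the body used 512-bit
; registers.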
define void @test_int_x86_avx512_mask_pmovs_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpmovswb %zmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x48,0x20,0x00]
; X86-NEXT: vpmovswb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x20,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpmovswb %zmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x48,0x20,0x07]
; X64-NEXT: vpmovswb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x20,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
  call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
  ret void
}

declare <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16>, <32 x i8>, i32)

define <32 x i8>@test_int_x86_avx512_mask_pmovus_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmovuswb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x10,0xc1]
; X86-NEXT: vpmovuswb %zmm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x10,0xc2]
; X86-NEXT: vpaddb %ymm2, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0xfc,0xca]
; X86-NEXT: vpmovuswb %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x10,0xc0]
; X86-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfc,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpmovuswb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x10,0xc1]
; X64-NEXT: vpmovuswb %zmm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x10,0xc2]
; X64-NEXT: vpaddb %ymm2, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0xfc,0xca]
; X64-NEXT: vpmovuswb %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x10,0xc0]
; X64-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfc,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
  %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
  %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
  %res3 = add <32 x i8> %res0, %res1
  %res4 = add <32 x i8> %res3, %res2
  ret <32 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16>, i32)

define void @test_int_x86_avx512_mask_pmovus_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpmovuswb %zmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x48,0x10,0x00]
; X86-NEXT: vpmovuswb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x10,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpmovuswb %zmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x48,0x10,0x07]
; X64-NEXT: vpmovuswb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x10,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
  call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
  ret void
}

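; Note: vpmaddubsw below multiplies unsigned bytes of one source by signed
; bytes of the other and adds adjacent products into saturated i16 lanes,
; while vpmaddwd adds adjacent i16 products into i32 lanes. pmaddw.d yields
; only 16 elements, so its mask is i16 (<16 x i1>); that is why the X86 path
; loads it with kmovw while X64 still moves %edi with kmovd.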
declare <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8>, <64 x i8>)

define <32 x i16> @test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512:
; X86: # %bb.0:
; X86-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x04,0xd9]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x04,0xd1]
; X86-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512:
; X64: # %bb.0:
; X64-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x04,0xd9]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x04,0xd1]
; X64-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
  %4 = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1)
  %res2 = add <32 x i16> %3, %4
  ret <32 x i16> %res2
}

declare <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16>, <32 x i16>)

define <16 x i32> @test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
; X86: # %bb.0:
; X86-NEXT: vpmaddwd %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xf5,0xd9]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf5,0xd1]
; X86-NEXT: vpaddd %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
; X64: # %bb.0:
; X64-NEXT: vpmaddwd %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xf5,0xd9]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf5,0xd1]
; X64-NEXT: vpaddd %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
  %4 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1)
  %res2 = add <16 x i32> %3, %4
  ret <16 x i32> %res2
}

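; Note: vdbpsadbw takes an immediate (here $2) that controls how 32-bit
; blocks of the second source are shuffled before the packed
; sum-of-absolute-differences; it shows up as the trailing 0x02 byte of each
; encoding below.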
declare <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8>, <64 x i8>, i32)

define <32 x i16>@test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
; X86: # %bb.0:
; X86-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xd9,0x02]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xd1,0x02]
; X86-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xc1,0x02]
; X86-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
; X64: # %bb.0:
; X64-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xd9,0x02]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xd1,0x02]
; X64-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xc1,0x02]
; X64-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2)
  %2 = bitcast i32 %x4 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x3
  %4 = call <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2)
  %5 = bitcast i32 %x4 to <32 x i1>
  %6 = select <32 x i1> %5, <32 x i16> %4, <32 x i16> zeroinitializer
  %7 = call <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2)
  %res3 = add <32 x i16> %3, %6
  %res4 = add <32 x i16> %res3, %7
  ret <32 x i16> %res4
}

declare <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8>, <64 x i8>)

define <8 x i64>@test_int_x86_avx512_mask_psadb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2){
; CHECK-LABEL: test_int_x86_avx512_mask_psadb_w_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsadbw %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf1,0x7d,0x48,0xf6,0xc9]
; CHECK-NEXT: vpsadbw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf6,0xc2]
; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x1)
  %res1 = call <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x2)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16>, <32 x i16>, <32 x i16>, i32)

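; Note: llvm.x86.avx512.mask.psrlv32hi (and the psrav/psllv declarations that
; follow) is an older intrinsic style that carries the pass-through vector and
; the i32 mask in its own signature. Each test calls it three times, with the
; live mask, with a zeroinitializer pass-through, and with mask -1, covering
; the merge, zeroing, and unmasked encodings.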
define <32 x i16>@test_int_x86_avx512_mask_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrlv32hi:
; X86: # %bb.0:
; X86-NEXT: vpsrlvw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x10,0xd9]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x10,0xd1]
; X86-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x10,0xc1]
; X86-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrlv32hi:
; X64: # %bb.0:
; X64-NEXT: vpsrlvw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x10,0xd9]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x10,0xd1]
; X64-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x10,0xc1]
; X64-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
  %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %res3 = add <32 x i16> %res, %res1
  %res4 = add <32 x i16> %res3, %res2
  ret <32 x i16> %res4
}

declare <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16>, <32 x i16>, <32 x i16>, i32)

define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrav32_hi:
; X86: # %bb.0:
; X86-NEXT: vpsravw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x11,0xd9]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsravw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x11,0xd1]
; X86-NEXT: vpsravw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x11,0xc1]
; X86-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrav32_hi:
; X64: # %bb.0:
; X64-NEXT: vpsravw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x11,0xd9]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsravw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x11,0xd1]
; X64-NEXT: vpsravw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x11,0xc1]
; X64-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
  %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %res3 = add <32 x i16> %res, %res1
  %res4 = add <32 x i16> %res3, %res2
  ret <32 x i16> %res4
}

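; Note: the constant-operand test below has to materialize both vectors from
; the constant pool: the X86 encodings carry absolute-address fixups
; (FK_Data_4), while the X64 encodings use RIP-relative relocations
; (reloc_riprel_4byte), which is what the differing fixup lines check.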
define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi_const(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrav32_hi_const:
; X86: # %bb.0:
; X86-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51]
; X86-NEXT: # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A]
; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT: vpsravw {{\.LCPI.*}}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x11,0x05,A,A,A,A]
; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrav32_hi_const:
; X64: # %bb.0:
; X64-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51]
; X64-NEXT: # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A]
; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT: vpsravw {{.*}}(%rip), %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x11,0x05,A,A,A,A]
; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> <i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51>,
                <32 x i16> <i16 1, i16 10, i16 35, i16 52, i16 69, i16 9, i16 16, i16 49, i16 1, i16 10, i16 35, i16 52, i16 69, i16 9, i16 16, i16 49, i16 1, i16 10, i16 35, i16 52, i16 69, i16 9, i16 16, i16 49, i16 1, i16 10, i16 35, i16 52, i16 69, i16 9, i16 16, i16 49>,
                <32 x i16> zeroinitializer, i32 -1)
  ret <32 x i16> %res
}

declare <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16>, <32 x i16>, <32 x i16>, i32)

define <32 x i16>@test_int_x86_avx512_mask_psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psllv32hi:
; X86: # %bb.0:
; X86-NEXT: vpsllvw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x12,0xd9]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x12,0xd1]
; X86-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x12,0xc1]
; X86-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psllv32hi:
; X64: # %bb.0:
; X64-NEXT: vpsllvw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x12,0xd9]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x12,0xd1]
; X64-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x12,0xc1]
; X64-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %res1 = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
  %res2 = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %res3 = add <32 x i16> %res, %res1
  %res4 = add <32 x i16> %res3, %res2
  ret <32 x i16> %res4
}

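; Note: llvm.x86.avx512.permvar.hi.512 (vpermw) is the single-source word
; permute, where one operand supplies lane indices and the other the data, in
; contrast to the two-source vpermi2w/vpermt2w tests near the top of this
; section.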
declare <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16>, <32 x i16>)

define <32 x i16>@test_int_x86_avx512_mask_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
; X86: # %bb.0:
; X86-NEXT: vpermw %zmm0, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0x8d,0xd8]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpermw %zmm0, %zmm1, %zmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x8d,0xd0]
; X86-NEXT: vpermw %zmm0, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x8d,0xc0]
; X86-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
; X64: # %bb.0:
; X64-NEXT: vpermw %zmm0, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0x8d,0xd8]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpermw %zmm0, %zmm1, %zmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x8d,0xd0]
; X64-NEXT: vpermw %zmm0, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x8d,0xc0]
; X64-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
  %4 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1)
  %5 = bitcast i32 %x3 to <32 x i1>
  %6 = select <32 x i1> %5, <32 x i16> %4, <32 x i16> zeroinitializer
  %7 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1)
  %res3 = add <32 x i16> %3, %6
  %res4 = add <32 x i16> %res3, %7
  ret <32 x i16> %res4
}

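; Note: the remaining tests cover the SSE-style shifts. vpsllw/vpsraw/vpsrlw
; take the shift count from the low 64 bits of an xmm register; the
; pslli/psrai/psrli intrinsics encode the count as an immediate byte. Each
; operation is tested in plain, merge-masked, and zero-masked forms.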
define <32 x i16> @test_x86_avx512_psll_w_512(<32 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_avx512_psll_w_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsllw %xmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf1,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  ret <32 x i16> %res
}
define <32 x i16> @test_x86_avx512_mask_psll_w_512(<32 x i16> %a0, <8 x i16> %a1, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_x86_avx512_mask_psll_w_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsllw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf1,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psll_w_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsllw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf1,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
  ret <32 x i16> %res2
}
define <32 x i16> @test_x86_avx512_maskz_psll_w_512(<32 x i16> %a0, <8 x i16> %a1, i32 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psll_w_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsllw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf1,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psll_w_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsllw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf1,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
  ret <32 x i16> %res2
}
declare <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16>, <8 x i16>) nounwind readnone


define <32 x i16> @test_x86_avx512_pslli_w_512(<32 x i16> %a0) {
; CHECK-LABEL: test_x86_avx512_pslli_w_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsllw $7, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xf0,0x07]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  ret <32 x i16> %res
}
define <32 x i16> @test_x86_avx512_mask_pslli_w_512(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_x86_avx512_mask_pslli_w_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsllw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xf0,0x07]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_pslli_w_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsllw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xf0,0x07]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
  ret <32 x i16> %res2
}
define <32 x i16> @test_x86_avx512_maskz_pslli_w_512(<32 x i16> %a0, i32 %mask) {
; X86-LABEL: test_x86_avx512_maskz_pslli_w_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsllw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xf0,0x07]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_pslli_w_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsllw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xf0,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
  ret <32 x i16> %res2
}
declare <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16>, i32) nounwind readnone


define <32 x i16> @test_x86_avx512_psra_w_512(<32 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_avx512_psra_w_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsraw %xmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe1,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  ret <32 x i16> %res
}
define <32 x i16> @test_x86_avx512_mask_psra_w_512(<32 x i16> %a0, <8 x i16> %a1, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_x86_avx512_mask_psra_w_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsraw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe1,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psra_w_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsraw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe1,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
  ret <32 x i16> %res2
}
define <32 x i16> @test_x86_avx512_maskz_psra_w_512(<32 x i16> %a0, <8 x i16> %a1, i32 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psra_w_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsraw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe1,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psra_w_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsraw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe1,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
  ret <32 x i16> %res2
}
declare <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16>, <8 x i16>) nounwind readnone


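; Note: the immediate-shift forms share opcode 0x71 and select the operation
; through the ModRM reg field rather than a distinct opcode: /6 (modrm 0xf0)
; above is vpsllw, /4 (0xe0) below is vpsraw, and /2 (0xd0) further down is
; vpsrlw, each followed by the immediate byte 0x07.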
define <32 x i16> @test_x86_avx512_psrai_w_512(<32 x i16> %a0) {
; CHECK-LABEL: test_x86_avx512_psrai_w_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsraw $7, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xe0,0x07]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  ret <32 x i16> %res
}
define <32 x i16> @test_x86_avx512_mask_psrai_w_512(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrai_w_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsraw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xe0,0x07]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrai_w_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsraw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xe0,0x07]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
  ret <32 x i16> %res2
}
define <32 x i16> @test_x86_avx512_maskz_psrai_w_512(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrai_w_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsraw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xe0,0x07]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrai_w_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsraw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xe0,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
  ret <32 x i16> %res2
}
declare <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16>, i32) nounwind readnone


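; Note: vpsrlw below is the logical, zero-filling counterpart of the
; arithmetic vpsraw tests above; the IR and masking pattern are otherwise
; identical.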
define <32 x i16> @test_x86_avx512_psrl_w_512(<32 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_avx512_psrl_w_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd1,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  ret <32 x i16> %res
}
define <32 x i16> @test_x86_avx512_mask_psrl_w_512(<32 x i16> %a0, <8 x i16> %a1, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrl_w_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsrlw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd1,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrl_w_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsrlw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd1,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
  ret <32 x i16> %res2
}
define <32 x i16> @test_x86_avx512_maskz_psrl_w_512(<32 x i16> %a0, <8 x i16> %a1, i32 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrl_w_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd1,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrl_w_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd1,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
  ret <32 x i16> %res2
}
declare <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16>, <8 x i16>) nounwind readnone


define <32 x i16> @test_x86_avx512_psrli_w_512(<32 x i16> %a0) {
; CHECK-LABEL: test_x86_avx512_psrli_w_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsrlw $7, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xd0,0x07]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  ret <32 x i16> %res
}
define <32 x i16> @test_x86_avx512_mask_psrli_w_512(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrli_w_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsrlw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xd0,0x07]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrli_w_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsrlw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xd0,0x07]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
  ret <32 x i16> %res2
}
define <32 x i16> @test_x86_avx512_maskz_psrli_w_512(<32 x i16> %a0, i32 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrli_w_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsrlw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xd0,0x07]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrli_w_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsrlw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xd0,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
  ret <32 x i16> %res2
}
declare <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16>, i32) nounwind readnone