1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512bw,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86 3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64 4 5 declare <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8, <16 x i8>, i16) 6 7 define <16 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_128(i8 %x0, <16 x i8> %x1, i16 %mask) { 8 ; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_128: 9 ; X86: # %bb.0: 10 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04] 11 ; X86-NEXT: vpbroadcastb %eax, %xmm1 # encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xc8] 12 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 13 ; X86-NEXT: vpbroadcastb %eax, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7a,0xc0] 14 ; X86-NEXT: vpbroadcastb %eax, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7a,0xd0] 15 ; X86-NEXT: vpaddb %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc2] 16 ; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] 17 ; X86-NEXT: retl # encoding: [0xc3] 18 ; 19 ; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_128: 20 ; X64: # %bb.0: 21 ; X64-NEXT: vpbroadcastb %edi, %xmm1 # encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xcf] 22 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 23 ; X64-NEXT: vpbroadcastb %edi, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7a,0xc7] 24 ; X64-NEXT: vpbroadcastb %edi, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7a,0xd7] 25 ; X64-NEXT: vpaddb %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc2] 26 ; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] 27 ; X64-NEXT: retq # encoding: [0xc3] 28 %res = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> %x1, i16 -1) 29 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> %x1, i16 %mask) 30 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> zeroinitializer, i16 %mask) 31 %res3 = add <16 x i8> %res, %res1 32 %res4 = add <16 x i8> %res2, %res3 33 ret <16 x i8> %res4 34 } 35 36 37 declare <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16, <8 x i16>, i8) 38 39 define <8 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_128(i16 %x0, <8 x i16> %x1, i8 %mask) { 40 ; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_128: 41 ; X86: # %bb.0: 42 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] 43 ; X86-NEXT: vpbroadcastw %eax, %xmm1 # encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xc8] 44 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 45 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 46 ; X86-NEXT: vpbroadcastw %eax, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7b,0xc0] 47 ; X86-NEXT: vpbroadcastw %eax, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7b,0xd0] 48 ; X86-NEXT: vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2] 49 ; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] 50 ; X86-NEXT: retl # encoding: [0xc3] 51 ; 52 ; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_128: 53 ; X64: # %bb.0: 54 ; X64-NEXT: vpbroadcastw %edi, %xmm1 # encoding: 
[0x62,0xf2,0x7d,0x08,0x7b,0xcf] 55 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 56 ; X64-NEXT: vpbroadcastw %edi, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7b,0xc7] 57 ; X64-NEXT: vpbroadcastw %edi, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7b,0xd7] 58 ; X64-NEXT: vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2] 59 ; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] 60 ; X64-NEXT: retq # encoding: [0xc3] 61 %res = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> %x1, i8 -1) 62 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> %x1, i8 %mask) 63 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> zeroinitializer, i8 %mask) 64 %res3 = add <8 x i16> %res, %res1 65 %res4 = add <8 x i16> %res2, %res3 66 ret <8 x i16> %res4 67 } 68 69 70 declare <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8, <32 x i8>, i32) 71 72 define <32 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_256(i8 %x0, <32 x i8> %x1, i32 %mask) { 73 ; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_256: 74 ; X86: # %bb.0: 75 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04] 76 ; X86-NEXT: vpbroadcastb %eax, %ymm1 # encoding: [0x62,0xf2,0x7d,0x28,0x7a,0xc8] 77 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 78 ; X86-NEXT: vpbroadcastb %eax, %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7a,0xc0] 79 ; X86-NEXT: vpbroadcastb %eax, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7a,0xd0] 80 ; X86-NEXT: vpaddb %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc2] 81 ; X86-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0] 82 ; X86-NEXT: retl # encoding: [0xc3] 83 ; 84 ; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_256: 85 ; X64: # %bb.0: 86 ; X64-NEXT: vpbroadcastb %edi, %ymm1 # encoding: [0x62,0xf2,0x7d,0x28,0x7a,0xcf] 87 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 88 ; X64-NEXT: vpbroadcastb %edi, %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7a,0xc7] 89 ; X64-NEXT: vpbroadcastb %edi, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7a,0xd7] 90 ; X64-NEXT: vpaddb %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc2] 91 ; X64-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0] 92 ; X64-NEXT: retq # encoding: [0xc3] 93 %res = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> %x1, i32 -1) 94 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> %x1, i32 %mask) 95 %res2 = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> zeroinitializer, i32 %mask) 96 %res3 = add <32 x i8> %res, %res1 97 %res4 = add <32 x i8> %res2, %res3 98 ret <32 x i8> %res4 99 } 100 101 102 103 declare <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16, <16 x i16>, i16) 104 105 define <16 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_256(i16 %x0, <16 x i16> %x1, i16 %mask) { 106 ; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_256: 107 ; X86: # %bb.0: 108 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] 109 ; X86-NEXT: vpbroadcastw %eax, %ymm1 # encoding: [0x62,0xf2,0x7d,0x28,0x7b,0xc8] 110 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 111 ; X86-NEXT: vpbroadcastw 
%eax, %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7b,0xc0] 112 ; X86-NEXT: vpbroadcastw %eax, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7b,0xd0] 113 ; X86-NEXT: vpaddw %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2] 114 ; X86-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] 115 ; X86-NEXT: retl # encoding: [0xc3] 116 ; 117 ; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_256: 118 ; X64: # %bb.0: 119 ; X64-NEXT: vpbroadcastw %edi, %ymm1 # encoding: [0x62,0xf2,0x7d,0x28,0x7b,0xcf] 120 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 121 ; X64-NEXT: vpbroadcastw %edi, %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7b,0xc7] 122 ; X64-NEXT: vpbroadcastw %edi, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7b,0xd7] 123 ; X64-NEXT: vpaddw %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2] 124 ; X64-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] 125 ; X64-NEXT: retq # encoding: [0xc3] 126 %res = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> %x1, i16 -1) 127 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> %x1, i16 %mask) 128 %res2 = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> zeroinitializer, i16 %mask) 129 %res3 = add <16 x i16> %res, %res1 130 %res4 = add <16 x i16> %res2, %res3 131 ret <16 x i16> %res4 132 } 133 134 declare <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8>, <32 x i8>, i32) 135 136 define <32 x i8>@test_int_x86_avx512_pbroadcastb_256(<16 x i8> %x0, <32 x i8> %x1, i32 %mask) { 137 ; X86-LABEL: test_int_x86_avx512_pbroadcastb_256: 138 ; X86: # %bb.0: 139 ; X86-NEXT: vpbroadcastb %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x78,0xd0] 140 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 141 ; X86-NEXT: vpbroadcastb %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x78,0xc8] 142 ; X86-NEXT: vpbroadcastb %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x78,0xc0] 143 ; X86-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0] 144 ; X86-NEXT: vpaddb %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc0] 145 ; X86-NEXT: retl # encoding: [0xc3] 146 ; 147 ; X64-LABEL: test_int_x86_avx512_pbroadcastb_256: 148 ; X64: # %bb.0: 149 ; X64-NEXT: vpbroadcastb %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x78,0xd0] 150 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 151 ; X64-NEXT: vpbroadcastb %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x78,0xc8] 152 ; X64-NEXT: vpbroadcastb %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x78,0xc0] 153 ; X64-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0] 154 ; X64-NEXT: vpaddb %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc0] 155 ; X64-NEXT: retq # encoding: [0xc3] 156 %res = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> %x1, i32 -1) 157 %res1 = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> %x1, i32 %mask) 158 %res2 = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> zeroinitializer, i32 %mask) 159 %res3 = add <32 x i8> %res, %res1 160 %res4 = add <32 x i8> %res2, %res3 161 ret <32 x i8> %res4 162 } 163 164 declare <16 x i8> 
@llvm.x86.avx512.pbroadcastb.128(<16 x i8>, <16 x i8>, i16) 165 166 define <16 x i8>@test_int_x86_avx512_pbroadcastb_128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask) { 167 ; X86-LABEL: test_int_x86_avx512_pbroadcastb_128: 168 ; X86: # %bb.0: 169 ; X86-NEXT: vpbroadcastb %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x78,0xd0] 170 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 171 ; X86-NEXT: vpbroadcastb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x78,0xc8] 172 ; X86-NEXT: vpbroadcastb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x78,0xc0] 173 ; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] 174 ; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] 175 ; X86-NEXT: retl # encoding: [0xc3] 176 ; 177 ; X64-LABEL: test_int_x86_avx512_pbroadcastb_128: 178 ; X64: # %bb.0: 179 ; X64-NEXT: vpbroadcastb %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x78,0xd0] 180 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 181 ; X64-NEXT: vpbroadcastb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x78,0xc8] 182 ; X64-NEXT: vpbroadcastb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x78,0xc0] 183 ; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] 184 ; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] 185 ; X64-NEXT: retq # encoding: [0xc3] 186 %res = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> %x1, i16 -1) 187 %res1 = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask) 188 %res2 = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> zeroinitializer, i16 %mask) 189 %res3 = add <16 x i8> %res, %res1 190 %res4 = add <16 x i8> %res2, %res3 191 ret <16 x i8> %res4 192 } 193 194 declare <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16>, <16 x i16>, i16) 195 196 define <16 x i16>@test_int_x86_avx512_pbroadcastw_256(<8 x i16> %x0, <16 x i16> %x1, i16 %mask) { 197 ; X86-LABEL: test_int_x86_avx512_pbroadcastw_256: 198 ; X86: # %bb.0: 199 ; X86-NEXT: vpbroadcastw %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x79,0xd0] 200 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 201 ; X86-NEXT: vpbroadcastw %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x79,0xc8] 202 ; X86-NEXT: vpbroadcastw %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x79,0xc0] 203 ; X86-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] 204 ; X86-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] 205 ; X86-NEXT: retl # encoding: [0xc3] 206 ; 207 ; X64-LABEL: test_int_x86_avx512_pbroadcastw_256: 208 ; X64: # %bb.0: 209 ; X64-NEXT: vpbroadcastw %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x79,0xd0] 210 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 211 ; X64-NEXT: vpbroadcastw %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x79,0xc8] 212 ; X64-NEXT: vpbroadcastw %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x79,0xc0] 213 ; X64-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] 214 ; X64-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] 215 ; X64-NEXT: retq # encoding: [0xc3] 216 %res = call <16 x 
i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> %x1, i16 -1) 217 %res1 = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> %x1, i16 %mask) 218 %res2 = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> zeroinitializer, i16 %mask) 219 %res3 = add <16 x i16> %res, %res1 220 %res4 = add <16 x i16> %res2, %res3 221 ret <16 x i16> %res4 222 } 223 224 declare <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16>, <8 x i16>, i8) 225 226 define <8 x i16>@test_int_x86_avx512_pbroadcastw_128(<8 x i16> %x0, <8 x i16> %x1, i8 %mask) { 227 ; X86-LABEL: test_int_x86_avx512_pbroadcastw_128: 228 ; X86: # %bb.0: 229 ; X86-NEXT: vpbroadcastw %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x79,0xd0] 230 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 231 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 232 ; X86-NEXT: vpbroadcastw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x79,0xc8] 233 ; X86-NEXT: vpbroadcastw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x79,0xc0] 234 ; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] 235 ; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] 236 ; X86-NEXT: retl # encoding: [0xc3] 237 ; 238 ; X64-LABEL: test_int_x86_avx512_pbroadcastw_128: 239 ; X64: # %bb.0: 240 ; X64-NEXT: vpbroadcastw %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x79,0xd0] 241 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 242 ; X64-NEXT: vpbroadcastw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x79,0xc8] 243 ; X64-NEXT: vpbroadcastw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x79,0xc0] 244 ; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] 245 ; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] 246 ; X64-NEXT: retq # encoding: [0xc3] 247 %res = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> %x1, i8 -1) 248 %res1 = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> %x1, i8 %mask) 249 %res2 = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> zeroinitializer, i8 %mask) 250 %res3 = add <8 x i16> %res, %res1 251 %res4 = add <8 x i16> %res2, %res3 252 ret <8 x i16> %res4 253 } 254 255 declare <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8>, <64 x i8>, i64) 256 257 define <64 x i8>@test_int_x86_avx512_pbroadcastb_512(<16 x i8> %x0, <64 x i8> %x1, i64 %mask) { 258 ; X86-LABEL: test_int_x86_avx512_pbroadcastb_512: 259 ; X86: # %bb.0: 260 ; X86-NEXT: vpbroadcastb %xmm0, %zmm2 # encoding: [0x62,0xf2,0x7d,0x48,0x78,0xd0] 261 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] 262 ; X86-NEXT: vpbroadcastb %xmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x78,0xc8] 263 ; X86-NEXT: vpbroadcastb %xmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x78,0xc0] 264 ; X86-NEXT: vpaddb %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfc,0xc0] 265 ; X86-NEXT: vpaddb %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc0] 266 ; X86-NEXT: retl # encoding: [0xc3] 267 ; 268 ; X64-LABEL: test_int_x86_avx512_pbroadcastb_512: 269 ; X64: # %bb.0: 270 ; X64-NEXT: vpbroadcastb %xmm0, %zmm2 # encoding: [0x62,0xf2,0x7d,0x48,0x78,0xd0] 271 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] 272 ; X64-NEXT: 
vpbroadcastb %xmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x78,0xc8] 273 ; X64-NEXT: vpbroadcastb %xmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x78,0xc0] 274 ; X64-NEXT: vpaddb %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfc,0xc0] 275 ; X64-NEXT: vpaddb %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc0] 276 ; X64-NEXT: retq # encoding: [0xc3] 277 %res = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> %x1, i64 -1) 278 %res1 = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> %x1, i64 %mask) 279 %res2 = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> zeroinitializer, i64 %mask) 280 %res3 = add <64 x i8> %res, %res1 281 %res4 = add <64 x i8> %res2, %res3 282 ret <64 x i8> %res4 283 } 284 285 declare <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16>, <32 x i16>, i32) 286 287 define <32 x i16>@test_int_x86_avx512_pbroadcastw_512(<8 x i16> %x0, <32 x i16> %x1, i32 %mask) { 288 ; X86-LABEL: test_int_x86_avx512_pbroadcastw_512: 289 ; X86: # %bb.0: 290 ; X86-NEXT: vpbroadcastw %xmm0, %zmm2 # encoding: [0x62,0xf2,0x7d,0x48,0x79,0xd0] 291 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 292 ; X86-NEXT: vpbroadcastw %xmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x79,0xc8] 293 ; X86-NEXT: vpbroadcastw %xmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x79,0xc0] 294 ; X86-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0] 295 ; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] 296 ; X86-NEXT: retl # encoding: [0xc3] 297 ; 298 ; X64-LABEL: test_int_x86_avx512_pbroadcastw_512: 299 ; X64: # %bb.0: 300 ; X64-NEXT: vpbroadcastw %xmm0, %zmm2 # encoding: [0x62,0xf2,0x7d,0x48,0x79,0xd0] 301 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 302 ; X64-NEXT: vpbroadcastw %xmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x79,0xc8] 303 ; X64-NEXT: vpbroadcastw %xmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x79,0xc0] 304 ; X64-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0] 305 ; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] 306 ; X64-NEXT: retq # encoding: [0xc3] 307 %res = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> %x1, i32 -1) 308 %res1 = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> %x1, i32 %mask) 309 %res2 = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> zeroinitializer, i32 %mask) 310 %res3 = add <32 x i16> %res, %res1 311 %res4 = add <32 x i16> %res2, %res3 312 ret <32 x i16> %res4 313 } 314 315 declare void @llvm.x86.avx512.mask.storeu.b.128(i8*, <16 x i8>, i16) 316 317 define void@test_int_x86_avx512_mask_storeu_b_128(i8* %ptr1, i8* %ptr2, <16 x i8> %x1, i16 %x2) { 318 ; X86-LABEL: test_int_x86_avx512_mask_storeu_b_128: 319 ; X86: # %bb.0: 320 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 321 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 322 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c] 323 ; X86-NEXT: vmovdqu8 %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x7f,0x01] 324 ; X86-NEXT: vmovdqu %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x00] 325 ; X86-NEXT: retl # encoding: [0xc3] 326 ; 327 ; X64-LABEL: test_int_x86_avx512_mask_storeu_b_128: 328 ; X64: # %bb.0: 329 ; X64-NEXT: kmovd %edx, 
%k1 # encoding: [0xc5,0xfb,0x92,0xca] 330 ; X64-NEXT: vmovdqu8 %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x7f,0x07] 331 ; X64-NEXT: vmovdqu %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x06] 332 ; X64-NEXT: retq # encoding: [0xc3] 333 call void @llvm.x86.avx512.mask.storeu.b.128(i8* %ptr1, <16 x i8> %x1, i16 %x2) 334 call void @llvm.x86.avx512.mask.storeu.b.128(i8* %ptr2, <16 x i8> %x1, i16 -1) 335 ret void 336 } 337 338 declare void @llvm.x86.avx512.mask.storeu.b.256(i8*, <32 x i8>, i32) 339 340 define void@test_int_x86_avx512_mask_storeu_b_256(i8* %ptr1, i8* %ptr2, <32 x i8> %x1, i32 %x2) { 341 ; X86-LABEL: test_int_x86_avx512_mask_storeu_b_256: 342 ; X86: # %bb.0: 343 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 344 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 345 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x0c] 346 ; X86-NEXT: vmovdqu8 %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x7f,0x01] 347 ; X86-NEXT: vmovdqu %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x00] 348 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 349 ; X86-NEXT: retl # encoding: [0xc3] 350 ; 351 ; X64-LABEL: test_int_x86_avx512_mask_storeu_b_256: 352 ; X64: # %bb.0: 353 ; X64-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca] 354 ; X64-NEXT: vmovdqu8 %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x7f,0x07] 355 ; X64-NEXT: vmovdqu %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x06] 356 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 357 ; X64-NEXT: retq # encoding: [0xc3] 358 call void @llvm.x86.avx512.mask.storeu.b.256(i8* %ptr1, <32 x i8> %x1, i32 %x2) 359 call void @llvm.x86.avx512.mask.storeu.b.256(i8* %ptr2, <32 x i8> %x1, i32 -1) 360 ret void 361 } 362 363 declare void @llvm.x86.avx512.mask.storeu.w.128(i8*, <8 x i16>, i8) 364 365 define void@test_int_x86_avx512_mask_storeu_w_128(i8* %ptr1, i8* %ptr2, <8 x i16> %x1, i8 %x2) { 366 ; X86-LABEL: test_int_x86_avx512_mask_storeu_w_128: 367 ; X86: # %bb.0: 368 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 369 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 370 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c] 371 ; X86-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca] 372 ; X86-NEXT: vmovdqu16 %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x7f,0x01] 373 ; X86-NEXT: vmovdqu %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x00] 374 ; X86-NEXT: retl # encoding: [0xc3] 375 ; 376 ; X64-LABEL: test_int_x86_avx512_mask_storeu_w_128: 377 ; X64: # %bb.0: 378 ; X64-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca] 379 ; X64-NEXT: vmovdqu16 %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x7f,0x07] 380 ; X64-NEXT: vmovdqu %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x06] 381 ; X64-NEXT: retq # encoding: [0xc3] 382 call void @llvm.x86.avx512.mask.storeu.w.128(i8* %ptr1, <8 x i16> %x1, i8 %x2) 383 call void @llvm.x86.avx512.mask.storeu.w.128(i8* %ptr2, <8 x i16> %x1, i8 -1) 384 ret void 385 } 386 387 declare void @llvm.x86.avx512.mask.storeu.w.256(i8*, <16 x i16>, i16) 388 389 define void@test_int_x86_avx512_mask_storeu_w_256(i8* %ptr1, i8* %ptr2, <16 x i16> %x1, i16 %x2) { 390 ; X86-LABEL: test_int_x86_avx512_mask_storeu_w_256: 391 ; X86: # %bb.0: 392 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 393 ; 
X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 394 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c] 395 ; X86-NEXT: vmovdqu16 %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x7f,0x01] 396 ; X86-NEXT: vmovdqu %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x00] 397 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 398 ; X86-NEXT: retl # encoding: [0xc3] 399 ; 400 ; X64-LABEL: test_int_x86_avx512_mask_storeu_w_256: 401 ; X64: # %bb.0: 402 ; X64-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca] 403 ; X64-NEXT: vmovdqu16 %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x7f,0x07] 404 ; X64-NEXT: vmovdqu %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x06] 405 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 406 ; X64-NEXT: retq # encoding: [0xc3] 407 call void @llvm.x86.avx512.mask.storeu.w.256(i8* %ptr1, <16 x i16> %x1, i16 %x2) 408 call void @llvm.x86.avx512.mask.storeu.w.256(i8* %ptr2, <16 x i16> %x1, i16 -1) 409 ret void 410 } 411 412 declare <8 x i16> @llvm.x86.avx512.mask.loadu.w.128(i8*, <8 x i16>, i8) 413 414 define <8 x i16>@test_int_x86_avx512_mask_loadu_w_128(i8* %ptr, i8* %ptr2, <8 x i16> %x1, i8 %mask) { 415 ; X86-LABEL: test_int_x86_avx512_mask_loadu_w_128: 416 ; X86: # %bb.0: 417 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 418 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 419 ; X86-NEXT: vmovdqu (%ecx), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x01] 420 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c] 421 ; X86-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca] 422 ; X86-NEXT: vmovdqu16 (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x6f,0x00] 423 ; X86-NEXT: vmovdqu16 (%ecx), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x6f,0x09] 424 ; X86-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1] 425 ; X86-NEXT: retl # encoding: [0xc3] 426 ; 427 ; X64-LABEL: test_int_x86_avx512_mask_loadu_w_128: 428 ; X64: # %bb.0: 429 ; X64-NEXT: vmovdqu (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x07] 430 ; X64-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca] 431 ; X64-NEXT: vmovdqu16 (%rsi), %xmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x6f,0x06] 432 ; X64-NEXT: vmovdqu16 (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x6f,0x0f] 433 ; X64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1] 434 ; X64-NEXT: retq # encoding: [0xc3] 435 %res0 = call <8 x i16> @llvm.x86.avx512.mask.loadu.w.128(i8* %ptr, <8 x i16> %x1, i8 -1) 436 %res = call <8 x i16> @llvm.x86.avx512.mask.loadu.w.128(i8* %ptr2, <8 x i16> %res0, i8 %mask) 437 %res1 = call <8 x i16> @llvm.x86.avx512.mask.loadu.w.128(i8* %ptr, <8 x i16> zeroinitializer, i8 %mask) 438 %res2 = add <8 x i16> %res, %res1 439 ret <8 x i16> %res2 440 } 441 442 declare <16 x i16> @llvm.x86.avx512.mask.loadu.w.256(i8*, <16 x i16>, i16) 443 444 define <16 x i16>@test_int_x86_avx512_mask_loadu_w_256(i8* %ptr, i8* %ptr2, <16 x i16> %x1, i16 %mask) { 445 ; X86-LABEL: test_int_x86_avx512_mask_loadu_w_256: 446 ; X86: # %bb.0: 447 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 448 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 449 ; X86-NEXT: vmovdqu (%ecx), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x01] 450 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 
# encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c] 451 ; X86-NEXT: vmovdqu16 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x6f,0x00] 452 ; X86-NEXT: vmovdqu16 (%ecx), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x6f,0x09] 453 ; X86-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc1] 454 ; X86-NEXT: retl # encoding: [0xc3] 455 ; 456 ; X64-LABEL: test_int_x86_avx512_mask_loadu_w_256: 457 ; X64: # %bb.0: 458 ; X64-NEXT: vmovdqu (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x07] 459 ; X64-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca] 460 ; X64-NEXT: vmovdqu16 (%rsi), %ymm0 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x6f,0x06] 461 ; X64-NEXT: vmovdqu16 (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x6f,0x0f] 462 ; X64-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc1] 463 ; X64-NEXT: retq # encoding: [0xc3] 464 %res0 = call <16 x i16> @llvm.x86.avx512.mask.loadu.w.256(i8* %ptr, <16 x i16> %x1, i16 -1) 465 %res = call <16 x i16> @llvm.x86.avx512.mask.loadu.w.256(i8* %ptr2, <16 x i16> %res0, i16 %mask) 466 %res1 = call <16 x i16> @llvm.x86.avx512.mask.loadu.w.256(i8* %ptr, <16 x i16> zeroinitializer, i16 %mask) 467 %res2 = add <16 x i16> %res, %res1 468 ret <16 x i16> %res2 469 } 470 471 declare <16 x i8> @llvm.x86.avx512.mask.loadu.b.128(i8*, <16 x i8>, i16) 472 473 define <16 x i8>@test_int_x86_avx512_mask_loadu_b_128(i8* %ptr, i8* %ptr2, <16 x i8> %x1, i16 %mask) { 474 ; X86-LABEL: test_int_x86_avx512_mask_loadu_b_128: 475 ; X86: # %bb.0: 476 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 477 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 478 ; X86-NEXT: vmovdqu (%ecx), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x01] 479 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c] 480 ; X86-NEXT: vmovdqu8 (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x6f,0x00] 481 ; X86-NEXT: vmovdqu8 (%ecx), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x6f,0x09] 482 ; X86-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1] 483 ; X86-NEXT: retl # encoding: [0xc3] 484 ; 485 ; X64-LABEL: test_int_x86_avx512_mask_loadu_b_128: 486 ; X64: # %bb.0: 487 ; X64-NEXT: vmovdqu (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x07] 488 ; X64-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca] 489 ; X64-NEXT: vmovdqu8 (%rsi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x6f,0x06] 490 ; X64-NEXT: vmovdqu8 (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x6f,0x0f] 491 ; X64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1] 492 ; X64-NEXT: retq # encoding: [0xc3] 493 %res0 = call <16 x i8> @llvm.x86.avx512.mask.loadu.b.128(i8* %ptr, <16 x i8> %x1, i16 -1) 494 %res = call <16 x i8> @llvm.x86.avx512.mask.loadu.b.128(i8* %ptr2, <16 x i8> %res0, i16 %mask) 495 %res1 = call <16 x i8> @llvm.x86.avx512.mask.loadu.b.128(i8* %ptr, <16 x i8> zeroinitializer, i16 %mask) 496 %res2 = add <16 x i8> %res, %res1 497 ret <16 x i8> %res2 498 } 499 500 declare <32 x i8> @llvm.x86.avx512.mask.loadu.b.256(i8*, <32 x i8>, i32) 501 502 define <32 x i8>@test_int_x86_avx512_mask_loadu_b_256(i8* %ptr, i8* %ptr2, <32 x i8> %x1, i32 %mask) { 503 ; X86-LABEL: test_int_x86_avx512_mask_loadu_b_256: 504 ; X86: # %bb.0: 505 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 506 ; X86-NEXT: movl 
{{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 507 ; X86-NEXT: vmovdqu (%ecx), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x01] 508 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x0c] 509 ; X86-NEXT: vmovdqu8 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x6f,0x00] 510 ; X86-NEXT: vmovdqu8 (%ecx), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x6f,0x09] 511 ; X86-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc1] 512 ; X86-NEXT: retl # encoding: [0xc3] 513 ; 514 ; X64-LABEL: test_int_x86_avx512_mask_loadu_b_256: 515 ; X64: # %bb.0: 516 ; X64-NEXT: vmovdqu (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x07] 517 ; X64-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca] 518 ; X64-NEXT: vmovdqu8 (%rsi), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x6f,0x06] 519 ; X64-NEXT: vmovdqu8 (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x6f,0x0f] 520 ; X64-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc1] 521 ; X64-NEXT: retq # encoding: [0xc3] 522 %res0 = call <32 x i8> @llvm.x86.avx512.mask.loadu.b.256(i8* %ptr, <32 x i8> %x1, i32 -1) 523 %res = call <32 x i8> @llvm.x86.avx512.mask.loadu.b.256(i8* %ptr2, <32 x i8> %res0, i32 %mask) 524 %res1 = call <32 x i8> @llvm.x86.avx512.mask.loadu.b.256(i8* %ptr, <32 x i8> zeroinitializer, i32 %mask) 525 %res2 = add <32 x i8> %res, %res1 526 ret <32 x i8> %res2 527 } 528 529 declare <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8>, <16 x i8>, i32, <16 x i8>, i16) 530 531 define <16 x i8>@test_int_x86_avx512_mask_palignr_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x3, i16 %x4) { 532 ; X86-LABEL: test_int_x86_avx512_mask_palignr_128: 533 ; X86: # %bb.0: 534 ; X86-NEXT: vpalignr $2, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0f,0xd9,0x02] 535 ; X86-NEXT: # xmm3 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1] 536 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 537 ; X86-NEXT: vpalignr $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x0f,0xd1,0x02] 538 ; X86-NEXT: # xmm2 {%k1} = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1] 539 ; X86-NEXT: vpalignr $2, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x0f,0xc1,0x02] 540 ; X86-NEXT: # xmm0 {%k1} {z} = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1] 541 ; X86-NEXT: vpaddb %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc3] 542 ; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] 543 ; X86-NEXT: retl # encoding: [0xc3] 544 ; 545 ; X64-LABEL: test_int_x86_avx512_mask_palignr_128: 546 ; X64: # %bb.0: 547 ; X64-NEXT: vpalignr $2, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0f,0xd9,0x02] 548 ; X64-NEXT: # xmm3 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1] 549 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 550 ; X64-NEXT: vpalignr $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x0f,0xd1,0x02] 551 ; X64-NEXT: # xmm2 {%k1} = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1] 552 ; X64-NEXT: vpalignr $2, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x0f,0xc1,0x02] 553 ; X64-NEXT: # xmm0 {%k1} {z} = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1] 554 ; X64-NEXT: vpaddb %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc3] 555 ; X64-NEXT: vpaddb %xmm0, 
%xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] 556 ; X64-NEXT: retq # encoding: [0xc3] 557 %res = call <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <16 x i8> %x3, i16 %x4) 558 %res1 = call <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <16 x i8> zeroinitializer, i16 %x4) 559 %res2 = call <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <16 x i8> %x3, i16 -1) 560 %res3 = add <16 x i8> %res, %res1 561 %res4 = add <16 x i8> %res3, %res2 562 ret <16 x i8> %res4 563 } 564 565 declare <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8>, <32 x i8>, i32, <32 x i8>, i32) 566 567 define <32 x i8>@test_int_x86_avx512_mask_palignr_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x3, i32 %x4) { 568 ; X86-LABEL: test_int_x86_avx512_mask_palignr_256: 569 ; X86: # %bb.0: 570 ; X86-NEXT: vpalignr $2, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x0f,0xd9,0x02] 571 ; X86-NEXT: # ymm3 = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17] 572 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 573 ; X86-NEXT: vpalignr $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x0f,0xd1,0x02] 574 ; X86-NEXT: # ymm2 {%k1} = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17] 575 ; X86-NEXT: vpalignr $2, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x0f,0xc1,0x02] 576 ; X86-NEXT: # ymm0 {%k1} {z} = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17] 577 ; X86-NEXT: vpaddb %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc3] 578 ; X86-NEXT: vpaddb %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc0] 579 ; X86-NEXT: retl # encoding: [0xc3] 580 ; 581 ; X64-LABEL: test_int_x86_avx512_mask_palignr_256: 582 ; X64: # %bb.0: 583 ; X64-NEXT: vpalignr $2, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x0f,0xd9,0x02] 584 ; X64-NEXT: # ymm3 = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17] 585 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 586 ; X64-NEXT: vpalignr $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x0f,0xd1,0x02] 587 ; X64-NEXT: # ymm2 {%k1} = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17] 588 ; X64-NEXT: vpalignr $2, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x0f,0xc1,0x02] 589 ; X64-NEXT: # ymm0 {%k1} {z} = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17] 590 ; X64-NEXT: vpaddb %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc3] 591 ; X64-NEXT: vpaddb %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc0] 592 ; X64-NEXT: retq # encoding: [0xc3] 593 %res = call <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <32 x i8> %x3, i32 %x4) 594 %res1 = call <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <32 x i8> zeroinitializer, i32 %x4) 595 %res2 = call <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <32 x i8> %x3, i32 -1) 596 %res3 = add <32 x i8> %res, %res1 597 %res4 = add <32 x i8> 
%res3, %res2 598 ret <32 x i8> %res4 599 } 600 601 declare <8 x i16> @llvm.x86.avx512.mask.pshufh.w.128(<8 x i16>, i32, <8 x i16>, i8) 602 603 define <8 x i16>@test_int_x86_avx512_mask_pshufh_w_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) { 604 ; X86-LABEL: test_int_x86_avx512_mask_pshufh_w_128: 605 ; X86: # %bb.0: 606 ; X86-NEXT: vpshufhw $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x70,0xd0,0x03] 607 ; X86-NEXT: # xmm2 = xmm0[0,1,2,3,7,4,4,4] 608 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 609 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 610 ; X86-NEXT: vpshufhw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x70,0xc8,0x03] 611 ; X86-NEXT: # xmm1 {%k1} = xmm0[0,1,2,3,7,4,4,4] 612 ; X86-NEXT: vpshufhw $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x70,0xc0,0x03] 613 ; X86-NEXT: # xmm0 {%k1} {z} = xmm0[0,1,2,3,7,4,4,4] 614 ; X86-NEXT: vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2] 615 ; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] 616 ; X86-NEXT: retl # encoding: [0xc3] 617 ; 618 ; X64-LABEL: test_int_x86_avx512_mask_pshufh_w_128: 619 ; X64: # %bb.0: 620 ; X64-NEXT: vpshufhw $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x70,0xd0,0x03] 621 ; X64-NEXT: # xmm2 = xmm0[0,1,2,3,7,4,4,4] 622 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 623 ; X64-NEXT: vpshufhw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x70,0xc8,0x03] 624 ; X64-NEXT: # xmm1 {%k1} = xmm0[0,1,2,3,7,4,4,4] 625 ; X64-NEXT: vpshufhw $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x70,0xc0,0x03] 626 ; X64-NEXT: # xmm0 {%k1} {z} = xmm0[0,1,2,3,7,4,4,4] 627 ; X64-NEXT: vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2] 628 ; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] 629 ; X64-NEXT: retq # encoding: [0xc3] 630 %res = call <8 x i16> @llvm.x86.avx512.mask.pshufh.w.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3) 631 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pshufh.w.128(<8 x i16> %x0, i32 3, <8 x i16> zeroinitializer, i8 %x3) 632 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pshufh.w.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 -1) 633 %res3 = add <8 x i16> %res, %res1 634 %res4 = add <8 x i16> %res3, %res2 635 ret <8 x i16> %res4 636 } 637 638 declare <16 x i16> @llvm.x86.avx512.mask.pshufh.w.256(<16 x i16>, i32, <16 x i16>, i16) 639 640 define <16 x i16>@test_int_x86_avx512_mask_pshufh_w_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) { 641 ; X86-LABEL: test_int_x86_avx512_mask_pshufh_w_256: 642 ; X86: # %bb.0: 643 ; X86-NEXT: vpshufhw $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x70,0xd0,0x03] 644 ; X86-NEXT: # ymm2 = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12] 645 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 646 ; X86-NEXT: vpshufhw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x70,0xc8,0x03] 647 ; X86-NEXT: # ymm1 {%k1} = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12] 648 ; X86-NEXT: vpshufhw $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x70,0xc0,0x03] 649 ; X86-NEXT: # ymm0 {%k1} {z} = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12] 650 ; X86-NEXT: vpaddw %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2] 651 ; X86-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: 
[0xc5,0xf5,0xfd,0xc0] 652 ; X86-NEXT: retl # encoding: [0xc3] 653 ; 654 ; X64-LABEL: test_int_x86_avx512_mask_pshufh_w_256: 655 ; X64: # %bb.0: 656 ; X64-NEXT: vpshufhw $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x70,0xd0,0x03] 657 ; X64-NEXT: # ymm2 = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12] 658 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 659 ; X64-NEXT: vpshufhw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x70,0xc8,0x03] 660 ; X64-NEXT: # ymm1 {%k1} = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12] 661 ; X64-NEXT: vpshufhw $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x70,0xc0,0x03] 662 ; X64-NEXT: # ymm0 {%k1} {z} = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12] 663 ; X64-NEXT: vpaddw %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2] 664 ; X64-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] 665 ; X64-NEXT: retq # encoding: [0xc3] 666 %res = call <16 x i16> @llvm.x86.avx512.mask.pshufh.w.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3) 667 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pshufh.w.256(<16 x i16> %x0, i32 3, <16 x i16> zeroinitializer, i16 %x3) 668 %res2 = call <16 x i16> @llvm.x86.avx512.mask.pshufh.w.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 -1) 669 %res3 = add <16 x i16> %res, %res1 670 %res4 = add <16 x i16> %res3, %res2 671 ret <16 x i16> %res4 672 } 673 674 declare <8 x i16> @llvm.x86.avx512.mask.pshufl.w.128(<8 x i16>, i32, <8 x i16>, i8) 675 676 define <8 x i16>@test_int_x86_avx512_mask_pshufl_w_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) { 677 ; X86-LABEL: test_int_x86_avx512_mask_pshufl_w_128: 678 ; X86: # %bb.0: 679 ; X86-NEXT: vpshuflw $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x70,0xd0,0x03] 680 ; X86-NEXT: # xmm2 = xmm0[3,0,0,0,4,5,6,7] 681 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 682 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 683 ; X86-NEXT: vpshuflw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x70,0xc8,0x03] 684 ; X86-NEXT: # xmm1 {%k1} = xmm0[3,0,0,0,4,5,6,7] 685 ; X86-NEXT: vpshuflw $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x70,0xc0,0x03] 686 ; X86-NEXT: # xmm0 {%k1} {z} = xmm0[3,0,0,0,4,5,6,7] 687 ; X86-NEXT: vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2] 688 ; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] 689 ; X86-NEXT: retl # encoding: [0xc3] 690 ; 691 ; X64-LABEL: test_int_x86_avx512_mask_pshufl_w_128: 692 ; X64: # %bb.0: 693 ; X64-NEXT: vpshuflw $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x70,0xd0,0x03] 694 ; X64-NEXT: # xmm2 = xmm0[3,0,0,0,4,5,6,7] 695 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 696 ; X64-NEXT: vpshuflw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x70,0xc8,0x03] 697 ; X64-NEXT: # xmm1 {%k1} = xmm0[3,0,0,0,4,5,6,7] 698 ; X64-NEXT: vpshuflw $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x70,0xc0,0x03] 699 ; X64-NEXT: # xmm0 {%k1} {z} = xmm0[3,0,0,0,4,5,6,7] 700 ; X64-NEXT: vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2] 701 ; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] 702 ; X64-NEXT: retq # encoding: [0xc3] 703 %res = call <8 x i16> @llvm.x86.avx512.mask.pshufl.w.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3) 704 %res1 = call <8 x i16> 
@llvm.x86.avx512.mask.pshufl.w.128(<8 x i16> %x0, i32 3, <8 x i16> zeroinitializer, i8 %x3) 705 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pshufl.w.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 -1) 706 %res3 = add <8 x i16> %res, %res1 707 %res4 = add <8 x i16> %res3, %res2 708 ret <8 x i16> %res4 709 } 710 711 declare <16 x i16> @llvm.x86.avx512.mask.pshufl.w.256(<16 x i16>, i32, <16 x i16>, i16) 712 713 define <16 x i16>@test_int_x86_avx512_mask_pshufl_w_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) { 714 ; X86-LABEL: test_int_x86_avx512_mask_pshufl_w_256: 715 ; X86: # %bb.0: 716 ; X86-NEXT: vpshuflw $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xff,0x70,0xd0,0x03] 717 ; X86-NEXT: # ymm2 = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15] 718 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 719 ; X86-NEXT: vpshuflw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x70,0xc8,0x03] 720 ; X86-NEXT: # ymm1 {%k1} = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15] 721 ; X86-NEXT: vpshuflw $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x70,0xc0,0x03] 722 ; X86-NEXT: # ymm0 {%k1} {z} = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15] 723 ; X86-NEXT: vpaddw %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2] 724 ; X86-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] 725 ; X86-NEXT: retl # encoding: [0xc3] 726 ; 727 ; X64-LABEL: test_int_x86_avx512_mask_pshufl_w_256: 728 ; X64: # %bb.0: 729 ; X64-NEXT: vpshuflw $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xff,0x70,0xd0,0x03] 730 ; X64-NEXT: # ymm2 = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15] 731 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 732 ; X64-NEXT: vpshuflw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x70,0xc8,0x03] 733 ; X64-NEXT: # ymm1 {%k1} = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15] 734 ; X64-NEXT: vpshuflw $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x70,0xc0,0x03] 735 ; X64-NEXT: # ymm0 {%k1} {z} = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15] 736 ; X64-NEXT: vpaddw %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2] 737 ; X64-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] 738 ; X64-NEXT: retq # encoding: [0xc3] 739 %res = call <16 x i16> @llvm.x86.avx512.mask.pshufl.w.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3) 740 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pshufl.w.256(<16 x i16> %x0, i32 3, <16 x i16> zeroinitializer, i16 %x3) 741 %res2 = call <16 x i16> @llvm.x86.avx512.mask.pshufl.w.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 -1) 742 %res3 = add <16 x i16> %res, %res1 743 %res4 = add <16 x i16> %res3, %res2 744 ret <16 x i16> %res4 745 } 746 747 define i32 @test_pcmpeq_b_256(<32 x i8> %a, <32 x i8> %b) { 748 ; CHECK-LABEL: test_pcmpeq_b_256: 749 ; CHECK: # %bb.0: 750 ; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1] 751 ; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 752 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 753 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 754 %res = call i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8> %a, <32 x i8> %b, i32 -1) 755 ret i32 %res 756 } 757 758 define i32 @test_mask_pcmpeq_b_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) { 759 ; X86-LABEL: test_mask_pcmpeq_b_256: 760 ; X86: # %bb.0: 761 ; X86-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1] 762 
; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 763 ; X86-NEXT: andl {{[0-9]+}}(%esp), %eax # encoding: [0x23,0x44,0x24,0x04] 764 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 765 ; X86-NEXT: retl # encoding: [0xc3] 766 ; 767 ; X64-LABEL: test_mask_pcmpeq_b_256: 768 ; X64: # %bb.0: 769 ; X64-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1] 770 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 771 ; X64-NEXT: andl %edi, %eax # encoding: [0x21,0xf8] 772 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 773 ; X64-NEXT: retq # encoding: [0xc3] 774 %res = call i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8> %a, <32 x i8> %b, i32 %mask) 775 ret i32 %res 776 } 777 778 declare i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8>, <32 x i8>, i32) 779 780 define i16 @test_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b) { 781 ; CHECK-LABEL: test_pcmpeq_w_256: 782 ; CHECK: # %bb.0: 783 ; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1] 784 ; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 785 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax 786 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 787 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 788 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1) 789 ret i16 %res 790 } 791 792 define i16 @test_mask_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { 793 ; X86-LABEL: test_mask_pcmpeq_w_256: 794 ; X86: # %bb.0: 795 ; X86-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1] 796 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 797 ; X86-NEXT: andw {{[0-9]+}}(%esp), %ax # encoding: [0x66,0x23,0x44,0x24,0x04] 798 ; X86-NEXT: # kill: def $ax killed $ax killed $eax 799 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 800 ; X86-NEXT: retl # encoding: [0xc3] 801 ; 802 ; X64-LABEL: test_mask_pcmpeq_w_256: 803 ; X64: # %bb.0: 804 ; X64-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1] 805 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 806 ; X64-NEXT: andl %edi, %eax # encoding: [0x21,0xf8] 807 ; X64-NEXT: # kill: def $ax killed $ax killed $eax 808 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 809 ; X64-NEXT: retq # encoding: [0xc3] 810 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask) 811 ret i16 %res 812 } 813 814 declare i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16>, <16 x i16>, i16) 815 816 define i32 @test_pcmpgt_b_256(<32 x i8> %a, <32 x i8> %b) { 817 ; CHECK-LABEL: test_pcmpgt_b_256: 818 ; CHECK: # %bb.0: 819 ; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1] 820 ; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 821 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 822 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 823 %res = call i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8> %a, <32 x i8> %b, i32 -1) 824 ret i32 %res 825 } 826 827 define i32 @test_mask_pcmpgt_b_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) { 828 ; X86-LABEL: test_mask_pcmpgt_b_256: 829 ; X86: # %bb.0: 830 ; X86-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1] 831 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 832 ; X86-NEXT: andl {{[0-9]+}}(%esp), %eax # encoding: [0x23,0x44,0x24,0x04] 833 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 834 ; X86-NEXT: retl # encoding: [0xc3] 835 ; 836 ; X64-LABEL: 
test_mask_pcmpgt_b_256: 837 ; X64: # %bb.0: 838 ; X64-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1] 839 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 840 ; X64-NEXT: andl %edi, %eax # encoding: [0x21,0xf8] 841 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 842 ; X64-NEXT: retq # encoding: [0xc3] 843 %res = call i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8> %a, <32 x i8> %b, i32 %mask) 844 ret i32 %res 845 } 846 847 declare i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8>, <32 x i8>, i32) 848 849 define i16 @test_pcmpgt_w_256(<16 x i16> %a, <16 x i16> %b) { 850 ; CHECK-LABEL: test_pcmpgt_w_256: 851 ; CHECK: # %bb.0: 852 ; CHECK-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x65,0xc1] 853 ; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 854 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax 855 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 856 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 857 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1) 858 ret i16 %res 859 } 860 861 define i16 @test_mask_pcmpgt_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { 862 ; X86-LABEL: test_mask_pcmpgt_w_256: 863 ; X86: # %bb.0: 864 ; X86-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x65,0xc1] 865 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 866 ; X86-NEXT: andw {{[0-9]+}}(%esp), %ax # encoding: [0x66,0x23,0x44,0x24,0x04] 867 ; X86-NEXT: # kill: def $ax killed $ax killed $eax 868 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 869 ; X86-NEXT: retl # encoding: [0xc3] 870 ; 871 ; X64-LABEL: test_mask_pcmpgt_w_256: 872 ; X64: # %bb.0: 873 ; X64-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x65,0xc1] 874 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 875 ; X64-NEXT: andl %edi, %eax # encoding: [0x21,0xf8] 876 ; X64-NEXT: # kill: def $ax killed $ax killed $eax 877 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 878 ; X64-NEXT: retq # encoding: [0xc3] 879 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask) 880 ret i16 %res 881 } 882 883 declare i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16>, <16 x i16>, i16) 884 885 define i16 @test_pcmpeq_b_128(<16 x i8> %a, <16 x i8> %b) { 886 ; CHECK-LABEL: test_pcmpeq_b_128: 887 ; CHECK: # %bb.0: 888 ; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1] 889 ; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 890 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax 891 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 892 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8> %a, <16 x i8> %b, i16 -1) 893 ret i16 %res 894 } 895 896 define i16 @test_mask_pcmpeq_b_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) { 897 ; X86-LABEL: test_mask_pcmpeq_b_128: 898 ; X86: # %bb.0: 899 ; X86-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1] 900 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 901 ; X86-NEXT: andw {{[0-9]+}}(%esp), %ax # encoding: [0x66,0x23,0x44,0x24,0x04] 902 ; X86-NEXT: # kill: def $ax killed $ax killed $eax 903 ; X86-NEXT: retl # encoding: [0xc3] 904 ; 905 ; X64-LABEL: test_mask_pcmpeq_b_128: 906 ; X64: # %bb.0: 907 ; X64-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1] 908 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 909 ; X64-NEXT: andl %edi, %eax # encoding: [0x21,0xf8] 910 ; X64-NEXT: # 
kill: def $ax killed $ax killed $eax 911 ; X64-NEXT: retq # encoding: [0xc3] 912 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8> %a, <16 x i8> %b, i16 %mask) 913 ret i16 %res 914 } 915 916 declare i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8>, <16 x i8>, i16) 917 918 define i8 @test_pcmpeq_w_128(<8 x i16> %a, <8 x i16> %b) { 919 ; CHECK-LABEL: test_pcmpeq_w_128: 920 ; CHECK: # %bb.0: 921 ; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1] 922 ; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 923 ; CHECK-NEXT: # kill: def $al killed $al killed $eax 924 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 925 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16> %a, <8 x i16> %b, i8 -1) 926 ret i8 %res 927 } 928 929 define i8 @test_mask_pcmpeq_w_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { 930 ; X86-LABEL: test_mask_pcmpeq_w_128: 931 ; X86: # %bb.0: 932 ; X86-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1] 933 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 934 ; X86-NEXT: andb {{[0-9]+}}(%esp), %al # encoding: [0x22,0x44,0x24,0x04] 935 ; X86-NEXT: # kill: def $al killed $al killed $eax 936 ; X86-NEXT: retl # encoding: [0xc3] 937 ; 938 ; X64-LABEL: test_mask_pcmpeq_w_128: 939 ; X64: # %bb.0: 940 ; X64-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1] 941 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 942 ; X64-NEXT: andb %dil, %al # encoding: [0x40,0x20,0xf8] 943 ; X64-NEXT: # kill: def $al killed $al killed $eax 944 ; X64-NEXT: retq # encoding: [0xc3] 945 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16> %a, <8 x i16> %b, i8 %mask) 946 ret i8 %res 947 } 948 949 declare i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16>, <8 x i16>, i8) 950 951 define i16 @test_pcmpgt_b_128(<16 x i8> %a, <16 x i8> %b) { 952 ; CHECK-LABEL: test_pcmpgt_b_128: 953 ; CHECK: # %bb.0: 954 ; CHECK-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x64,0xc1] 955 ; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 956 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax 957 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 958 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8> %a, <16 x i8> %b, i16 -1) 959 ret i16 %res 960 } 961 962 define i16 @test_mask_pcmpgt_b_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) { 963 ; X86-LABEL: test_mask_pcmpgt_b_128: 964 ; X86: # %bb.0: 965 ; X86-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x64,0xc1] 966 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 967 ; X86-NEXT: andw {{[0-9]+}}(%esp), %ax # encoding: [0x66,0x23,0x44,0x24,0x04] 968 ; X86-NEXT: # kill: def $ax killed $ax killed $eax 969 ; X86-NEXT: retl # encoding: [0xc3] 970 ; 971 ; X64-LABEL: test_mask_pcmpgt_b_128: 972 ; X64: # %bb.0: 973 ; X64-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x64,0xc1] 974 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 975 ; X64-NEXT: andl %edi, %eax # encoding: [0x21,0xf8] 976 ; X64-NEXT: # kill: def $ax killed $ax killed $eax 977 ; X64-NEXT: retq # encoding: [0xc3] 978 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8> %a, <16 x i8> %b, i16 %mask) 979 ret i16 %res 980 } 981 982 declare i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8>, <16 x i8>, i16) 983 984 define i8 @test_pcmpgt_w_128(<8 x i16> %a, <8 x i16> %b) { 985 ; CHECK-LABEL: test_pcmpgt_w_128: 986 ; CHECK: # %bb.0: 987 ; CHECK-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 # encoding: 
[0x62,0xf1,0x7d,0x08,0x65,0xc1] 988 ; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 989 ; CHECK-NEXT: # kill: def $al killed $al killed $eax 990 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 991 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16> %a, <8 x i16> %b, i8 -1) 992 ret i8 %res 993 } 994 995 define i8 @test_mask_pcmpgt_w_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { 996 ; X86-LABEL: test_mask_pcmpgt_w_128: 997 ; X86: # %bb.0: 998 ; X86-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x65,0xc1] 999 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 1000 ; X86-NEXT: andb {{[0-9]+}}(%esp), %al # encoding: [0x22,0x44,0x24,0x04] 1001 ; X86-NEXT: # kill: def $al killed $al killed $eax 1002 ; X86-NEXT: retl # encoding: [0xc3] 1003 ; 1004 ; X64-LABEL: test_mask_pcmpgt_w_128: 1005 ; X64: # %bb.0: 1006 ; X64-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x65,0xc1] 1007 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 1008 ; X64-NEXT: andb %dil, %al # encoding: [0x40,0x20,0xf8] 1009 ; X64-NEXT: # kill: def $al killed $al killed $eax 1010 ; X64-NEXT: retq # encoding: [0xc3] 1011 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16> %a, <8 x i16> %b, i8 %mask) 1012 ret i8 %res 1013 } 1014 1015 declare i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16>, <8 x i16>, i8) 1016 1017 declare <16 x i8> @llvm.x86.avx512.mask.punpckhb.w.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) 1018 1019 define <16 x i8>@test_int_x86_avx512_mask_punpckhb_w_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) { 1020 ; X86-LABEL: test_int_x86_avx512_mask_punpckhb_w_128: 1021 ; X86: # %bb.0: 1022 ; X86-NEXT: vpunpckhbw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x68,0xd9] 1023 ; X86-NEXT: # xmm3 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 1024 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1025 ; X86-NEXT: vpunpckhbw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x68,0xd1] 1026 ; X86-NEXT: # xmm2 {%k1} = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 1027 ; X86-NEXT: vpaddb %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc3] 1028 ; X86-NEXT: retl # encoding: [0xc3] 1029 ; 1030 ; X64-LABEL: test_int_x86_avx512_mask_punpckhb_w_128: 1031 ; X64: # %bb.0: 1032 ; X64-NEXT: vpunpckhbw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x68,0xd9] 1033 ; X64-NEXT: # xmm3 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 1034 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1035 ; X64-NEXT: vpunpckhbw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x68,0xd1] 1036 ; X64-NEXT: # xmm2 {%k1} = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 1037 ; X64-NEXT: vpaddb %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc3] 1038 ; X64-NEXT: retq # encoding: [0xc3] 1039 %res = call <16 x i8> @llvm.x86.avx512.mask.punpckhb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) 1040 %res1 = call <16 x i8> @llvm.x86.avx512.mask.punpckhb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1) 1041 %res2 = add <16 x i8> 
%res, %res1 1042 ret <16 x i8> %res2 1043 } 1044 1045 declare <16 x i8> @llvm.x86.avx512.mask.punpcklb.w.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) 1046 1047 define <16 x i8>@test_int_x86_avx512_mask_punpcklb_w_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) { 1048 ; X86-LABEL: test_int_x86_avx512_mask_punpcklb_w_128: 1049 ; X86: # %bb.0: 1050 ; X86-NEXT: vpunpcklbw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x60,0xd9] 1051 ; X86-NEXT: # xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1052 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1053 ; X86-NEXT: vpunpcklbw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x60,0xd1] 1054 ; X86-NEXT: # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1055 ; X86-NEXT: vpaddb %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc3] 1056 ; X86-NEXT: retl # encoding: [0xc3] 1057 ; 1058 ; X64-LABEL: test_int_x86_avx512_mask_punpcklb_w_128: 1059 ; X64: # %bb.0: 1060 ; X64-NEXT: vpunpcklbw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x60,0xd9] 1061 ; X64-NEXT: # xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1062 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1063 ; X64-NEXT: vpunpcklbw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x60,0xd1] 1064 ; X64-NEXT: # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1065 ; X64-NEXT: vpaddb %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc3] 1066 ; X64-NEXT: retq # encoding: [0xc3] 1067 %res = call <16 x i8> @llvm.x86.avx512.mask.punpcklb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) 1068 %res1 = call <16 x i8> @llvm.x86.avx512.mask.punpcklb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1) 1069 %res2 = add <16 x i8> %res, %res1 1070 ret <16 x i8> %res2 1071 } 1072 1073 declare <32 x i8> @llvm.x86.avx512.mask.punpckhb.w.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 1074 1075 define <32 x i8>@test_int_x86_avx512_mask_punpckhb_w_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { 1076 ; X86-LABEL: test_int_x86_avx512_mask_punpckhb_w_256: 1077 ; X86: # %bb.0: 1078 ; X86-NEXT: vpunpckhbw %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x68,0xd9] 1079 ; X86-NEXT: # ymm3 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] 1080 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1081 ; X86-NEXT: vpunpckhbw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x68,0xd1] 1082 ; X86-NEXT: # ymm2 {%k1} = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] 1083 ; X86-NEXT: vpaddb %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc3] 
1084 ; X86-NEXT: retl # encoding: [0xc3] 1085 ; 1086 ; X64-LABEL: test_int_x86_avx512_mask_punpckhb_w_256: 1087 ; X64: # %bb.0: 1088 ; X64-NEXT: vpunpckhbw %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x68,0xd9] 1089 ; X64-NEXT: # ymm3 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] 1090 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1091 ; X64-NEXT: vpunpckhbw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x68,0xd1] 1092 ; X64-NEXT: # ymm2 {%k1} = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] 1093 ; X64-NEXT: vpaddb %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc3] 1094 ; X64-NEXT: retq # encoding: [0xc3] 1095 %res = call <32 x i8> @llvm.x86.avx512.mask.punpckhb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) 1096 %res1 = call <32 x i8> @llvm.x86.avx512.mask.punpckhb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) 1097 %res2 = add <32 x i8> %res, %res1 1098 ret <32 x i8> %res2 1099 } 1100 1101 declare <32 x i8> @llvm.x86.avx512.mask.punpcklb.w.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 1102 1103 define <32 x i8>@test_int_x86_avx512_mask_punpcklb_w_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { 1104 ; X86-LABEL: test_int_x86_avx512_mask_punpcklb_w_256: 1105 ; X86: # %bb.0: 1106 ; X86-NEXT: vpunpcklbw %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x60,0xd9] 1107 ; X86-NEXT: # ymm3 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] 1108 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1109 ; X86-NEXT: vpunpcklbw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x60,0xd1] 1110 ; X86-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] 1111 ; X86-NEXT: vpaddb %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc3] 1112 ; X86-NEXT: retl # encoding: [0xc3] 1113 ; 1114 ; X64-LABEL: test_int_x86_avx512_mask_punpcklb_w_256: 1115 ; X64: # %bb.0: 1116 ; X64-NEXT: vpunpcklbw %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x60,0xd9] 1117 ; X64-NEXT: # ymm3 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] 1118 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1119 ; X64-NEXT: vpunpcklbw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x60,0xd1] 1120 ; X64-NEXT: # ymm2 {%k1} = 
ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] 1121 ; X64-NEXT: vpaddb %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc3] 1122 ; X64-NEXT: retq # encoding: [0xc3] 1123 %res = call <32 x i8> @llvm.x86.avx512.mask.punpcklb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) 1124 %res1 = call <32 x i8> @llvm.x86.avx512.mask.punpcklb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) 1125 %res2 = add <32 x i8> %res, %res1 1126 ret <32 x i8> %res2 1127 } 1128 1129 declare <8 x i16> @llvm.x86.avx512.mask.punpcklw.d.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 1130 1131 define <8 x i16>@test_int_x86_avx512_mask_punpcklw_d_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 1132 ; X86-LABEL: test_int_x86_avx512_mask_punpcklw_d_128: 1133 ; X86: # %bb.0: 1134 ; X86-NEXT: vpunpcklwd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x61,0xd9] 1135 ; X86-NEXT: # xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1136 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1137 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1138 ; X86-NEXT: vpunpcklwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x61,0xd1] 1139 ; X86-NEXT: # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1140 ; X86-NEXT: vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] 1141 ; X86-NEXT: retl # encoding: [0xc3] 1142 ; 1143 ; X64-LABEL: test_int_x86_avx512_mask_punpcklw_d_128: 1144 ; X64: # %bb.0: 1145 ; X64-NEXT: vpunpcklwd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x61,0xd9] 1146 ; X64-NEXT: # xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1147 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1148 ; X64-NEXT: vpunpcklwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x61,0xd1] 1149 ; X64-NEXT: # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1150 ; X64-NEXT: vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] 1151 ; X64-NEXT: retq # encoding: [0xc3] 1152 %res = call <8 x i16> @llvm.x86.avx512.mask.punpcklw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 1153 %res1 = call <8 x i16> @llvm.x86.avx512.mask.punpcklw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 1154 %res2 = add <8 x i16> %res, %res1 1155 ret <8 x i16> %res2 1156 } 1157 1158 declare <8 x i16> @llvm.x86.avx512.mask.punpckhw.d.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 1159 1160 define <8 x i16>@test_int_x86_avx512_mask_punpckhw_d_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 1161 ; X86-LABEL: test_int_x86_avx512_mask_punpckhw_d_128: 1162 ; X86: # %bb.0: 1163 ; X86-NEXT: vpunpckhwd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x69,0xd9] 1164 ; X86-NEXT: # xmm3 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1165 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1166 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1167 ; X86-NEXT: vpunpckhwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x69,0xd1] 1168 ; X86-NEXT: # xmm2 {%k1} = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 
1169 ; X86-NEXT: vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] 1170 ; X86-NEXT: retl # encoding: [0xc3] 1171 ; 1172 ; X64-LABEL: test_int_x86_avx512_mask_punpckhw_d_128: 1173 ; X64: # %bb.0: 1174 ; X64-NEXT: vpunpckhwd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x69,0xd9] 1175 ; X64-NEXT: # xmm3 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1176 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1177 ; X64-NEXT: vpunpckhwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x69,0xd1] 1178 ; X64-NEXT: # xmm2 {%k1} = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1179 ; X64-NEXT: vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] 1180 ; X64-NEXT: retq # encoding: [0xc3] 1181 %res = call <8 x i16> @llvm.x86.avx512.mask.punpckhw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 1182 %res1 = call <8 x i16> @llvm.x86.avx512.mask.punpckhw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 1183 %res2 = add <8 x i16> %res, %res1 1184 ret <8 x i16> %res2 1185 } 1186 1187 declare <16 x i16> @llvm.x86.avx512.mask.punpcklw.d.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 1188 1189 define <16 x i16>@test_int_x86_avx512_mask_punpcklw_d_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 1190 ; X86-LABEL: test_int_x86_avx512_mask_punpcklw_d_256: 1191 ; X86: # %bb.0: 1192 ; X86-NEXT: vpunpcklwd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x61,0xd9] 1193 ; X86-NEXT: # ymm3 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] 1194 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1195 ; X86-NEXT: vpunpcklwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x61,0xd1] 1196 ; X86-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] 1197 ; X86-NEXT: vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3] 1198 ; X86-NEXT: retl # encoding: [0xc3] 1199 ; 1200 ; X64-LABEL: test_int_x86_avx512_mask_punpcklw_d_256: 1201 ; X64: # %bb.0: 1202 ; X64-NEXT: vpunpcklwd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x61,0xd9] 1203 ; X64-NEXT: # ymm3 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] 1204 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1205 ; X64-NEXT: vpunpcklwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x61,0xd1] 1206 ; X64-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] 1207 ; X64-NEXT: vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3] 1208 ; X64-NEXT: retq # encoding: [0xc3] 1209 %res = call <16 x i16> @llvm.x86.avx512.mask.punpcklw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 1210 %res1 = call <16 x i16> @llvm.x86.avx512.mask.punpcklw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 1211 %res2 = add <16 x i16> %res, %res1 1212 ret <16 x i16> %res2 1213 } 1214 1215 declare <16 x i16> @llvm.x86.avx512.mask.punpckhw.d.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 1216 1217 define <16 x i16>@test_int_x86_avx512_mask_punpckhw_d_256(<16 x i16> %x0, <16 x i16> %x1, 
<16 x i16> %x2, i16 %x3) { 1218 ; X86-LABEL: test_int_x86_avx512_mask_punpckhw_d_256: 1219 ; X86: # %bb.0: 1220 ; X86-NEXT: vpunpckhwd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x69,0xd9] 1221 ; X86-NEXT: # ymm3 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] 1222 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1223 ; X86-NEXT: vpunpckhwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x69,0xd1] 1224 ; X86-NEXT: # ymm2 {%k1} = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] 1225 ; X86-NEXT: vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3] 1226 ; X86-NEXT: retl # encoding: [0xc3] 1227 ; 1228 ; X64-LABEL: test_int_x86_avx512_mask_punpckhw_d_256: 1229 ; X64: # %bb.0: 1230 ; X64-NEXT: vpunpckhwd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x69,0xd9] 1231 ; X64-NEXT: # ymm3 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] 1232 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1233 ; X64-NEXT: vpunpckhwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x69,0xd1] 1234 ; X64-NEXT: # ymm2 {%k1} = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] 1235 ; X64-NEXT: vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3] 1236 ; X64-NEXT: retq # encoding: [0xc3] 1237 %res = call <16 x i16> @llvm.x86.avx512.mask.punpckhw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 1238 %res1 = call <16 x i16> @llvm.x86.avx512.mask.punpckhw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 1239 %res2 = add <16 x i16> %res, %res1 1240 ret <16 x i16> %res2 1241 } 1242 1243 define <8 x i16> @test_mask_add_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { 1244 ; CHECK-LABEL: test_mask_add_epi16_rr_128: 1245 ; CHECK: # %bb.0: 1246 ; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1] 1247 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1248 %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 1249 ret <8 x i16> %res 1250 } 1251 1252 define <8 x i16> @test_mask_add_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) { 1253 ; X86-LABEL: test_mask_add_epi16_rrk_128: 1254 ; X86: # %bb.0: 1255 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1256 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1257 ; X86-NEXT: vpaddw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfd,0xd1] 1258 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1259 ; X86-NEXT: retl # encoding: [0xc3] 1260 ; 1261 ; X64-LABEL: test_mask_add_epi16_rrk_128: 1262 ; X64: # %bb.0: 1263 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1264 ; X64-NEXT: vpaddw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfd,0xd1] 1265 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1266 ; X64-NEXT: retq # encoding: [0xc3] 1267 %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 1268 ret 
<8 x i16> %res 1269 } 1270 1271 define <8 x i16> @test_mask_add_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { 1272 ; X86-LABEL: test_mask_add_epi16_rrkz_128: 1273 ; X86: # %bb.0: 1274 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1275 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1276 ; X86-NEXT: vpaddw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfd,0xc1] 1277 ; X86-NEXT: retl # encoding: [0xc3] 1278 ; 1279 ; X64-LABEL: test_mask_add_epi16_rrkz_128: 1280 ; X64: # %bb.0: 1281 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1282 ; X64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfd,0xc1] 1283 ; X64-NEXT: retq # encoding: [0xc3] 1284 %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 1285 ret <8 x i16> %res 1286 } 1287 1288 define <8 x i16> @test_mask_add_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { 1289 ; X86-LABEL: test_mask_add_epi16_rm_128: 1290 ; X86: # %bb.0: 1291 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1292 ; X86-NEXT: vpaddw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0x00] 1293 ; X86-NEXT: retl # encoding: [0xc3] 1294 ; 1295 ; X64-LABEL: test_mask_add_epi16_rm_128: 1296 ; X64: # %bb.0: 1297 ; X64-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0x07] 1298 ; X64-NEXT: retq # encoding: [0xc3] 1299 %b = load <8 x i16>, <8 x i16>* %ptr_b 1300 %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 1301 ret <8 x i16> %res 1302 } 1303 1304 define <8 x i16> @test_mask_add_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) { 1305 ; X86-LABEL: test_mask_add_epi16_rmk_128: 1306 ; X86: # %bb.0: 1307 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1308 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1309 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 1310 ; X86-NEXT: vpaddw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfd,0x08] 1311 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 1312 ; X86-NEXT: retl # encoding: [0xc3] 1313 ; 1314 ; X64-LABEL: test_mask_add_epi16_rmk_128: 1315 ; X64: # %bb.0: 1316 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1317 ; X64-NEXT: vpaddw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfd,0x0f] 1318 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 1319 ; X64-NEXT: retq # encoding: [0xc3] 1320 %b = load <8 x i16>, <8 x i16>* %ptr_b 1321 %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 1322 ret <8 x i16> %res 1323 } 1324 1325 define <8 x i16> @test_mask_add_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) { 1326 ; X86-LABEL: test_mask_add_epi16_rmkz_128: 1327 ; X86: # %bb.0: 1328 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1329 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1330 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 1331 ; X86-NEXT: vpaddw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfd,0x00] 1332 ; X86-NEXT: retl # encoding: [0xc3] 1333 ; 1334 ; X64-LABEL: test_mask_add_epi16_rmkz_128: 1335 ; X64: # %bb.0: 
1336 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1337 ; X64-NEXT: vpaddw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfd,0x07] 1338 ; X64-NEXT: retq # encoding: [0xc3] 1339 %b = load <8 x i16>, <8 x i16>* %ptr_b 1340 %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 1341 ret <8 x i16> %res 1342 } 1343 1344 declare <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 1345 1346 define <16 x i16> @test_mask_add_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { 1347 ; CHECK-LABEL: test_mask_add_epi16_rr_256: 1348 ; CHECK: # %bb.0: 1349 ; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc1] 1350 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1351 %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 1352 ret <16 x i16> %res 1353 } 1354 1355 define <16 x i16> @test_mask_add_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) { 1356 ; X86-LABEL: test_mask_add_epi16_rrk_256: 1357 ; X86: # %bb.0: 1358 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1359 ; X86-NEXT: vpaddw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfd,0xd1] 1360 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1361 ; X86-NEXT: retl # encoding: [0xc3] 1362 ; 1363 ; X64-LABEL: test_mask_add_epi16_rrk_256: 1364 ; X64: # %bb.0: 1365 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1366 ; X64-NEXT: vpaddw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfd,0xd1] 1367 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1368 ; X64-NEXT: retq # encoding: [0xc3] 1369 %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 1370 ret <16 x i16> %res 1371 } 1372 1373 define <16 x i16> @test_mask_add_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { 1374 ; X86-LABEL: test_mask_add_epi16_rrkz_256: 1375 ; X86: # %bb.0: 1376 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1377 ; X86-NEXT: vpaddw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0xc1] 1378 ; X86-NEXT: retl # encoding: [0xc3] 1379 ; 1380 ; X64-LABEL: test_mask_add_epi16_rrkz_256: 1381 ; X64: # %bb.0: 1382 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1383 ; X64-NEXT: vpaddw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0xc1] 1384 ; X64-NEXT: retq # encoding: [0xc3] 1385 %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 1386 ret <16 x i16> %res 1387 } 1388 1389 define <16 x i16> @test_mask_add_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { 1390 ; X86-LABEL: test_mask_add_epi16_rm_256: 1391 ; X86: # %bb.0: 1392 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1393 ; X86-NEXT: vpaddw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0x00] 1394 ; X86-NEXT: retl # encoding: [0xc3] 1395 ; 1396 ; X64-LABEL: test_mask_add_epi16_rm_256: 1397 ; X64: # %bb.0: 1398 ; X64-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0x07] 1399 ; X64-NEXT: retq # encoding: [0xc3] 1400 %b = load <16 x i16>, <16 x i16>* %ptr_b 1401 %res = call <16 x i16> 
@llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 1402 ret <16 x i16> %res 1403 } 1404 1405 define <16 x i16> @test_mask_add_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) { 1406 ; X86-LABEL: test_mask_add_epi16_rmk_256: 1407 ; X86: # %bb.0: 1408 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1409 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1410 ; X86-NEXT: vpaddw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfd,0x08] 1411 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 1412 ; X86-NEXT: retl # encoding: [0xc3] 1413 ; 1414 ; X64-LABEL: test_mask_add_epi16_rmk_256: 1415 ; X64: # %bb.0: 1416 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1417 ; X64-NEXT: vpaddw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfd,0x0f] 1418 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 1419 ; X64-NEXT: retq # encoding: [0xc3] 1420 %b = load <16 x i16>, <16 x i16>* %ptr_b 1421 %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 1422 ret <16 x i16> %res 1423 } 1424 1425 define <16 x i16> @test_mask_add_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) { 1426 ; X86-LABEL: test_mask_add_epi16_rmkz_256: 1427 ; X86: # %bb.0: 1428 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1429 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1430 ; X86-NEXT: vpaddw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0x00] 1431 ; X86-NEXT: retl # encoding: [0xc3] 1432 ; 1433 ; X64-LABEL: test_mask_add_epi16_rmkz_256: 1434 ; X64: # %bb.0: 1435 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1436 ; X64-NEXT: vpaddw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0x07] 1437 ; X64-NEXT: retq # encoding: [0xc3] 1438 %b = load <16 x i16>, <16 x i16>* %ptr_b 1439 %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 1440 ret <16 x i16> %res 1441 } 1442 1443 declare <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 1444 1445 define <8 x i16> @test_mask_sub_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { 1446 ; CHECK-LABEL: test_mask_sub_epi16_rr_128: 1447 ; CHECK: # %bb.0: 1448 ; CHECK-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf9,0xc1] 1449 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1450 %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 1451 ret <8 x i16> %res 1452 } 1453 1454 define <8 x i16> @test_mask_sub_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) { 1455 ; X86-LABEL: test_mask_sub_epi16_rrk_128: 1456 ; X86: # %bb.0: 1457 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1458 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1459 ; X86-NEXT: vpsubw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf9,0xd1] 1460 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1461 ; X86-NEXT: retl # encoding: [0xc3] 1462 ; 1463 ; X64-LABEL: test_mask_sub_epi16_rrk_128: 1464 ; X64: # %bb.0: 1465 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 
1466 ; X64-NEXT: vpsubw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf9,0xd1] 1467 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1468 ; X64-NEXT: retq # encoding: [0xc3] 1469 %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 1470 ret <8 x i16> %res 1471 } 1472 1473 define <8 x i16> @test_mask_sub_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { 1474 ; X86-LABEL: test_mask_sub_epi16_rrkz_128: 1475 ; X86: # %bb.0: 1476 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1477 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1478 ; X86-NEXT: vpsubw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf9,0xc1] 1479 ; X86-NEXT: retl # encoding: [0xc3] 1480 ; 1481 ; X64-LABEL: test_mask_sub_epi16_rrkz_128: 1482 ; X64: # %bb.0: 1483 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1484 ; X64-NEXT: vpsubw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf9,0xc1] 1485 ; X64-NEXT: retq # encoding: [0xc3] 1486 %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 1487 ret <8 x i16> %res 1488 } 1489 1490 define <8 x i16> @test_mask_sub_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { 1491 ; X86-LABEL: test_mask_sub_epi16_rm_128: 1492 ; X86: # %bb.0: 1493 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1494 ; X86-NEXT: vpsubw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf9,0x00] 1495 ; X86-NEXT: retl # encoding: [0xc3] 1496 ; 1497 ; X64-LABEL: test_mask_sub_epi16_rm_128: 1498 ; X64: # %bb.0: 1499 ; X64-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf9,0x07] 1500 ; X64-NEXT: retq # encoding: [0xc3] 1501 %b = load <8 x i16>, <8 x i16>* %ptr_b 1502 %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 1503 ret <8 x i16> %res 1504 } 1505 1506 define <8 x i16> @test_mask_sub_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) { 1507 ; X86-LABEL: test_mask_sub_epi16_rmk_128: 1508 ; X86: # %bb.0: 1509 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1510 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1511 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 1512 ; X86-NEXT: vpsubw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf9,0x08] 1513 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 1514 ; X86-NEXT: retl # encoding: [0xc3] 1515 ; 1516 ; X64-LABEL: test_mask_sub_epi16_rmk_128: 1517 ; X64: # %bb.0: 1518 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1519 ; X64-NEXT: vpsubw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf9,0x0f] 1520 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 1521 ; X64-NEXT: retq # encoding: [0xc3] 1522 %b = load <8 x i16>, <8 x i16>* %ptr_b 1523 %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 1524 ret <8 x i16> %res 1525 } 1526 1527 define <8 x i16> @test_mask_sub_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) { 1528 ; X86-LABEL: test_mask_sub_epi16_rmkz_128: 1529 ; X86: # %bb.0: 1530 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1531 ; 
X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1532 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 1533 ; X86-NEXT: vpsubw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf9,0x00] 1534 ; X86-NEXT: retl # encoding: [0xc3] 1535 ; 1536 ; X64-LABEL: test_mask_sub_epi16_rmkz_128: 1537 ; X64: # %bb.0: 1538 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1539 ; X64-NEXT: vpsubw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf9,0x07] 1540 ; X64-NEXT: retq # encoding: [0xc3] 1541 %b = load <8 x i16>, <8 x i16>* %ptr_b 1542 %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 1543 ret <8 x i16> %res 1544 } 1545 1546 declare <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 1547 1548 define <16 x i16> @test_mask_sub_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { 1549 ; CHECK-LABEL: test_mask_sub_epi16_rr_256: 1550 ; CHECK: # %bb.0: 1551 ; CHECK-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf9,0xc1] 1552 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1553 %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 1554 ret <16 x i16> %res 1555 } 1556 1557 define <16 x i16> @test_mask_sub_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) { 1558 ; X86-LABEL: test_mask_sub_epi16_rrk_256: 1559 ; X86: # %bb.0: 1560 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1561 ; X86-NEXT: vpsubw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf9,0xd1] 1562 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1563 ; X86-NEXT: retl # encoding: [0xc3] 1564 ; 1565 ; X64-LABEL: test_mask_sub_epi16_rrk_256: 1566 ; X64: # %bb.0: 1567 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1568 ; X64-NEXT: vpsubw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf9,0xd1] 1569 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1570 ; X64-NEXT: retq # encoding: [0xc3] 1571 %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 1572 ret <16 x i16> %res 1573 } 1574 1575 define <16 x i16> @test_mask_sub_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { 1576 ; X86-LABEL: test_mask_sub_epi16_rrkz_256: 1577 ; X86: # %bb.0: 1578 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1579 ; X86-NEXT: vpsubw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0xc1] 1580 ; X86-NEXT: retl # encoding: [0xc3] 1581 ; 1582 ; X64-LABEL: test_mask_sub_epi16_rrkz_256: 1583 ; X64: # %bb.0: 1584 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1585 ; X64-NEXT: vpsubw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0xc1] 1586 ; X64-NEXT: retq # encoding: [0xc3] 1587 %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 1588 ret <16 x i16> %res 1589 } 1590 1591 define <16 x i16> @test_mask_sub_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { 1592 ; X86-LABEL: test_mask_sub_epi16_rm_256: 1593 ; X86: # %bb.0: 1594 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1595 ; X86-NEXT: vpsubw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: 
[0xc5,0xfd,0xf9,0x00] 1596 ; X86-NEXT: retl # encoding: [0xc3] 1597 ; 1598 ; X64-LABEL: test_mask_sub_epi16_rm_256: 1599 ; X64: # %bb.0: 1600 ; X64-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf9,0x07] 1601 ; X64-NEXT: retq # encoding: [0xc3] 1602 %b = load <16 x i16>, <16 x i16>* %ptr_b 1603 %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 1604 ret <16 x i16> %res 1605 } 1606 1607 define <16 x i16> @test_mask_sub_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) { 1608 ; X86-LABEL: test_mask_sub_epi16_rmk_256: 1609 ; X86: # %bb.0: 1610 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1611 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1612 ; X86-NEXT: vpsubw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf9,0x08] 1613 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 1614 ; X86-NEXT: retl # encoding: [0xc3] 1615 ; 1616 ; X64-LABEL: test_mask_sub_epi16_rmk_256: 1617 ; X64: # %bb.0: 1618 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1619 ; X64-NEXT: vpsubw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf9,0x0f] 1620 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 1621 ; X64-NEXT: retq # encoding: [0xc3] 1622 %b = load <16 x i16>, <16 x i16>* %ptr_b 1623 %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 1624 ret <16 x i16> %res 1625 } 1626 1627 define <16 x i16> @test_mask_sub_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) { 1628 ; X86-LABEL: test_mask_sub_epi16_rmkz_256: 1629 ; X86: # %bb.0: 1630 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1631 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1632 ; X86-NEXT: vpsubw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0x00] 1633 ; X86-NEXT: retl # encoding: [0xc3] 1634 ; 1635 ; X64-LABEL: test_mask_sub_epi16_rmkz_256: 1636 ; X64: # %bb.0: 1637 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1638 ; X64-NEXT: vpsubw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0x07] 1639 ; X64-NEXT: retq # encoding: [0xc3] 1640 %b = load <16 x i16>, <16 x i16>* %ptr_b 1641 %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 1642 ret <16 x i16> %res 1643 } 1644 1645 declare <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 1646 1647 define <32 x i16> @test_mask_add_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { 1648 ; CHECK-LABEL: test_mask_add_epi16_rr_512: 1649 ; CHECK: # %bb.0: 1650 ; CHECK-NEXT: vpaddw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc1] 1651 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1652 %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 1653 ret <32 x i16> %res 1654 } 1655 1656 define <32 x i16> @test_mask_add_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { 1657 ; X86-LABEL: test_mask_add_epi16_rrk_512: 1658 ; X86: # %bb.0: 1659 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1660 ; X86-NEXT: vpaddw %zmm1, %zmm0, %zmm2 {%k1} # encoding: 
[0x62,0xf1,0x7d,0x49,0xfd,0xd1] 1661 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1662 ; X86-NEXT: retl # encoding: [0xc3] 1663 ; 1664 ; X64-LABEL: test_mask_add_epi16_rrk_512: 1665 ; X64: # %bb.0: 1666 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1667 ; X64-NEXT: vpaddw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xfd,0xd1] 1668 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1669 ; X64-NEXT: retq # encoding: [0xc3] 1670 %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 1671 ret <32 x i16> %res 1672 } 1673 1674 define <32 x i16> @test_mask_add_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) { 1675 ; X86-LABEL: test_mask_add_epi16_rrkz_512: 1676 ; X86: # %bb.0: 1677 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1678 ; X86-NEXT: vpaddw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0xc1] 1679 ; X86-NEXT: retl # encoding: [0xc3] 1680 ; 1681 ; X64-LABEL: test_mask_add_epi16_rrkz_512: 1682 ; X64: # %bb.0: 1683 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1684 ; X64-NEXT: vpaddw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0xc1] 1685 ; X64-NEXT: retq # encoding: [0xc3] 1686 %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 1687 ret <32 x i16> %res 1688 } 1689 1690 define <32 x i16> @test_mask_add_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { 1691 ; X86-LABEL: test_mask_add_epi16_rm_512: 1692 ; X86: # %bb.0: 1693 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1694 ; X86-NEXT: vpaddw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0x00] 1695 ; X86-NEXT: retl # encoding: [0xc3] 1696 ; 1697 ; X64-LABEL: test_mask_add_epi16_rm_512: 1698 ; X64: # %bb.0: 1699 ; X64-NEXT: vpaddw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0x07] 1700 ; X64-NEXT: retq # encoding: [0xc3] 1701 %b = load <32 x i16>, <32 x i16>* %ptr_b 1702 %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 1703 ret <32 x i16> %res 1704 } 1705 1706 define <32 x i16> @test_mask_add_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { 1707 ; X86-LABEL: test_mask_add_epi16_rmk_512: 1708 ; X86: # %bb.0: 1709 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1710 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 1711 ; X86-NEXT: vpaddw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xfd,0x08] 1712 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1713 ; X86-NEXT: retl # encoding: [0xc3] 1714 ; 1715 ; X64-LABEL: test_mask_add_epi16_rmk_512: 1716 ; X64: # %bb.0: 1717 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1718 ; X64-NEXT: vpaddw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xfd,0x0f] 1719 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1720 ; X64-NEXT: retq # encoding: [0xc3] 1721 %b = load <32 x i16>, <32 x i16>* %ptr_b 1722 %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 1723 ret <32 x i16> %res 1724 } 1725 1726 define <32 x i16> @test_mask_add_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) 
{ 1727 ; X86-LABEL: test_mask_add_epi16_rmkz_512: 1728 ; X86: # %bb.0: 1729 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1730 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 1731 ; X86-NEXT: vpaddw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0x00] 1732 ; X86-NEXT: retl # encoding: [0xc3] 1733 ; 1734 ; X64-LABEL: test_mask_add_epi16_rmkz_512: 1735 ; X64: # %bb.0: 1736 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1737 ; X64-NEXT: vpaddw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0x07] 1738 ; X64-NEXT: retq # encoding: [0xc3] 1739 %b = load <32 x i16>, <32 x i16>* %ptr_b 1740 %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 1741 ret <32 x i16> %res 1742 } 1743 1744 declare <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 1745 1746 define <32 x i16> @test_mask_sub_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { 1747 ; CHECK-LABEL: test_mask_sub_epi16_rr_512: 1748 ; CHECK: # %bb.0: 1749 ; CHECK-NEXT: vpsubw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf9,0xc1] 1750 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1751 %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 1752 ret <32 x i16> %res 1753 } 1754 1755 define <32 x i16> @test_mask_sub_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { 1756 ; X86-LABEL: test_mask_sub_epi16_rrk_512: 1757 ; X86: # %bb.0: 1758 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1759 ; X86-NEXT: vpsubw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf9,0xd1] 1760 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1761 ; X86-NEXT: retl # encoding: [0xc3] 1762 ; 1763 ; X64-LABEL: test_mask_sub_epi16_rrk_512: 1764 ; X64: # %bb.0: 1765 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1766 ; X64-NEXT: vpsubw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf9,0xd1] 1767 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1768 ; X64-NEXT: retq # encoding: [0xc3] 1769 %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 1770 ret <32 x i16> %res 1771 } 1772 1773 define <32 x i16> @test_mask_sub_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) { 1774 ; X86-LABEL: test_mask_sub_epi16_rrkz_512: 1775 ; X86: # %bb.0: 1776 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1777 ; X86-NEXT: vpsubw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0xc1] 1778 ; X86-NEXT: retl # encoding: [0xc3] 1779 ; 1780 ; X64-LABEL: test_mask_sub_epi16_rrkz_512: 1781 ; X64: # %bb.0: 1782 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1783 ; X64-NEXT: vpsubw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0xc1] 1784 ; X64-NEXT: retq # encoding: [0xc3] 1785 %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 1786 ret <32 x i16> %res 1787 } 1788 1789 define <32 x i16> @test_mask_sub_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { 1790 ; X86-LABEL: test_mask_sub_epi16_rm_512: 1791 ; X86: # %bb.0: 1792 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1793 ; 
X86-NEXT: vpsubw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf9,0x00] 1794 ; X86-NEXT: retl # encoding: [0xc3] 1795 ; 1796 ; X64-LABEL: test_mask_sub_epi16_rm_512: 1797 ; X64: # %bb.0: 1798 ; X64-NEXT: vpsubw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf9,0x07] 1799 ; X64-NEXT: retq # encoding: [0xc3] 1800 %b = load <32 x i16>, <32 x i16>* %ptr_b 1801 %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 1802 ret <32 x i16> %res 1803 } 1804 1805 define <32 x i16> @test_mask_sub_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { 1806 ; X86-LABEL: test_mask_sub_epi16_rmk_512: 1807 ; X86: # %bb.0: 1808 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1809 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 1810 ; X86-NEXT: vpsubw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf9,0x08] 1811 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1812 ; X86-NEXT: retl # encoding: [0xc3] 1813 ; 1814 ; X64-LABEL: test_mask_sub_epi16_rmk_512: 1815 ; X64: # %bb.0: 1816 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1817 ; X64-NEXT: vpsubw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf9,0x0f] 1818 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1819 ; X64-NEXT: retq # encoding: [0xc3] 1820 %b = load <32 x i16>, <32 x i16>* %ptr_b 1821 %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 1822 ret <32 x i16> %res 1823 } 1824 1825 define <32 x i16> @test_mask_sub_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) { 1826 ; X86-LABEL: test_mask_sub_epi16_rmkz_512: 1827 ; X86: # %bb.0: 1828 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1829 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 1830 ; X86-NEXT: vpsubw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0x00] 1831 ; X86-NEXT: retl # encoding: [0xc3] 1832 ; 1833 ; X64-LABEL: test_mask_sub_epi16_rmkz_512: 1834 ; X64: # %bb.0: 1835 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1836 ; X64-NEXT: vpsubw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0x07] 1837 ; X64-NEXT: retq # encoding: [0xc3] 1838 %b = load <32 x i16>, <32 x i16>* %ptr_b 1839 %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 1840 ret <32 x i16> %res 1841 } 1842 1843 declare <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 1844 1845 define <32 x i16> @test_mask_mullo_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { 1846 ; CHECK-LABEL: test_mask_mullo_epi16_rr_512: 1847 ; CHECK: # %bb.0: 1848 ; CHECK-NEXT: vpmullw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd5,0xc1] 1849 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1850 %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 1851 ret <32 x i16> %res 1852 } 1853 1854 define <32 x i16> @test_mask_mullo_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { 1855 ; X86-LABEL: test_mask_mullo_epi16_rrk_512: 1856 ; X86: # %bb.0: 1857 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1858 ; X86-NEXT: vpmullw %zmm1, 
%zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd5,0xd1] 1859 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1860 ; X86-NEXT: retl # encoding: [0xc3] 1861 ; 1862 ; X64-LABEL: test_mask_mullo_epi16_rrk_512: 1863 ; X64: # %bb.0: 1864 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1865 ; X64-NEXT: vpmullw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd5,0xd1] 1866 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1867 ; X64-NEXT: retq # encoding: [0xc3] 1868 %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 1869 ret <32 x i16> %res 1870 } 1871 1872 define <32 x i16> @test_mask_mullo_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) { 1873 ; X86-LABEL: test_mask_mullo_epi16_rrkz_512: 1874 ; X86: # %bb.0: 1875 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1876 ; X86-NEXT: vpmullw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0xc1] 1877 ; X86-NEXT: retl # encoding: [0xc3] 1878 ; 1879 ; X64-LABEL: test_mask_mullo_epi16_rrkz_512: 1880 ; X64: # %bb.0: 1881 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1882 ; X64-NEXT: vpmullw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0xc1] 1883 ; X64-NEXT: retq # encoding: [0xc3] 1884 %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 1885 ret <32 x i16> %res 1886 } 1887 1888 define <32 x i16> @test_mask_mullo_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { 1889 ; X86-LABEL: test_mask_mullo_epi16_rm_512: 1890 ; X86: # %bb.0: 1891 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1892 ; X86-NEXT: vpmullw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd5,0x00] 1893 ; X86-NEXT: retl # encoding: [0xc3] 1894 ; 1895 ; X64-LABEL: test_mask_mullo_epi16_rm_512: 1896 ; X64: # %bb.0: 1897 ; X64-NEXT: vpmullw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd5,0x07] 1898 ; X64-NEXT: retq # encoding: [0xc3] 1899 %b = load <32 x i16>, <32 x i16>* %ptr_b 1900 %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 1901 ret <32 x i16> %res 1902 } 1903 1904 define <32 x i16> @test_mask_mullo_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { 1905 ; X86-LABEL: test_mask_mullo_epi16_rmk_512: 1906 ; X86: # %bb.0: 1907 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1908 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 1909 ; X86-NEXT: vpmullw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd5,0x08] 1910 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1911 ; X86-NEXT: retl # encoding: [0xc3] 1912 ; 1913 ; X64-LABEL: test_mask_mullo_epi16_rmk_512: 1914 ; X64: # %bb.0: 1915 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1916 ; X64-NEXT: vpmullw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd5,0x0f] 1917 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1918 ; X64-NEXT: retq # encoding: [0xc3] 1919 %b = load <32 x i16>, <32 x i16>* %ptr_b 1920 %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 1921 ret <32 x i16> %res 1922 } 1923 1924 define <32 x i16> 
@test_mask_mullo_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) { 1925 ; X86-LABEL: test_mask_mullo_epi16_rmkz_512: 1926 ; X86: # %bb.0: 1927 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1928 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 1929 ; X86-NEXT: vpmullw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0x00] 1930 ; X86-NEXT: retl # encoding: [0xc3] 1931 ; 1932 ; X64-LABEL: test_mask_mullo_epi16_rmkz_512: 1933 ; X64: # %bb.0: 1934 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1935 ; X64-NEXT: vpmullw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0x07] 1936 ; X64-NEXT: retq # encoding: [0xc3] 1937 %b = load <32 x i16>, <32 x i16>* %ptr_b 1938 %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 1939 ret <32 x i16> %res 1940 } 1941 1942 declare <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 1943 1944 define <8 x i16> @test_mask_mullo_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { 1945 ; CHECK-LABEL: test_mask_mullo_epi16_rr_128: 1946 ; CHECK: # %bb.0: 1947 ; CHECK-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd5,0xc1] 1948 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1949 %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 1950 ret <8 x i16> %res 1951 } 1952 1953 define <8 x i16> @test_mask_mullo_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) { 1954 ; X86-LABEL: test_mask_mullo_epi16_rrk_128: 1955 ; X86: # %bb.0: 1956 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1957 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1958 ; X86-NEXT: vpmullw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd5,0xd1] 1959 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1960 ; X86-NEXT: retl # encoding: [0xc3] 1961 ; 1962 ; X64-LABEL: test_mask_mullo_epi16_rrk_128: 1963 ; X64: # %bb.0: 1964 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1965 ; X64-NEXT: vpmullw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd5,0xd1] 1966 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1967 ; X64-NEXT: retq # encoding: [0xc3] 1968 %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 1969 ret <8 x i16> %res 1970 } 1971 1972 define <8 x i16> @test_mask_mullo_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { 1973 ; X86-LABEL: test_mask_mullo_epi16_rrkz_128: 1974 ; X86: # %bb.0: 1975 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1976 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1977 ; X86-NEXT: vpmullw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd5,0xc1] 1978 ; X86-NEXT: retl # encoding: [0xc3] 1979 ; 1980 ; X64-LABEL: test_mask_mullo_epi16_rrkz_128: 1981 ; X64: # %bb.0: 1982 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1983 ; X64-NEXT: vpmullw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd5,0xc1] 1984 ; X64-NEXT: retq # encoding: [0xc3] 1985 %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 1986 ret <8 x i16> %res 1987 } 1988 1989 
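; Illustrative sketch only (not part of the autogenerated checks, and the
; function name below is hypothetical): the legacy
; llvm.x86.avx512.mask.pmull.w.* calls exercised in these tests behave like a
; plain vector multiply whose result is selected lane-by-lane against the
; pass-through operand under the integer mask. A hand-written generic-IR
; equivalent of the 128-bit masked form would look roughly like this.
define <8 x i16> @mullo_epi16_mask_sketch(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
  ; multiply all lanes unconditionally
  %mul = mul <8 x i16> %a, %b
  ; reinterpret the i8 mask as one bit per 16-bit lane
  %m = bitcast i8 %mask to <8 x i1>
  ; keep the product where the mask bit is set, otherwise the pass-through lane
  %res = select <8 x i1> %m, <8 x i16> %mul, <8 x i16> %passThru
  ret <8 x i16> %res
}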
define <8 x i16> @test_mask_mullo_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { 1990 ; X86-LABEL: test_mask_mullo_epi16_rm_128: 1991 ; X86: # %bb.0: 1992 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1993 ; X86-NEXT: vpmullw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd5,0x00] 1994 ; X86-NEXT: retl # encoding: [0xc3] 1995 ; 1996 ; X64-LABEL: test_mask_mullo_epi16_rm_128: 1997 ; X64: # %bb.0: 1998 ; X64-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd5,0x07] 1999 ; X64-NEXT: retq # encoding: [0xc3] 2000 %b = load <8 x i16>, <8 x i16>* %ptr_b 2001 %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 2002 ret <8 x i16> %res 2003 } 2004 2005 define <8 x i16> @test_mask_mullo_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) { 2006 ; X86-LABEL: test_mask_mullo_epi16_rmk_128: 2007 ; X86: # %bb.0: 2008 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2009 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 2010 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 2011 ; X86-NEXT: vpmullw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd5,0x08] 2012 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 2013 ; X86-NEXT: retl # encoding: [0xc3] 2014 ; 2015 ; X64-LABEL: test_mask_mullo_epi16_rmk_128: 2016 ; X64: # %bb.0: 2017 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 2018 ; X64-NEXT: vpmullw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd5,0x0f] 2019 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 2020 ; X64-NEXT: retq # encoding: [0xc3] 2021 %b = load <8 x i16>, <8 x i16>* %ptr_b 2022 %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 2023 ret <8 x i16> %res 2024 } 2025 2026 define <8 x i16> @test_mask_mullo_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) { 2027 ; X86-LABEL: test_mask_mullo_epi16_rmkz_128: 2028 ; X86: # %bb.0: 2029 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2030 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 2031 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 2032 ; X86-NEXT: vpmullw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd5,0x00] 2033 ; X86-NEXT: retl # encoding: [0xc3] 2034 ; 2035 ; X64-LABEL: test_mask_mullo_epi16_rmkz_128: 2036 ; X64: # %bb.0: 2037 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 2038 ; X64-NEXT: vpmullw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd5,0x07] 2039 ; X64-NEXT: retq # encoding: [0xc3] 2040 %b = load <8 x i16>, <8 x i16>* %ptr_b 2041 %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 2042 ret <8 x i16> %res 2043 } 2044 2045 declare <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 2046 2047 define <16 x i16> @test_mask_mullo_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { 2048 ; CHECK-LABEL: test_mask_mullo_epi16_rr_256: 2049 ; CHECK: # %bb.0: 2050 ; CHECK-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd5,0xc1] 2051 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2052 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, 
<16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 2053 ret <16 x i16> %res 2054 } 2055 2056 define <16 x i16> @test_mask_mullo_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) { 2057 ; X86-LABEL: test_mask_mullo_epi16_rrk_256: 2058 ; X86: # %bb.0: 2059 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2060 ; X86-NEXT: vpmullw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd5,0xd1] 2061 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2062 ; X86-NEXT: retl # encoding: [0xc3] 2063 ; 2064 ; X64-LABEL: test_mask_mullo_epi16_rrk_256: 2065 ; X64: # %bb.0: 2066 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2067 ; X64-NEXT: vpmullw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd5,0xd1] 2068 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2069 ; X64-NEXT: retq # encoding: [0xc3] 2070 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 2071 ret <16 x i16> %res 2072 } 2073 2074 define <16 x i16> @test_mask_mullo_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { 2075 ; X86-LABEL: test_mask_mullo_epi16_rrkz_256: 2076 ; X86: # %bb.0: 2077 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2078 ; X86-NEXT: vpmullw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0xc1] 2079 ; X86-NEXT: retl # encoding: [0xc3] 2080 ; 2081 ; X64-LABEL: test_mask_mullo_epi16_rrkz_256: 2082 ; X64: # %bb.0: 2083 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2084 ; X64-NEXT: vpmullw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0xc1] 2085 ; X64-NEXT: retq # encoding: [0xc3] 2086 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 2087 ret <16 x i16> %res 2088 } 2089 2090 define <16 x i16> @test_mask_mullo_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { 2091 ; X86-LABEL: test_mask_mullo_epi16_rm_256: 2092 ; X86: # %bb.0: 2093 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2094 ; X86-NEXT: vpmullw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd5,0x00] 2095 ; X86-NEXT: retl # encoding: [0xc3] 2096 ; 2097 ; X64-LABEL: test_mask_mullo_epi16_rm_256: 2098 ; X64: # %bb.0: 2099 ; X64-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd5,0x07] 2100 ; X64-NEXT: retq # encoding: [0xc3] 2101 %b = load <16 x i16>, <16 x i16>* %ptr_b 2102 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 2103 ret <16 x i16> %res 2104 } 2105 2106 define <16 x i16> @test_mask_mullo_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) { 2107 ; X86-LABEL: test_mask_mullo_epi16_rmk_256: 2108 ; X86: # %bb.0: 2109 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2110 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 2111 ; X86-NEXT: vpmullw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd5,0x08] 2112 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 2113 ; X86-NEXT: retl # encoding: [0xc3] 2114 ; 2115 ; X64-LABEL: test_mask_mullo_epi16_rmk_256: 2116 ; X64: # %bb.0: 2117 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 2118 ; 
X64-NEXT: vpmullw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd5,0x0f] 2119 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 2120 ; X64-NEXT: retq # encoding: [0xc3] 2121 %b = load <16 x i16>, <16 x i16>* %ptr_b 2122 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 2123 ret <16 x i16> %res 2124 } 2125 2126 define <16 x i16> @test_mask_mullo_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) { 2127 ; X86-LABEL: test_mask_mullo_epi16_rmkz_256: 2128 ; X86: # %bb.0: 2129 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2130 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 2131 ; X86-NEXT: vpmullw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0x00] 2132 ; X86-NEXT: retl # encoding: [0xc3] 2133 ; 2134 ; X64-LABEL: test_mask_mullo_epi16_rmkz_256: 2135 ; X64: # %bb.0: 2136 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 2137 ; X64-NEXT: vpmullw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0x07] 2138 ; X64-NEXT: retq # encoding: [0xc3] 2139 %b = load <16 x i16>, <16 x i16>* %ptr_b 2140 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 2141 ret <16 x i16> %res 2142 } 2143 2144 declare <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 2145 2146 declare <16 x i8> @llvm.x86.avx512.mask.pmaxs.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) 2147 2148 define <16 x i8>@test_int_x86_avx512_mask_pmaxs_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) { 2149 ; X86-LABEL: test_int_x86_avx512_mask_pmaxs_b_128: 2150 ; X86: # %bb.0: 2151 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2152 ; X86-NEXT: vpmaxsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3c,0xd1] 2153 ; X86-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3c,0xc1] 2154 ; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] 2155 ; X86-NEXT: retl # encoding: [0xc3] 2156 ; 2157 ; X64-LABEL: test_int_x86_avx512_mask_pmaxs_b_128: 2158 ; X64: # %bb.0: 2159 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2160 ; X64-NEXT: vpmaxsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3c,0xd1] 2161 ; X64-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3c,0xc1] 2162 ; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] 2163 ; X64-NEXT: retq # encoding: [0xc3] 2164 %res = call <16 x i8> @llvm.x86.avx512.mask.pmaxs.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2 ,i16 %mask) 2165 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmaxs.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask) 2166 %res2 = add <16 x i8> %res, %res1 2167 ret <16 x i8> %res2 2168 } 2169 2170 declare <32 x i8> @llvm.x86.avx512.mask.pmaxs.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 2171 2172 define <32 x i8>@test_int_x86_avx512_mask_pmaxs_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { 2173 ; X86-LABEL: test_int_x86_avx512_mask_pmaxs_b_256: 2174 ; X86: # %bb.0: 2175 ; X86-NEXT: vpmaxsb %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3c,0xd9] 2176 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 2177 ; 
X86-NEXT: vpmaxsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3c,0xd1] 2178 ; X86-NEXT: vpaddb %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc3] 2179 ; X86-NEXT: retl # encoding: [0xc3] 2180 ; 2181 ; X64-LABEL: test_int_x86_avx512_mask_pmaxs_b_256: 2182 ; X64: # %bb.0: 2183 ; X64-NEXT: vpmaxsb %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3c,0xd9] 2184 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2185 ; X64-NEXT: vpmaxsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3c,0xd1] 2186 ; X64-NEXT: vpaddb %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc3] 2187 ; X64-NEXT: retq # encoding: [0xc3] 2188 %res = call <32 x i8> @llvm.x86.avx512.mask.pmaxs.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) 2189 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmaxs.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) 2190 %res2 = add <32 x i8> %res, %res1 2191 ret <32 x i8> %res2 2192 } 2193 2194 declare <8 x i16> @llvm.x86.avx512.mask.pmaxs.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 2195 2196 define <8 x i16>@test_int_x86_avx512_mask_pmaxs_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 2197 ; X86-LABEL: test_int_x86_avx512_mask_pmaxs_w_128: 2198 ; X86: # %bb.0: 2199 ; X86-NEXT: vpmaxsw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xee,0xd9] 2200 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2201 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 2202 ; X86-NEXT: vpmaxsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xee,0xd1] 2203 ; X86-NEXT: vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] 2204 ; X86-NEXT: retl # encoding: [0xc3] 2205 ; 2206 ; X64-LABEL: test_int_x86_avx512_mask_pmaxs_w_128: 2207 ; X64: # %bb.0: 2208 ; X64-NEXT: vpmaxsw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xee,0xd9] 2209 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2210 ; X64-NEXT: vpmaxsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xee,0xd1] 2211 ; X64-NEXT: vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] 2212 ; X64-NEXT: retq # encoding: [0xc3] 2213 %res = call <8 x i16> @llvm.x86.avx512.mask.pmaxs.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 2214 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmaxs.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 2215 %res2 = add <8 x i16> %res, %res1 2216 ret <8 x i16> %res2 2217 } 2218 2219 declare <16 x i16> @llvm.x86.avx512.mask.pmaxs.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 2220 2221 define <16 x i16>@test_int_x86_avx512_mask_pmaxs_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) { 2222 ; X86-LABEL: test_int_x86_avx512_mask_pmaxs_w_256: 2223 ; X86: # %bb.0: 2224 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2225 ; X86-NEXT: vpmaxsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xee,0xd1] 2226 ; X86-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xee,0xc1] 2227 ; X86-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] 2228 ; X86-NEXT: retl # encoding: [0xc3] 2229 ; 2230 ; X64-LABEL: test_int_x86_avx512_mask_pmaxs_w_256: 2231 ; X64: # %bb.0: 2232 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2233 ; X64-NEXT: vpmaxsw %ymm1, %ymm0, %ymm2 {%k1} # 
encoding: [0x62,0xf1,0x7d,0x29,0xee,0xd1] 2234 ; X64-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xee,0xc1] 2235 ; X64-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] 2236 ; X64-NEXT: retq # encoding: [0xc3] 2237 %res = call <16 x i16> @llvm.x86.avx512.mask.pmaxs.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) 2238 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmaxs.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask) 2239 %res2 = add <16 x i16> %res, %res1 2240 ret <16 x i16> %res2 2241 } 2242 2243 declare <16 x i8> @llvm.x86.avx512.mask.pmaxu.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) 2244 2245 define <16 x i8>@test_int_x86_avx512_mask_pmaxu_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2,i16 %mask) { 2246 ; X86-LABEL: test_int_x86_avx512_mask_pmaxu_b_128: 2247 ; X86: # %bb.0: 2248 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2249 ; X86-NEXT: vpmaxub %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xde,0xd1] 2250 ; X86-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xde,0xc1] 2251 ; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] 2252 ; X86-NEXT: retl # encoding: [0xc3] 2253 ; 2254 ; X64-LABEL: test_int_x86_avx512_mask_pmaxu_b_128: 2255 ; X64: # %bb.0: 2256 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2257 ; X64-NEXT: vpmaxub %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xde,0xd1] 2258 ; X64-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xde,0xc1] 2259 ; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] 2260 ; X64-NEXT: retq # encoding: [0xc3] 2261 %res = call <16 x i8> @llvm.x86.avx512.mask.pmaxu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) 2262 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmaxu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask) 2263 %res2 = add <16 x i8> %res, %res1 2264 ret <16 x i8> %res2 2265 } 2266 2267 declare <32 x i8> @llvm.x86.avx512.mask.pmaxu.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 2268 2269 define <32 x i8>@test_int_x86_avx512_mask_pmaxu_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { 2270 ; X86-LABEL: test_int_x86_avx512_mask_pmaxu_b_256: 2271 ; X86: # %bb.0: 2272 ; X86-NEXT: vpmaxub %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xde,0xd9] 2273 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 2274 ; X86-NEXT: vpmaxub %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xde,0xd1] 2275 ; X86-NEXT: vpaddb %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc3] 2276 ; X86-NEXT: retl # encoding: [0xc3] 2277 ; 2278 ; X64-LABEL: test_int_x86_avx512_mask_pmaxu_b_256: 2279 ; X64: # %bb.0: 2280 ; X64-NEXT: vpmaxub %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xde,0xd9] 2281 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2282 ; X64-NEXT: vpmaxub %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xde,0xd1] 2283 ; X64-NEXT: vpaddb %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc3] 2284 ; X64-NEXT: retq # encoding: [0xc3] 2285 %res = call <32 x i8> @llvm.x86.avx512.mask.pmaxu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) 2286 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmaxu.b.256(<32 x i8> 
%x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) 2287 %res2 = add <32 x i8> %res, %res1 2288 ret <32 x i8> %res2 2289 } 2290 2291 declare <8 x i16> @llvm.x86.avx512.mask.pmaxu.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 2292 2293 define <8 x i16>@test_int_x86_avx512_mask_pmaxu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 2294 ; X86-LABEL: test_int_x86_avx512_mask_pmaxu_w_128: 2295 ; X86: # %bb.0: 2296 ; X86-NEXT: vpmaxuw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3e,0xd9] 2297 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2298 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 2299 ; X86-NEXT: vpmaxuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3e,0xd1] 2300 ; X86-NEXT: vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] 2301 ; X86-NEXT: retl # encoding: [0xc3] 2302 ; 2303 ; X64-LABEL: test_int_x86_avx512_mask_pmaxu_w_128: 2304 ; X64: # %bb.0: 2305 ; X64-NEXT: vpmaxuw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3e,0xd9] 2306 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2307 ; X64-NEXT: vpmaxuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3e,0xd1] 2308 ; X64-NEXT: vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] 2309 ; X64-NEXT: retq # encoding: [0xc3] 2310 %res = call <8 x i16> @llvm.x86.avx512.mask.pmaxu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 2311 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmaxu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 2312 %res2 = add <8 x i16> %res, %res1 2313 ret <8 x i16> %res2 2314 } 2315 2316 declare <16 x i16> @llvm.x86.avx512.mask.pmaxu.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 2317 2318 define <16 x i16>@test_int_x86_avx512_mask_pmaxu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) { 2319 ; X86-LABEL: test_int_x86_avx512_mask_pmaxu_w_256: 2320 ; X86: # %bb.0: 2321 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2322 ; X86-NEXT: vpmaxuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3e,0xd1] 2323 ; X86-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x3e,0xc1] 2324 ; X86-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] 2325 ; X86-NEXT: retl # encoding: [0xc3] 2326 ; 2327 ; X64-LABEL: test_int_x86_avx512_mask_pmaxu_w_256: 2328 ; X64: # %bb.0: 2329 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2330 ; X64-NEXT: vpmaxuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3e,0xd1] 2331 ; X64-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x3e,0xc1] 2332 ; X64-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] 2333 ; X64-NEXT: retq # encoding: [0xc3] 2334 %res = call <16 x i16> @llvm.x86.avx512.mask.pmaxu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) 2335 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmaxu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask) 2336 %res2 = add <16 x i16> %res, %res1 2337 ret <16 x i16> %res2 2338 } 2339 2340 declare <16 x i8> @llvm.x86.avx512.mask.pmins.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) 2341 2342 define <16 x i8>@test_int_x86_avx512_mask_pmins_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) { 2343 ; X86-LABEL: test_int_x86_avx512_mask_pmins_b_128: 2344 ; X86: # %bb.0: 
2345 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2346 ; X86-NEXT: vpminsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x38,0xd1] 2347 ; X86-NEXT: vpminsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x38,0xc1] 2348 ; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] 2349 ; X86-NEXT: retl # encoding: [0xc3] 2350 ; 2351 ; X64-LABEL: test_int_x86_avx512_mask_pmins_b_128: 2352 ; X64: # %bb.0: 2353 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2354 ; X64-NEXT: vpminsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x38,0xd1] 2355 ; X64-NEXT: vpminsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x38,0xc1] 2356 ; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] 2357 ; X64-NEXT: retq # encoding: [0xc3] 2358 %res = call <16 x i8> @llvm.x86.avx512.mask.pmins.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) 2359 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmins.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask) 2360 %res2 = add <16 x i8> %res, %res1 2361 ret <16 x i8> %res2 2362 } 2363 2364 declare <32 x i8> @llvm.x86.avx512.mask.pmins.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 2365 2366 define <32 x i8>@test_int_x86_avx512_mask_pmins_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { 2367 ; X86-LABEL: test_int_x86_avx512_mask_pmins_b_256: 2368 ; X86: # %bb.0: 2369 ; X86-NEXT: vpminsb %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x38,0xd9] 2370 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 2371 ; X86-NEXT: vpminsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x38,0xd1] 2372 ; X86-NEXT: vpaddb %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc3] 2373 ; X86-NEXT: retl # encoding: [0xc3] 2374 ; 2375 ; X64-LABEL: test_int_x86_avx512_mask_pmins_b_256: 2376 ; X64: # %bb.0: 2377 ; X64-NEXT: vpminsb %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x38,0xd9] 2378 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2379 ; X64-NEXT: vpminsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x38,0xd1] 2380 ; X64-NEXT: vpaddb %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc3] 2381 ; X64-NEXT: retq # encoding: [0xc3] 2382 %res = call <32 x i8> @llvm.x86.avx512.mask.pmins.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) 2383 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmins.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) 2384 %res2 = add <32 x i8> %res, %res1 2385 ret <32 x i8> %res2 2386 } 2387 2388 declare <8 x i16> @llvm.x86.avx512.mask.pmins.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 2389 2390 define <8 x i16>@test_int_x86_avx512_mask_pmins_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 2391 ; X86-LABEL: test_int_x86_avx512_mask_pmins_w_128: 2392 ; X86: # %bb.0: 2393 ; X86-NEXT: vpminsw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xea,0xd9] 2394 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2395 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 2396 ; X86-NEXT: vpminsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xea,0xd1] 2397 ; X86-NEXT: vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] 2398 ; X86-NEXT: retl # encoding: 
[0xc3] 2399 ; 2400 ; X64-LABEL: test_int_x86_avx512_mask_pmins_w_128: 2401 ; X64: # %bb.0: 2402 ; X64-NEXT: vpminsw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xea,0xd9] 2403 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2404 ; X64-NEXT: vpminsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xea,0xd1] 2405 ; X64-NEXT: vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] 2406 ; X64-NEXT: retq # encoding: [0xc3] 2407 %res = call <8 x i16> @llvm.x86.avx512.mask.pmins.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 2408 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmins.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 2409 %res2 = add <8 x i16> %res, %res1 2410 ret <8 x i16> %res2 2411 } 2412 2413 declare <16 x i16> @llvm.x86.avx512.mask.pmins.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 2414 2415 define <16 x i16>@test_int_x86_avx512_mask_pmins_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) { 2416 ; X86-LABEL: test_int_x86_avx512_mask_pmins_w_256: 2417 ; X86: # %bb.0: 2418 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2419 ; X86-NEXT: vpminsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xea,0xd1] 2420 ; X86-NEXT: vpminsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xea,0xc1] 2421 ; X86-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] 2422 ; X86-NEXT: retl # encoding: [0xc3] 2423 ; 2424 ; X64-LABEL: test_int_x86_avx512_mask_pmins_w_256: 2425 ; X64: # %bb.0: 2426 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2427 ; X64-NEXT: vpminsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xea,0xd1] 2428 ; X64-NEXT: vpminsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xea,0xc1] 2429 ; X64-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] 2430 ; X64-NEXT: retq # encoding: [0xc3] 2431 %res = call <16 x i16> @llvm.x86.avx512.mask.pmins.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) 2432 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmins.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask) 2433 %res2 = add <16 x i16> %res, %res1 2434 ret <16 x i16> %res2 2435 } 2436 2437 declare <16 x i8> @llvm.x86.avx512.mask.pminu.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) 2438 2439 define <16 x i8>@test_int_x86_avx512_mask_pminu_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) { 2440 ; X86-LABEL: test_int_x86_avx512_mask_pminu_b_128: 2441 ; X86: # %bb.0: 2442 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2443 ; X86-NEXT: vpminub %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xda,0xd1] 2444 ; X86-NEXT: vpminub %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xda,0xc1] 2445 ; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] 2446 ; X86-NEXT: retl # encoding: [0xc3] 2447 ; 2448 ; X64-LABEL: test_int_x86_avx512_mask_pminu_b_128: 2449 ; X64: # %bb.0: 2450 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2451 ; X64-NEXT: vpminub %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xda,0xd1] 2452 ; X64-NEXT: vpminub %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xda,0xc1] 2453 ; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] 2454 ; X64-NEXT: retq # encoding: 
[0xc3] 2455 %res = call <16 x i8> @llvm.x86.avx512.mask.pminu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) 2456 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pminu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask) 2457 %res2 = add <16 x i8> %res, %res1 2458 ret <16 x i8> %res2 2459 } 2460 2461 declare <32 x i8> @llvm.x86.avx512.mask.pminu.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 2462 2463 define <32 x i8>@test_int_x86_avx512_mask_pminu_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { 2464 ; X86-LABEL: test_int_x86_avx512_mask_pminu_b_256: 2465 ; X86: # %bb.0: 2466 ; X86-NEXT: vpminub %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xda,0xd9] 2467 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 2468 ; X86-NEXT: vpminub %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xda,0xd1] 2469 ; X86-NEXT: vpaddb %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc3] 2470 ; X86-NEXT: retl # encoding: [0xc3] 2471 ; 2472 ; X64-LABEL: test_int_x86_avx512_mask_pminu_b_256: 2473 ; X64: # %bb.0: 2474 ; X64-NEXT: vpminub %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xda,0xd9] 2475 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2476 ; X64-NEXT: vpminub %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xda,0xd1] 2477 ; X64-NEXT: vpaddb %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc3] 2478 ; X64-NEXT: retq # encoding: [0xc3] 2479 %res = call <32 x i8> @llvm.x86.avx512.mask.pminu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) 2480 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pminu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) 2481 %res2 = add <32 x i8> %res, %res1 2482 ret <32 x i8> %res2 2483 } 2484 2485 declare <8 x i16> @llvm.x86.avx512.mask.pminu.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 2486 2487 define <8 x i16>@test_int_x86_avx512_mask_pminu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 2488 ; X86-LABEL: test_int_x86_avx512_mask_pminu_w_128: 2489 ; X86: # %bb.0: 2490 ; X86-NEXT: vpminuw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3a,0xd9] 2491 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2492 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 2493 ; X86-NEXT: vpminuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3a,0xd1] 2494 ; X86-NEXT: vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] 2495 ; X86-NEXT: retl # encoding: [0xc3] 2496 ; 2497 ; X64-LABEL: test_int_x86_avx512_mask_pminu_w_128: 2498 ; X64: # %bb.0: 2499 ; X64-NEXT: vpminuw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3a,0xd9] 2500 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2501 ; X64-NEXT: vpminuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3a,0xd1] 2502 ; X64-NEXT: vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] 2503 ; X64-NEXT: retq # encoding: [0xc3] 2504 %res = call <8 x i16> @llvm.x86.avx512.mask.pminu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 2505 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pminu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 2506 %res2 = add <8 x i16> %res, %res1 2507 ret <8 x i16> %res2 2508 } 2509 2510 declare <16 x i16> @llvm.x86.avx512.mask.pminu.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 2511 2512 
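; Aside (not part of the autogenerated checks): a hedged sketch, under the same
; illustrative assumptions as the earlier sketch, of the unsigned-minimum
; behaviour that the masked pminu.w test below exercises: an icmp/select
; minimum merged with the pass-through operand under the lane mask.
define <16 x i16> @sketch_pminu_w_256_mask_semantics(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
  ; unsigned element-wise minimum of %a and %b
  %cmp = icmp ult <16 x i16> %a, %b
  %min = select <16 x i1> %cmp, <16 x i16> %a, <16 x i16> %b
  ; one mask bit per 16-bit lane
  %m = bitcast i16 %mask to <16 x i1>
  ; merge-masking: minimum where the mask is set, %passThru elsewhere
  %res = select <16 x i1> %m, <16 x i16> %min, <16 x i16> %passThru
  ret <16 x i16> %res
}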
define <16 x i16>@test_int_x86_avx512_mask_pminu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) { 2513 ; X86-LABEL: test_int_x86_avx512_mask_pminu_w_256: 2514 ; X86: # %bb.0: 2515 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2516 ; X86-NEXT: vpminuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3a,0xd1] 2517 ; X86-NEXT: vpminuw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x3a,0xc1] 2518 ; X86-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] 2519 ; X86-NEXT: retl # encoding: [0xc3] 2520 ; 2521 ; X64-LABEL: test_int_x86_avx512_mask_pminu_w_256: 2522 ; X64: # %bb.0: 2523 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2524 ; X64-NEXT: vpminuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3a,0xd1] 2525 ; X64-NEXT: vpminuw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x3a,0xc1] 2526 ; X64-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] 2527 ; X64-NEXT: retq # encoding: [0xc3] 2528 %res = call <16 x i16> @llvm.x86.avx512.mask.pminu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) 2529 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pminu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask) 2530 %res2 = add <16 x i16> %res, %res1 2531 ret <16 x i16> %res2 2532 } 2533 2534 declare <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 2535 2536 define <8 x i16>@test_int_x86_avx512_mask_psrl_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 2537 ; X86-LABEL: test_int_x86_avx512_mask_psrl_w_128: 2538 ; X86: # %bb.0: 2539 ; X86-NEXT: vpsrlw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd1,0xd9] 2540 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2541 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 2542 ; X86-NEXT: vpsrlw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd1,0xd1] 2543 ; X86-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd1,0xc1] 2544 ; X86-NEXT: vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0] 2545 ; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] 2546 ; X86-NEXT: retl # encoding: [0xc3] 2547 ; 2548 ; X64-LABEL: test_int_x86_avx512_mask_psrl_w_128: 2549 ; X64: # %bb.0: 2550 ; X64-NEXT: vpsrlw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd1,0xd9] 2551 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2552 ; X64-NEXT: vpsrlw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd1,0xd1] 2553 ; X64-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd1,0xc1] 2554 ; X64-NEXT: vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0] 2555 ; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] 2556 ; X64-NEXT: retq # encoding: [0xc3] 2557 %res = call <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 2558 %res1 = call <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 2559 %res2 = call <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3) 2560 %res3 = add <8 x i16> %res, %res1 2561 %res4 = add <8 x i16> %res2, %res3 2562 ret <8 x 
i16> %res4 2563 } 2564 2565 declare <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16>, <8 x i16>, <16 x i16>, i16) 2566 2567 define <16 x i16>@test_int_x86_avx512_mask_psrl_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) { 2568 ; X86-LABEL: test_int_x86_avx512_mask_psrl_w_256: 2569 ; X86: # %bb.0: 2570 ; X86-NEXT: vpsrlw %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd1,0xd9] 2571 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2572 ; X86-NEXT: vpsrlw %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd1,0xd1] 2573 ; X86-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd1,0xc1] 2574 ; X86-NEXT: vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0] 2575 ; X86-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] 2576 ; X86-NEXT: retl # encoding: [0xc3] 2577 ; 2578 ; X64-LABEL: test_int_x86_avx512_mask_psrl_w_256: 2579 ; X64: # %bb.0: 2580 ; X64-NEXT: vpsrlw %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd1,0xd9] 2581 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2582 ; X64-NEXT: vpsrlw %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd1,0xd1] 2583 ; X64-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd1,0xc1] 2584 ; X64-NEXT: vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0] 2585 ; X64-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] 2586 ; X64-NEXT: retq # encoding: [0xc3] 2587 %res = call <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) 2588 %res1 = call <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 -1) 2589 %res2 = call <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3) 2590 %res3 = add <16 x i16> %res, %res1 2591 %res4 = add <16 x i16> %res3, %res2 2592 ret <16 x i16> %res4 2593 } 2594 2595 declare <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 2596 2597 define <8 x i16>@test_int_x86_avx512_mask_psra_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 2598 ; X86-LABEL: test_int_x86_avx512_mask_psra_w_128: 2599 ; X86: # %bb.0: 2600 ; X86-NEXT: vpsraw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe1,0xd9] 2601 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2602 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 2603 ; X86-NEXT: vpsraw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe1,0xd1] 2604 ; X86-NEXT: vpsraw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe1,0xc1] 2605 ; X86-NEXT: vpaddw %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3] 2606 ; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] 2607 ; X86-NEXT: retl # encoding: [0xc3] 2608 ; 2609 ; X64-LABEL: test_int_x86_avx512_mask_psra_w_128: 2610 ; X64: # %bb.0: 2611 ; X64-NEXT: vpsraw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe1,0xd9] 2612 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2613 ; X64-NEXT: vpsraw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe1,0xd1] 2614 ; X64-NEXT: vpsraw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe1,0xc1] 2615 ; 
X64-NEXT: vpaddw %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3] 2616 ; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] 2617 ; X64-NEXT: retq # encoding: [0xc3] 2618 %res = call <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 2619 %res1 = call <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3) 2620 %res2 = call <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 2621 %res3 = add <8 x i16> %res, %res1 2622 %res4 = add <8 x i16> %res3, %res2 2623 ret <8 x i16> %res4 2624 } 2625 2626 declare <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16>, <8 x i16>, <16 x i16>, i16) 2627 2628 define <16 x i16>@test_int_x86_avx512_mask_psra_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) { 2629 ; X86-LABEL: test_int_x86_avx512_mask_psra_w_256: 2630 ; X86: # %bb.0: 2631 ; X86-NEXT: vpsraw %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe1,0xd9] 2632 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2633 ; X86-NEXT: vpsraw %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe1,0xd1] 2634 ; X86-NEXT: vpsraw %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe1,0xc1] 2635 ; X86-NEXT: vpaddw %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3] 2636 ; X86-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] 2637 ; X86-NEXT: retl # encoding: [0xc3] 2638 ; 2639 ; X64-LABEL: test_int_x86_avx512_mask_psra_w_256: 2640 ; X64: # %bb.0: 2641 ; X64-NEXT: vpsraw %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe1,0xd9] 2642 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2643 ; X64-NEXT: vpsraw %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe1,0xd1] 2644 ; X64-NEXT: vpsraw %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe1,0xc1] 2645 ; X64-NEXT: vpaddw %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3] 2646 ; X64-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] 2647 ; X64-NEXT: retq # encoding: [0xc3] 2648 %res = call <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) 2649 %res1 = call <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3) 2650 %res2 = call <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 -1) 2651 %res3 = add <16 x i16> %res, %res1 2652 %res4 = add <16 x i16> %res3, %res2 2653 ret <16 x i16> %res4 2654 } 2655 2656 declare <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 2657 2658 define <8 x i16>@test_int_x86_avx512_mask_psll_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 2659 ; X86-LABEL: test_int_x86_avx512_mask_psll_w_128: 2660 ; X86: # %bb.0: 2661 ; X86-NEXT: vpsllw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf1,0xd9] 2662 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2663 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 2664 ; X86-NEXT: vpsllw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf1,0xd1] 2665 ; X86-NEXT: vpsllw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf1,0xc1] 2666 ; X86-NEXT: 
vpaddw %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3] 2667 ; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] 2668 ; X86-NEXT: retl # encoding: [0xc3] 2669 ; 2670 ; X64-LABEL: test_int_x86_avx512_mask_psll_w_128: 2671 ; X64: # %bb.0: 2672 ; X64-NEXT: vpsllw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf1,0xd9] 2673 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2674 ; X64-NEXT: vpsllw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf1,0xd1] 2675 ; X64-NEXT: vpsllw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf1,0xc1] 2676 ; X64-NEXT: vpaddw %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3] 2677 ; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] 2678 ; X64-NEXT: retq # encoding: [0xc3] 2679 %res = call <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 2680 %res1 = call <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3) 2681 %res2 = call <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 2682 %res3 = add <8 x i16> %res, %res1 2683 %res4 = add <8 x i16> %res3, %res2 2684 ret <8 x i16> %res4 2685 } 2686 2687 declare <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16>, <8 x i16>, <16 x i16>, i16) 2688 2689 define <16 x i16>@test_int_x86_avx512_mask_psll_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) { 2690 ; X86-LABEL: test_int_x86_avx512_mask_psll_w_256: 2691 ; X86: # %bb.0: 2692 ; X86-NEXT: vpsllw %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf1,0xd9] 2693 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2694 ; X86-NEXT: vpsllw %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf1,0xd1] 2695 ; X86-NEXT: vpsllw %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf1,0xc1] 2696 ; X86-NEXT: vpaddw %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3] 2697 ; X86-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] 2698 ; X86-NEXT: retl # encoding: [0xc3] 2699 ; 2700 ; X64-LABEL: test_int_x86_avx512_mask_psll_w_256: 2701 ; X64: # %bb.0: 2702 ; X64-NEXT: vpsllw %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf1,0xd9] 2703 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2704 ; X64-NEXT: vpsllw %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf1,0xd1] 2705 ; X64-NEXT: vpsllw %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf1,0xc1] 2706 ; X64-NEXT: vpaddw %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3] 2707 ; X64-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] 2708 ; X64-NEXT: retq # encoding: [0xc3] 2709 %res = call <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) 2710 %res1 = call <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3) 2711 %res2 = call <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 -1) 2712 %res3 = add <16 x i16> %res, %res1 2713 %res4 = add <16 x i16> %res3, %res2 2714 ret <16 x i16> %res4 2715 } 2716 2717 declare <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16>, i32, 
<8 x i16>, i8) 2718 2719 define <8 x i16>@test_int_x86_avx512_mask_psrl_wi_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) { 2720 ; X86-LABEL: test_int_x86_avx512_mask_psrl_wi_128: 2721 ; X86: # %bb.0: 2722 ; X86-NEXT: vpsrlw $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xd0,0x03] 2723 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 2724 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 2725 ; X86-NEXT: vpsrlw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x71,0xd0,0x03] 2726 ; X86-NEXT: vpsrlw $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x71,0xd0,0x03] 2727 ; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] 2728 ; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] 2729 ; X86-NEXT: retl # encoding: [0xc3] 2730 ; 2731 ; X64-LABEL: test_int_x86_avx512_mask_psrl_wi_128: 2732 ; X64: # %bb.0: 2733 ; X64-NEXT: vpsrlw $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xd0,0x03] 2734 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 2735 ; X64-NEXT: vpsrlw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x71,0xd0,0x03] 2736 ; X64-NEXT: vpsrlw $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x71,0xd0,0x03] 2737 ; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] 2738 ; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] 2739 ; X64-NEXT: retq # encoding: [0xc3] 2740 %res = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3) 2741 %res1 = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 -1) 2742 %res2 = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 3, <8 x i16> zeroinitializer, i8 %x3) 2743 %res3 = add <8 x i16> %res, %res1 2744 %res4 = add <8 x i16> %res2, %res3 2745 ret <8 x i16> %res4 2746 } 2747 2748 declare <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16>, i32, <16 x i16>, i16) 2749 2750 define <16 x i16>@test_int_x86_avx512_mask_psrl_wi_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) { 2751 ; X86-LABEL: test_int_x86_avx512_mask_psrl_wi_256: 2752 ; X86: # %bb.0: 2753 ; X86-NEXT: vpsrlw $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xd0,0x03] 2754 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 2755 ; X86-NEXT: vpsrlw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x71,0xd0,0x03] 2756 ; X86-NEXT: vpsrlw $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x71,0xd0,0x03] 2757 ; X86-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] 2758 ; X86-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] 2759 ; X86-NEXT: retl # encoding: [0xc3] 2760 ; 2761 ; X64-LABEL: test_int_x86_avx512_mask_psrl_wi_256: 2762 ; X64: # %bb.0: 2763 ; X64-NEXT: vpsrlw $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xd0,0x03] 2764 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 2765 ; X64-NEXT: vpsrlw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x71,0xd0,0x03] 2766 ; X64-NEXT: vpsrlw $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x71,0xd0,0x03] 2767 ; X64-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] 2768 ; X64-NEXT: vpaddw %ymm0, 
%ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] 2769 ; X64-NEXT: retq # encoding: [0xc3] 2770 %res = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3) 2771 %res1 = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 -1) 2772 %res2 = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i32 3, <16 x i16> zeroinitializer, i16 %x3) 2773 %res3 = add <16 x i16> %res, %res1 2774 %res4 = add <16 x i16> %res3, %res2 2775 ret <16 x i16> %res4 2776 } 2777 2778 declare <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16>, i32, <8 x i16>, i8) 2779 2780 define <8 x i16>@test_int_x86_avx512_mask_psra_wi_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) { 2781 ; X86-LABEL: test_int_x86_avx512_mask_psra_wi_128: 2782 ; X86: # %bb.0: 2783 ; X86-NEXT: vpsraw $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xe0,0x03] 2784 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 2785 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 2786 ; X86-NEXT: vpsraw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x71,0xe0,0x03] 2787 ; X86-NEXT: vpsraw $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x71,0xe0,0x03] 2788 ; X86-NEXT: vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2] 2789 ; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] 2790 ; X86-NEXT: retl # encoding: [0xc3] 2791 ; 2792 ; X64-LABEL: test_int_x86_avx512_mask_psra_wi_128: 2793 ; X64: # %bb.0: 2794 ; X64-NEXT: vpsraw $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xe0,0x03] 2795 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 2796 ; X64-NEXT: vpsraw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x71,0xe0,0x03] 2797 ; X64-NEXT: vpsraw $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x71,0xe0,0x03] 2798 ; X64-NEXT: vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2] 2799 ; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] 2800 ; X64-NEXT: retq # encoding: [0xc3] 2801 %res = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3) 2802 %res1 = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i32 3, <8 x i16> zeroinitializer, i8 %x3) 2803 %res2 = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 -1) 2804 %res3 = add <8 x i16> %res, %res1 2805 %res4 = add <8 x i16> %res3, %res2 2806 ret <8 x i16> %res4 2807 } 2808 2809 declare <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16>, i32, <16 x i16>, i16) 2810 2811 define <16 x i16>@test_int_x86_avx512_mask_psra_wi_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) { 2812 ; X86-LABEL: test_int_x86_avx512_mask_psra_wi_256: 2813 ; X86: # %bb.0: 2814 ; X86-NEXT: vpsraw $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xe0,0x03] 2815 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 2816 ; X86-NEXT: vpsraw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x71,0xe0,0x03] 2817 ; X86-NEXT: vpsraw $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x71,0xe0,0x03] 2818 ; X86-NEXT: vpaddw %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2] 2819 ; X86-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: 
[0xc5,0xf5,0xfd,0xc0] 2820 ; X86-NEXT: retl # encoding: [0xc3] 2821 ; 2822 ; X64-LABEL: test_int_x86_avx512_mask_psra_wi_256: 2823 ; X64: # %bb.0: 2824 ; X64-NEXT: vpsraw $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xe0,0x03] 2825 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 2826 ; X64-NEXT: vpsraw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x71,0xe0,0x03] 2827 ; X64-NEXT: vpsraw $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x71,0xe0,0x03] 2828 ; X64-NEXT: vpaddw %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2] 2829 ; X64-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] 2830 ; X64-NEXT: retq # encoding: [0xc3] 2831 %res = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3) 2832 %res1 = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i32 3, <16 x i16> zeroinitializer, i16 %x3) 2833 %res2 = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 -1) 2834 %res3 = add <16 x i16> %res, %res1 2835 %res4 = add <16 x i16> %res3, %res2 2836 ret <16 x i16> %res4 2837 } 2838 2839 declare <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16>, i32, <8 x i16>, i8) 2840 2841 define <8 x i16>@test_int_x86_avx512_mask_psll_wi_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) { 2842 ; X86-LABEL: test_int_x86_avx512_mask_psll_wi_128: 2843 ; X86: # %bb.0: 2844 ; X86-NEXT: vpsllw $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xf0,0x03] 2845 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 2846 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 2847 ; X86-NEXT: vpsllw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x71,0xf0,0x03] 2848 ; X86-NEXT: vpsllw $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x71,0xf0,0x03] 2849 ; X86-NEXT: vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2] 2850 ; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] 2851 ; X86-NEXT: retl # encoding: [0xc3] 2852 ; 2853 ; X64-LABEL: test_int_x86_avx512_mask_psll_wi_128: 2854 ; X64: # %bb.0: 2855 ; X64-NEXT: vpsllw $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xf0,0x03] 2856 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 2857 ; X64-NEXT: vpsllw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x71,0xf0,0x03] 2858 ; X64-NEXT: vpsllw $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x71,0xf0,0x03] 2859 ; X64-NEXT: vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2] 2860 ; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] 2861 ; X64-NEXT: retq # encoding: [0xc3] 2862 %res = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3) 2863 %res1 = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i32 3, <8 x i16> zeroinitializer, i8 %x3) 2864 %res2 = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 -1) 2865 %res3 = add <8 x i16> %res, %res1 2866 %res4 = add <8 x i16> %res3, %res2 2867 ret <8 x i16> %res4 2868 } 2869 2870 declare <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16>, i32, <16 x i16>, i16) 2871 2872 define <16 x i16>@test_int_x86_avx512_mask_psll_wi_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) { 2873 
; X86-LABEL: test_int_x86_avx512_mask_psll_wi_256: 2874 ; X86: # %bb.0: 2875 ; X86-NEXT: vpsllw $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xf0,0x03] 2876 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 2877 ; X86-NEXT: vpsllw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x71,0xf0,0x03] 2878 ; X86-NEXT: vpsllw $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x71,0xf0,0x03] 2879 ; X86-NEXT: vpaddw %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2] 2880 ; X86-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] 2881 ; X86-NEXT: retl # encoding: [0xc3] 2882 ; 2883 ; X64-LABEL: test_int_x86_avx512_mask_psll_wi_256: 2884 ; X64: # %bb.0: 2885 ; X64-NEXT: vpsllw $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xf0,0x03] 2886 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 2887 ; X64-NEXT: vpsllw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x71,0xf0,0x03] 2888 ; X64-NEXT: vpsllw $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x71,0xf0,0x03] 2889 ; X64-NEXT: vpaddw %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2] 2890 ; X64-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] 2891 ; X64-NEXT: retq # encoding: [0xc3] 2892 %res = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3) 2893 %res1 = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i32 3, <16 x i16> zeroinitializer, i16 %x3) 2894 %res2 = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 -1) 2895 %res3 = add <16 x i16> %res, %res1 2896 %res4 = add <16 x i16> %res3, %res2 2897 ret <16 x i16> %res4 2898 } 2899 2900 declare <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) 2901 2902 define <16 x i8>@test_int_x86_avx512_mask_pshuf_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) { 2903 ; X86-LABEL: test_int_x86_avx512_mask_pshuf_b_128: 2904 ; X86: # %bb.0: 2905 ; X86-NEXT: vpshufb %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x00,0xd9] 2906 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2907 ; X86-NEXT: vpshufb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x00,0xd1] 2908 ; X86-NEXT: vpaddb %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc3] 2909 ; X86-NEXT: retl # encoding: [0xc3] 2910 ; 2911 ; X64-LABEL: test_int_x86_avx512_mask_pshuf_b_128: 2912 ; X64: # %bb.0: 2913 ; X64-NEXT: vpshufb %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x00,0xd9] 2914 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2915 ; X64-NEXT: vpshufb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x00,0xd1] 2916 ; X64-NEXT: vpaddb %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc3] 2917 ; X64-NEXT: retq # encoding: [0xc3] 2918 %res = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) 2919 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1) 2920 %res2 = add <16 x i8> %res, %res1 2921 ret <16 x i8> %res2 2922 } 2923 2924 declare <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 2925 2926 define <32 x 
i8>@test_int_x86_avx512_mask_pshuf_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { 2927 ; X86-LABEL: test_int_x86_avx512_mask_pshuf_b_256: 2928 ; X86: # %bb.0: 2929 ; X86-NEXT: vpshufb %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x00,0xd9] 2930 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 2931 ; X86-NEXT: vpshufb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x00,0xd1] 2932 ; X86-NEXT: vpaddb %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc3] 2933 ; X86-NEXT: retl # encoding: [0xc3] 2934 ; 2935 ; X64-LABEL: test_int_x86_avx512_mask_pshuf_b_256: 2936 ; X64: # %bb.0: 2937 ; X64-NEXT: vpshufb %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x00,0xd9] 2938 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2939 ; X64-NEXT: vpshufb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x00,0xd1] 2940 ; X64-NEXT: vpaddb %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc3] 2941 ; X64-NEXT: retq # encoding: [0xc3] 2942 %res = call <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) 2943 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) 2944 %res2 = add <32 x i8> %res, %res1 2945 ret <32 x i8> %res2 2946 } 2947 2948 declare <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8>, <8 x i16>, i8) 2949 2950 define <8 x i16>@test_int_x86_avx512_mask_pmovzxb_w_128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2) { 2951 ; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_w_128: 2952 ; X86: # %bb.0: 2953 ; X86-NEXT: vpmovzxbw %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x30,0xd0] 2954 ; X86-NEXT: # xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2955 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2956 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 2957 ; X86-NEXT: vpmovzxbw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x30,0xc8] 2958 ; X86-NEXT: # xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2959 ; X86-NEXT: vpmovzxbw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x30,0xc0] 2960 ; X86-NEXT: # xmm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2961 ; X86-NEXT: vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2] 2962 ; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] 2963 ; X86-NEXT: retl # encoding: [0xc3] 2964 ; 2965 ; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_w_128: 2966 ; X64: # %bb.0: 2967 ; X64-NEXT: vpmovzxbw %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x30,0xd0] 2968 ; X64-NEXT: # xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2969 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2970 ; X64-NEXT: vpmovzxbw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x30,0xc8] 2971 ; X64-NEXT: # xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2972 ; X64-NEXT: vpmovzxbw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x30,0xc0] 2973 ; X64-NEXT: # xmm0 {%k1} {z} = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2974 ; X64-NEXT: vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2] 2975 ; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] 2976 ; X64-NEXT: retq # encoding: [0xc3] 2977 %res = call <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2) 2978 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8> %x0, <8 x i16> zeroinitializer, i8 %x2) 2979 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 -1) 2980 %res3 = add <8 x i16> %res, %res1 2981 %res4 = add <8 x i16> %res3, %res2 2982 ret <8 x i16> %res4 2983 } 2984 2985 declare <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8>, <16 x i16>, i16) 2986 2987 define <16 x i16>@test_int_x86_avx512_mask_pmovzxb_w_256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2) { 2988 ; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_w_256: 2989 ; X86: # %bb.0: 2990 ; X86-NEXT: vpmovzxbw %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x30,0xd0] 2991 ; X86-NEXT: # ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 2992 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2993 ; X86-NEXT: vpmovzxbw %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x30,0xc8] 2994 ; X86-NEXT: # ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 2995 ; X86-NEXT: vpmovzxbw %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x30,0xc0] 2996 ; X86-NEXT: # ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 2997 ; X86-NEXT: vpaddw %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2] 2998 ; X86-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] 2999 ; X86-NEXT: retl # encoding: [0xc3] 3000 ; 3001 ; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_w_256: 3002 ; X64: # %bb.0: 3003 ; X64-NEXT: vpmovzxbw %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x30,0xd0] 3004 ; X64-NEXT: # ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 3005 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3006 ; X64-NEXT: vpmovzxbw %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x30,0xc8] 3007 ; X64-NEXT: # ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 3008 ; X64-NEXT: vpmovzxbw %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x30,0xc0] 3009 ; X64-NEXT: # ymm0 {%k1} {z} = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 3010 ; X64-NEXT: vpaddw %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2] 3011 ; X64-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] 3012 ; X64-NEXT: retq # encoding: [0xc3] 3013 %res = call <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2) 3014 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8> %x0, <16 x i16> zeroinitializer, i16 %x2) 3015 %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 -1) 3016 %res3 = add <16 x i16> %res, %res1 3017 %res4 = add <16 x i16> %res3, %res2 3018 ret <16 x i16> %res4 3019 } 3020 3021 3022 declare <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8>, <8 x i16>, i8) 3023 3024 define <8 x i16>@test_int_x86_avx512_mask_pmovsxb_w_128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2) { 3025 ; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_w_128: 3026 ; X86: # %bb.0: 3027 ; X86-NEXT: vpmovsxbw %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x20,0xd0] 3028 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 3029 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 3030 ; X86-NEXT: vpmovsxbw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x20,0xc8] 3031 ; X86-NEXT: vpmovsxbw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x20,0xc0] 3032 ; X86-NEXT: vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2] 3033 ; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] 3034 ; X86-NEXT: retl # encoding: [0xc3] 3035 ; 3036 ; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_w_128: 3037 ; X64: # %bb.0: 3038 ; X64-NEXT: vpmovsxbw %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x20,0xd0] 3039 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3040 ; X64-NEXT: vpmovsxbw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x20,0xc8] 3041 ; X64-NEXT: vpmovsxbw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x20,0xc0] 3042 ; X64-NEXT: vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2] 3043 ; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] 3044 ; X64-NEXT: retq # encoding: [0xc3] 3045 %res = call <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2) 3046 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8> %x0, <8 x i16> zeroinitializer, i8 %x2) 3047 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 -1) 3048 %res3 = add <8 x i16> %res, %res1 3049 %res4 = add <8 x i16> %res3, %res2 3050 ret <8 x i16> %res4 3051 } 3052 3053 declare <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8>, <16 x i16>, i16) 3054 3055 define <16 x i16>@test_int_x86_avx512_mask_pmovsxb_w_256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2) { 3056 ; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_w_256: 3057 ; X86: # %bb.0: 3058 ; X86-NEXT: vpmovsxbw %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x20,0xd0] 3059 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 3060 ; X86-NEXT: vpmovsxbw %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x20,0xc8] 3061 ; X86-NEXT: 
vpmovsxbw %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x20,0xc0] 3062 ; X86-NEXT: vpaddw %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2] 3063 ; X86-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] 3064 ; X86-NEXT: retl # encoding: [0xc3] 3065 ; 3066 ; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_w_256: 3067 ; X64: # %bb.0: 3068 ; X64-NEXT: vpmovsxbw %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x20,0xd0] 3069 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3070 ; X64-NEXT: vpmovsxbw %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x20,0xc8] 3071 ; X64-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x20,0xc0] 3072 ; X64-NEXT: vpaddw %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2] 3073 ; X64-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] 3074 ; X64-NEXT: retq # encoding: [0xc3] 3075 %res = call <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2) 3076 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8> %x0, <16 x i16> zeroinitializer, i16 %x2) 3077 %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 -1) 3078 %res3 = add <16 x i16> %res, %res1 3079 %res4 = add <16 x i16> %res3, %res2 3080 ret <16 x i16> %res4 3081 } 3082 3083 declare <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32>, <2 x i64>, i8) 3084 3085 define <2 x i64>@test_int_x86_avx512_mask_pmovsxd_q_128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2) { 3086 ; X86-LABEL: test_int_x86_avx512_mask_pmovsxd_q_128: 3087 ; X86: # %bb.0: 3088 ; X86-NEXT: vpmovsxdq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x25,0xd0] 3089 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 3090 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 3091 ; X86-NEXT: vpmovsxdq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x25,0xc8] 3092 ; X86-NEXT: vpmovsxdq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x25,0xc0] 3093 ; X86-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2] 3094 ; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] 3095 ; X86-NEXT: retl # encoding: [0xc3] 3096 ; 3097 ; X64-LABEL: test_int_x86_avx512_mask_pmovsxd_q_128: 3098 ; X64: # %bb.0: 3099 ; X64-NEXT: vpmovsxdq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x25,0xd0] 3100 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3101 ; X64-NEXT: vpmovsxdq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x25,0xc8] 3102 ; X64-NEXT: vpmovsxdq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x25,0xc0] 3103 ; X64-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2] 3104 ; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] 3105 ; X64-NEXT: retq # encoding: [0xc3] 3106 %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2) 3107 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32> %x0, <2 x i64> zeroinitializer, i8 %x2) 3108 %res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 -1) 3109 %res3 = add <2 x i64> %res, %res1 3110 %res4 = add <2 x i64> %res3, %res2 3111 ret <2 x i64> %res4 3112 } 3113 3114 declare <4 x i64> 
@llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32>, <4 x i64>, i8) 3115 3116 define <4 x i64>@test_int_x86_avx512_mask_pmovsxd_q_256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2) { 3117 ; X86-LABEL: test_int_x86_avx512_mask_pmovsxd_q_256: 3118 ; X86: # %bb.0: 3119 ; X86-NEXT: vpmovsxdq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x25,0xd0] 3120 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 3121 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 3122 ; X86-NEXT: vpmovsxdq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x25,0xc8] 3123 ; X86-NEXT: vpmovsxdq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x25,0xc0] 3124 ; X86-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2] 3125 ; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] 3126 ; X86-NEXT: retl # encoding: [0xc3] 3127 ; 3128 ; X64-LABEL: test_int_x86_avx512_mask_pmovsxd_q_256: 3129 ; X64: # %bb.0: 3130 ; X64-NEXT: vpmovsxdq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x25,0xd0] 3131 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3132 ; X64-NEXT: vpmovsxdq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x25,0xc8] 3133 ; X64-NEXT: vpmovsxdq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x25,0xc0] 3134 ; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2] 3135 ; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] 3136 ; X64-NEXT: retq # encoding: [0xc3] 3137 %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2) 3138 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32> %x0, <4 x i64> zeroinitializer, i8 %x2) 3139 %res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 -1) 3140 %res3 = add <4 x i64> %res, %res1 3141 %res4 = add <4 x i64> %res3, %res2 3142 ret <4 x i64> %res4 3143 } 3144 3145 3146 declare <16 x i8> @llvm.x86.avx512.cvtmask2b.128(i16) 3147 3148 define <16 x i8>@test_int_x86_avx512_cvtmask2b_128(i16 %x0) { 3149 ; X86-LABEL: test_int_x86_avx512_cvtmask2b_128: 3150 ; X86: # %bb.0: 3151 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf8,0x90,0x44,0x24,0x04] 3152 ; X86-NEXT: vpmovm2b %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0] 3153 ; X86-NEXT: retl # encoding: [0xc3] 3154 ; 3155 ; X64-LABEL: test_int_x86_avx512_cvtmask2b_128: 3156 ; X64: # %bb.0: 3157 ; X64-NEXT: kmovd %edi, %k0 # encoding: [0xc5,0xfb,0x92,0xc7] 3158 ; X64-NEXT: vpmovm2b %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0] 3159 ; X64-NEXT: retq # encoding: [0xc3] 3160 %res = call <16 x i8> @llvm.x86.avx512.cvtmask2b.128(i16 %x0) 3161 ret <16 x i8> %res 3162 } 3163 3164 declare <32 x i8> @llvm.x86.avx512.cvtmask2b.256(i32) 3165 3166 define <32 x i8>@test_int_x86_avx512_cvtmask2b_256(i32 %x0) { 3167 ; X86-LABEL: test_int_x86_avx512_cvtmask2b_256: 3168 ; X86: # %bb.0: 3169 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x04] 3170 ; X86-NEXT: vpmovm2b %k0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x28,0x28,0xc0] 3171 ; X86-NEXT: retl # encoding: [0xc3] 3172 ; 3173 ; X64-LABEL: test_int_x86_avx512_cvtmask2b_256: 3174 ; X64: # %bb.0: 3175 ; X64-NEXT: kmovd %edi, %k0 # encoding: [0xc5,0xfb,0x92,0xc7] 3176 ; X64-NEXT: vpmovm2b %k0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x28,0x28,0xc0] 3177 ; X64-NEXT: retq # encoding: [0xc3] 3178 %res = call <32 
x i8> @llvm.x86.avx512.cvtmask2b.256(i32 %x0)
ret <32 x i8> %res
}

declare <8 x i16> @llvm.x86.avx512.cvtmask2w.128(i8)

define <8 x i16>@test_int_x86_avx512_cvtmask2w_128(i8 %x0) {
; X86-LABEL: test_int_x86_avx512_cvtmask2w_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k0 # encoding: [0xc5,0xfb,0x92,0xc0]
; X86-NEXT: vpmovm2w %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvtmask2w_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k0 # encoding: [0xc5,0xfb,0x92,0xc7]
; X64-NEXT: vpmovm2w %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.avx512.cvtmask2w.128(i8 %x0)
ret <8 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.cvtmask2w.256(i16)

define <16 x i16>@test_int_x86_avx512_cvtmask2w_256(i16 %x0) {
; X86-LABEL: test_int_x86_avx512_cvtmask2w_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf8,0x90,0x44,0x24,0x04]
; X86-NEXT: vpmovm2w %k0, %ymm0 # encoding: [0x62,0xf2,0xfe,0x28,0x28,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvtmask2w_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k0 # encoding: [0xc5,0xfb,0x92,0xc7]
; X64-NEXT: vpmovm2w %k0, %ymm0 # encoding: [0x62,0xf2,0xfe,0x28,0x28,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <16 x i16> @llvm.x86.avx512.cvtmask2w.256(i16 %x0)
ret <16 x i16> %res
}
define <8 x i16> @test_mask_packs_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_mask_packs_epi32_rr_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
ret <8 x i16> %res
}

define <8 x i16> @test_mask_packs_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_mask_packs_epi32_rrk_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpackssdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rrk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackssdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
ret <8 x i16> %res
}

define <8 x i16> @test_mask_packs_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_packs_epi32_rrkz_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 3251 ; X86-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0xc1] 3252 ; X86-NEXT: retl # encoding: [0xc3] 3253 ; 3254 ; X64-LABEL: test_mask_packs_epi32_rrkz_128: 3255 ; X64: # %bb.0: 3256 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3257 ; X64-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0xc1] 3258 ; X64-NEXT: retq # encoding: [0xc3] 3259 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask) 3260 ret <8 x i16> %res 3261 } 3262 3263 define <8 x i16> @test_mask_packs_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { 3264 ; X86-LABEL: test_mask_packs_epi32_rm_128: 3265 ; X86: # %bb.0: 3266 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3267 ; X86-NEXT: vpackssdw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0x00] 3268 ; X86-NEXT: retl # encoding: [0xc3] 3269 ; 3270 ; X64-LABEL: test_mask_packs_epi32_rm_128: 3271 ; X64: # %bb.0: 3272 ; X64-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0x07] 3273 ; X64-NEXT: retq # encoding: [0xc3] 3274 %b = load <4 x i32>, <4 x i32>* %ptr_b 3275 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1) 3276 ret <8 x i16> %res 3277 } 3278 3279 define <8 x i16> @test_mask_packs_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <8 x i16> %passThru, i8 %mask) { 3280 ; X86-LABEL: test_mask_packs_epi32_rmk_128: 3281 ; X86: # %bb.0: 3282 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3283 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 3284 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 3285 ; X86-NEXT: vpackssdw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0x08] 3286 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 3287 ; X86-NEXT: retl # encoding: [0xc3] 3288 ; 3289 ; X64-LABEL: test_mask_packs_epi32_rmk_128: 3290 ; X64: # %bb.0: 3291 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3292 ; X64-NEXT: vpackssdw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0x0f] 3293 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 3294 ; X64-NEXT: retq # encoding: [0xc3] 3295 %b = load <4 x i32>, <4 x i32>* %ptr_b 3296 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) 3297 ret <8 x i16> %res 3298 } 3299 3300 define <8 x i16> @test_mask_packs_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { 3301 ; X86-LABEL: test_mask_packs_epi32_rmkz_128: 3302 ; X86: # %bb.0: 3303 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3304 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 3305 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 3306 ; X86-NEXT: vpackssdw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0x00] 3307 ; X86-NEXT: retl # encoding: [0xc3] 3308 ; 3309 ; X64-LABEL: test_mask_packs_epi32_rmkz_128: 3310 ; X64: # %bb.0: 3311 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3312 ; X64-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0x07] 3313 ; X64-NEXT: retq # encoding: [0xc3] 3314 %b = load <4 x 
i32>, <4 x i32>* %ptr_b 3315 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask) 3316 ret <8 x i16> %res 3317 } 3318 3319 define <8 x i16> @test_mask_packs_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) { 3320 ; X86-LABEL: test_mask_packs_epi32_rmb_128: 3321 ; X86: # %bb.0: 3322 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3323 ; X86-NEXT: vpackssdw (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x6b,0x00] 3324 ; X86-NEXT: retl # encoding: [0xc3] 3325 ; 3326 ; X64-LABEL: test_mask_packs_epi32_rmb_128: 3327 ; X64: # %bb.0: 3328 ; X64-NEXT: vpackssdw (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x6b,0x07] 3329 ; X64-NEXT: retq # encoding: [0xc3] 3330 %q = load i32, i32* %ptr_b 3331 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 3332 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 3333 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1) 3334 ret <8 x i16> %res 3335 } 3336 3337 define <8 x i16> @test_mask_packs_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <8 x i16> %passThru, i8 %mask) { 3338 ; X86-LABEL: test_mask_packs_epi32_rmbk_128: 3339 ; X86: # %bb.0: 3340 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3341 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 3342 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 3343 ; X86-NEXT: vpackssdw (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0x6b,0x08] 3344 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 3345 ; X86-NEXT: retl # encoding: [0xc3] 3346 ; 3347 ; X64-LABEL: test_mask_packs_epi32_rmbk_128: 3348 ; X64: # %bb.0: 3349 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3350 ; X64-NEXT: vpackssdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0x6b,0x0f] 3351 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 3352 ; X64-NEXT: retq # encoding: [0xc3] 3353 %q = load i32, i32* %ptr_b 3354 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 3355 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 3356 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) 3357 ret <8 x i16> %res 3358 } 3359 3360 define <8 x i16> @test_mask_packs_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { 3361 ; X86-LABEL: test_mask_packs_epi32_rmbkz_128: 3362 ; X86: # %bb.0: 3363 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3364 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 3365 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 3366 ; X86-NEXT: vpackssdw (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0x6b,0x00] 3367 ; X86-NEXT: retl # encoding: [0xc3] 3368 ; 3369 ; X64-LABEL: test_mask_packs_epi32_rmbkz_128: 3370 ; X64: # %bb.0: 3371 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3372 ; X64-NEXT: vpackssdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0x6b,0x07] 3373 ; X64-NEXT: retq # encoding: [0xc3] 3374 %q = load i32, i32* %ptr_b 3375 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 3376 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 3377 %res = call <8 x i16> 
@llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask) 3378 ret <8 x i16> %res 3379 } 3380 3381 declare <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32>, <4 x i32>, <8 x i16>, i8) 3382 3383 define <16 x i16> @test_mask_packs_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { 3384 ; CHECK-LABEL: test_mask_packs_epi32_rr_256: 3385 ; CHECK: # %bb.0: 3386 ; CHECK-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0xc1] 3387 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3388 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1) 3389 ret <16 x i16> %res 3390 } 3391 3392 define <16 x i16> @test_mask_packs_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) { 3393 ; X86-LABEL: test_mask_packs_epi32_rrk_256: 3394 ; X86: # %bb.0: 3395 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 3396 ; X86-NEXT: vpackssdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0xd1] 3397 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 3398 ; X86-NEXT: retl # encoding: [0xc3] 3399 ; 3400 ; X64-LABEL: test_mask_packs_epi32_rrk_256: 3401 ; X64: # %bb.0: 3402 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3403 ; X64-NEXT: vpackssdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0xd1] 3404 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 3405 ; X64-NEXT: retq # encoding: [0xc3] 3406 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) 3407 ret <16 x i16> %res 3408 } 3409 3410 define <16 x i16> @test_mask_packs_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) { 3411 ; X86-LABEL: test_mask_packs_epi32_rrkz_256: 3412 ; X86: # %bb.0: 3413 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 3414 ; X86-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0xc1] 3415 ; X86-NEXT: retl # encoding: [0xc3] 3416 ; 3417 ; X64-LABEL: test_mask_packs_epi32_rrkz_256: 3418 ; X64: # %bb.0: 3419 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3420 ; X64-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0xc1] 3421 ; X64-NEXT: retq # encoding: [0xc3] 3422 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask) 3423 ret <16 x i16> %res 3424 } 3425 3426 define <16 x i16> @test_mask_packs_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { 3427 ; X86-LABEL: test_mask_packs_epi32_rm_256: 3428 ; X86: # %bb.0: 3429 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3430 ; X86-NEXT: vpackssdw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0x00] 3431 ; X86-NEXT: retl # encoding: [0xc3] 3432 ; 3433 ; X64-LABEL: test_mask_packs_epi32_rm_256: 3434 ; X64: # %bb.0: 3435 ; X64-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0x07] 3436 ; X64-NEXT: retq # encoding: [0xc3] 3437 %b = load <8 x i32>, <8 x i32>* %ptr_b 3438 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1) 3439 ret <16 x i16> %res 3440 } 3441 3442 define <16 x i16> @test_mask_packs_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <16 x i16> %passThru, i16 %mask) { 
3443 ; X86-LABEL: test_mask_packs_epi32_rmk_256: 3444 ; X86: # %bb.0: 3445 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3446 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 3447 ; X86-NEXT: vpackssdw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0x08] 3448 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 3449 ; X86-NEXT: retl # encoding: [0xc3] 3450 ; 3451 ; X64-LABEL: test_mask_packs_epi32_rmk_256: 3452 ; X64: # %bb.0: 3453 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3454 ; X64-NEXT: vpackssdw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0x0f] 3455 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 3456 ; X64-NEXT: retq # encoding: [0xc3] 3457 %b = load <8 x i32>, <8 x i32>* %ptr_b 3458 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) 3459 ret <16 x i16> %res 3460 } 3461 3462 define <16 x i16> @test_mask_packs_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i16 %mask) { 3463 ; X86-LABEL: test_mask_packs_epi32_rmkz_256: 3464 ; X86: # %bb.0: 3465 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3466 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 3467 ; X86-NEXT: vpackssdw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0x00] 3468 ; X86-NEXT: retl # encoding: [0xc3] 3469 ; 3470 ; X64-LABEL: test_mask_packs_epi32_rmkz_256: 3471 ; X64: # %bb.0: 3472 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3473 ; X64-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0x07] 3474 ; X64-NEXT: retq # encoding: [0xc3] 3475 %b = load <8 x i32>, <8 x i32>* %ptr_b 3476 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask) 3477 ret <16 x i16> %res 3478 } 3479 3480 define <16 x i16> @test_mask_packs_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) { 3481 ; X86-LABEL: test_mask_packs_epi32_rmb_256: 3482 ; X86: # %bb.0: 3483 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3484 ; X86-NEXT: vpackssdw (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0x6b,0x00] 3485 ; X86-NEXT: retl # encoding: [0xc3] 3486 ; 3487 ; X64-LABEL: test_mask_packs_epi32_rmb_256: 3488 ; X64: # %bb.0: 3489 ; X64-NEXT: vpackssdw (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0x6b,0x07] 3490 ; X64-NEXT: retq # encoding: [0xc3] 3491 %q = load i32, i32* %ptr_b 3492 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 3493 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 3494 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1) 3495 ret <16 x i16> %res 3496 } 3497 3498 define <16 x i16> @test_mask_packs_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <16 x i16> %passThru, i16 %mask) { 3499 ; X86-LABEL: test_mask_packs_epi32_rmbk_256: 3500 ; X86: # %bb.0: 3501 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3502 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 3503 ; X86-NEXT: vpackssdw (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0x6b,0x08] 3504 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 3505 ; X86-NEXT: retl # 
encoding: [0xc3] 3506 ; 3507 ; X64-LABEL: test_mask_packs_epi32_rmbk_256: 3508 ; X64: # %bb.0: 3509 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3510 ; X64-NEXT: vpackssdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0x6b,0x0f] 3511 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 3512 ; X64-NEXT: retq # encoding: [0xc3] 3513 %q = load i32, i32* %ptr_b 3514 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 3515 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 3516 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) 3517 ret <16 x i16> %res 3518 } 3519 3520 define <16 x i16> @test_mask_packs_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i16 %mask) { 3521 ; X86-LABEL: test_mask_packs_epi32_rmbkz_256: 3522 ; X86: # %bb.0: 3523 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3524 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 3525 ; X86-NEXT: vpackssdw (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0x6b,0x00] 3526 ; X86-NEXT: retl # encoding: [0xc3] 3527 ; 3528 ; X64-LABEL: test_mask_packs_epi32_rmbkz_256: 3529 ; X64: # %bb.0: 3530 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3531 ; X64-NEXT: vpackssdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0x6b,0x07] 3532 ; X64-NEXT: retq # encoding: [0xc3] 3533 %q = load i32, i32* %ptr_b 3534 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 3535 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 3536 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask) 3537 ret <16 x i16> %res 3538 } 3539 3540 declare <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32>, <8 x i32>, <16 x i16>, i16) 3541 3542 define <16 x i8> @test_mask_packs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { 3543 ; CHECK-LABEL: test_mask_packs_epi16_rr_128: 3544 ; CHECK: # %bb.0: 3545 ; CHECK-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0xc1] 3546 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3547 %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1) 3548 ret <16 x i8> %res 3549 } 3550 3551 define <16 x i8> @test_mask_packs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) { 3552 ; X86-LABEL: test_mask_packs_epi16_rrk_128: 3553 ; X86: # %bb.0: 3554 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 3555 ; X86-NEXT: vpacksswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0xd1] 3556 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 3557 ; X86-NEXT: retl # encoding: [0xc3] 3558 ; 3559 ; X64-LABEL: test_mask_packs_epi16_rrk_128: 3560 ; X64: # %bb.0: 3561 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3562 ; X64-NEXT: vpacksswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0xd1] 3563 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 3564 ; X64-NEXT: retq # encoding: [0xc3] 3565 %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) 3566 ret <16 x i8> %res 3567 } 3568 3569 define <16 x i8> 
@test_mask_packs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) { 3570 ; X86-LABEL: test_mask_packs_epi16_rrkz_128: 3571 ; X86: # %bb.0: 3572 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 3573 ; X86-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0xc1] 3574 ; X86-NEXT: retl # encoding: [0xc3] 3575 ; 3576 ; X64-LABEL: test_mask_packs_epi16_rrkz_128: 3577 ; X64: # %bb.0: 3578 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3579 ; X64-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0xc1] 3580 ; X64-NEXT: retq # encoding: [0xc3] 3581 %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask) 3582 ret <16 x i8> %res 3583 } 3584 3585 define <16 x i8> @test_mask_packs_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { 3586 ; X86-LABEL: test_mask_packs_epi16_rm_128: 3587 ; X86: # %bb.0: 3588 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3589 ; X86-NEXT: vpacksswb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0x00] 3590 ; X86-NEXT: retl # encoding: [0xc3] 3591 ; 3592 ; X64-LABEL: test_mask_packs_epi16_rm_128: 3593 ; X64: # %bb.0: 3594 ; X64-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0x07] 3595 ; X64-NEXT: retq # encoding: [0xc3] 3596 %b = load <8 x i16>, <8 x i16>* %ptr_b 3597 %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1) 3598 ret <16 x i8> %res 3599 } 3600 3601 define <16 x i8> @test_mask_packs_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <16 x i8> %passThru, i16 %mask) { 3602 ; X86-LABEL: test_mask_packs_epi16_rmk_128: 3603 ; X86: # %bb.0: 3604 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3605 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 3606 ; X86-NEXT: vpacksswb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0x08] 3607 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 3608 ; X86-NEXT: retl # encoding: [0xc3] 3609 ; 3610 ; X64-LABEL: test_mask_packs_epi16_rmk_128: 3611 ; X64: # %bb.0: 3612 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3613 ; X64-NEXT: vpacksswb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0x0f] 3614 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 3615 ; X64-NEXT: retq # encoding: [0xc3] 3616 %b = load <8 x i16>, <8 x i16>* %ptr_b 3617 %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) 3618 ret <16 x i8> %res 3619 } 3620 3621 define <16 x i8> @test_mask_packs_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i16 %mask) { 3622 ; X86-LABEL: test_mask_packs_epi16_rmkz_128: 3623 ; X86: # %bb.0: 3624 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3625 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 3626 ; X86-NEXT: vpacksswb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0x00] 3627 ; X86-NEXT: retl # encoding: [0xc3] 3628 ; 3629 ; X64-LABEL: test_mask_packs_epi16_rmkz_128: 3630 ; X64: # %bb.0: 3631 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3632 ; X64-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0x07] 3633 ; 
X64-NEXT: retq # encoding: [0xc3] 3634 %b = load <8 x i16>, <8 x i16>* %ptr_b 3635 %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask) 3636 ret <16 x i8> %res 3637 } 3638 3639 declare <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16>, <8 x i16>, <16 x i8>, i16) 3640 3641 define <32 x i8> @test_mask_packs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { 3642 ; CHECK-LABEL: test_mask_packs_epi16_rr_256: 3643 ; CHECK: # %bb.0: 3644 ; CHECK-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0xc1] 3645 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3646 %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1) 3647 ret <32 x i8> %res 3648 } 3649 3650 define <32 x i8> @test_mask_packs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) { 3651 ; X86-LABEL: test_mask_packs_epi16_rrk_256: 3652 ; X86: # %bb.0: 3653 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 3654 ; X86-NEXT: vpacksswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0xd1] 3655 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 3656 ; X86-NEXT: retl # encoding: [0xc3] 3657 ; 3658 ; X64-LABEL: test_mask_packs_epi16_rrk_256: 3659 ; X64: # %bb.0: 3660 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3661 ; X64-NEXT: vpacksswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0xd1] 3662 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 3663 ; X64-NEXT: retq # encoding: [0xc3] 3664 %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) 3665 ret <32 x i8> %res 3666 } 3667 3668 define <32 x i8> @test_mask_packs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i32 %mask) { 3669 ; X86-LABEL: test_mask_packs_epi16_rrkz_256: 3670 ; X86: # %bb.0: 3671 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 3672 ; X86-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0xc1] 3673 ; X86-NEXT: retl # encoding: [0xc3] 3674 ; 3675 ; X64-LABEL: test_mask_packs_epi16_rrkz_256: 3676 ; X64: # %bb.0: 3677 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3678 ; X64-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0xc1] 3679 ; X64-NEXT: retq # encoding: [0xc3] 3680 %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask) 3681 ret <32 x i8> %res 3682 } 3683 3684 define <32 x i8> @test_mask_packs_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { 3685 ; X86-LABEL: test_mask_packs_epi16_rm_256: 3686 ; X86: # %bb.0: 3687 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3688 ; X86-NEXT: vpacksswb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0x00] 3689 ; X86-NEXT: retl # encoding: [0xc3] 3690 ; 3691 ; X64-LABEL: test_mask_packs_epi16_rm_256: 3692 ; X64: # %bb.0: 3693 ; X64-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0x07] 3694 ; X64-NEXT: retq # encoding: [0xc3] 3695 %b = load <16 x i16>, <16 x i16>* %ptr_b 3696 %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1) 3697 ret <32 x i8> %res 3698 } 3699 3700 
define <32 x i8> @test_mask_packs_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <32 x i8> %passThru, i32 %mask) { 3701 ; X86-LABEL: test_mask_packs_epi16_rmk_256: 3702 ; X86: # %bb.0: 3703 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3704 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 3705 ; X86-NEXT: vpacksswb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0x08] 3706 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 3707 ; X86-NEXT: retl # encoding: [0xc3] 3708 ; 3709 ; X64-LABEL: test_mask_packs_epi16_rmk_256: 3710 ; X64: # %bb.0: 3711 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3712 ; X64-NEXT: vpacksswb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0x0f] 3713 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 3714 ; X64-NEXT: retq # encoding: [0xc3] 3715 %b = load <16 x i16>, <16 x i16>* %ptr_b 3716 %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) 3717 ret <32 x i8> %res 3718 } 3719 3720 define <32 x i8> @test_mask_packs_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i32 %mask) { 3721 ; X86-LABEL: test_mask_packs_epi16_rmkz_256: 3722 ; X86: # %bb.0: 3723 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3724 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 3725 ; X86-NEXT: vpacksswb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0x00] 3726 ; X86-NEXT: retl # encoding: [0xc3] 3727 ; 3728 ; X64-LABEL: test_mask_packs_epi16_rmkz_256: 3729 ; X64: # %bb.0: 3730 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3731 ; X64-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0x07] 3732 ; X64-NEXT: retq # encoding: [0xc3] 3733 %b = load <16 x i16>, <16 x i16>* %ptr_b 3734 %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask) 3735 ret <32 x i8> %res 3736 } 3737 3738 declare <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16>, <16 x i16>, <32 x i8>, i32) 3739 3740 3741 define <8 x i16> @test_mask_packus_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { 3742 ; CHECK-LABEL: test_mask_packus_epi32_rr_128: 3743 ; CHECK: # %bb.0: 3744 ; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0xc1] 3745 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3746 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1) 3747 ret <8 x i16> %res 3748 } 3749 3750 define <8 x i16> @test_mask_packus_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) { 3751 ; X86-LABEL: test_mask_packus_epi32_rrk_128: 3752 ; X86: # %bb.0: 3753 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 3754 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 3755 ; X86-NEXT: vpackusdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0xd1] 3756 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 3757 ; X86-NEXT: retl # encoding: [0xc3] 3758 ; 3759 ; X64-LABEL: test_mask_packus_epi32_rrk_128: 3760 ; X64: # %bb.0: 3761 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3762 ; X64-NEXT: vpackusdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: 
[0x62,0xf2,0x7d,0x09,0x2b,0xd1] 3763 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 3764 ; X64-NEXT: retq # encoding: [0xc3] 3765 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) 3766 ret <8 x i16> %res 3767 } 3768 3769 define <8 x i16> @test_mask_packus_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { 3770 ; X86-LABEL: test_mask_packus_epi32_rrkz_128: 3771 ; X86: # %bb.0: 3772 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 3773 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 3774 ; X86-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0xc1] 3775 ; X86-NEXT: retl # encoding: [0xc3] 3776 ; 3777 ; X64-LABEL: test_mask_packus_epi32_rrkz_128: 3778 ; X64: # %bb.0: 3779 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3780 ; X64-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0xc1] 3781 ; X64-NEXT: retq # encoding: [0xc3] 3782 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask) 3783 ret <8 x i16> %res 3784 } 3785 3786 define <8 x i16> @test_mask_packus_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { 3787 ; X86-LABEL: test_mask_packus_epi32_rm_128: 3788 ; X86: # %bb.0: 3789 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3790 ; X86-NEXT: vpackusdw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0x00] 3791 ; X86-NEXT: retl # encoding: [0xc3] 3792 ; 3793 ; X64-LABEL: test_mask_packus_epi32_rm_128: 3794 ; X64: # %bb.0: 3795 ; X64-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0x07] 3796 ; X64-NEXT: retq # encoding: [0xc3] 3797 %b = load <4 x i32>, <4 x i32>* %ptr_b 3798 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1) 3799 ret <8 x i16> %res 3800 } 3801 3802 define <8 x i16> @test_mask_packus_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <8 x i16> %passThru, i8 %mask) { 3803 ; X86-LABEL: test_mask_packus_epi32_rmk_128: 3804 ; X86: # %bb.0: 3805 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3806 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 3807 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 3808 ; X86-NEXT: vpackusdw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0x08] 3809 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 3810 ; X86-NEXT: retl # encoding: [0xc3] 3811 ; 3812 ; X64-LABEL: test_mask_packus_epi32_rmk_128: 3813 ; X64: # %bb.0: 3814 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3815 ; X64-NEXT: vpackusdw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0x0f] 3816 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 3817 ; X64-NEXT: retq # encoding: [0xc3] 3818 %b = load <4 x i32>, <4 x i32>* %ptr_b 3819 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) 3820 ret <8 x i16> %res 3821 } 3822 3823 define <8 x i16> @test_mask_packus_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { 3824 ; X86-LABEL: test_mask_packus_epi32_rmkz_128: 3825 ; X86: # %bb.0: 3826 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 
3827 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 3828 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 3829 ; X86-NEXT: vpackusdw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0x00] 3830 ; X86-NEXT: retl # encoding: [0xc3] 3831 ; 3832 ; X64-LABEL: test_mask_packus_epi32_rmkz_128: 3833 ; X64: # %bb.0: 3834 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3835 ; X64-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0x07] 3836 ; X64-NEXT: retq # encoding: [0xc3] 3837 %b = load <4 x i32>, <4 x i32>* %ptr_b 3838 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask) 3839 ret <8 x i16> %res 3840 } 3841 3842 define <8 x i16> @test_mask_packus_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) { 3843 ; X86-LABEL: test_mask_packus_epi32_rmb_128: 3844 ; X86: # %bb.0: 3845 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3846 ; X86-NEXT: vpackusdw (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0x2b,0x00] 3847 ; X86-NEXT: retl # encoding: [0xc3] 3848 ; 3849 ; X64-LABEL: test_mask_packus_epi32_rmb_128: 3850 ; X64: # %bb.0: 3851 ; X64-NEXT: vpackusdw (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0x2b,0x07] 3852 ; X64-NEXT: retq # encoding: [0xc3] 3853 %q = load i32, i32* %ptr_b 3854 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 3855 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 3856 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1) 3857 ret <8 x i16> %res 3858 } 3859 3860 define <8 x i16> @test_mask_packus_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <8 x i16> %passThru, i8 %mask) { 3861 ; X86-LABEL: test_mask_packus_epi32_rmbk_128: 3862 ; X86: # %bb.0: 3863 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3864 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 3865 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 3866 ; X86-NEXT: vpackusdw (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0x2b,0x08] 3867 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 3868 ; X86-NEXT: retl # encoding: [0xc3] 3869 ; 3870 ; X64-LABEL: test_mask_packus_epi32_rmbk_128: 3871 ; X64: # %bb.0: 3872 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3873 ; X64-NEXT: vpackusdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0x2b,0x0f] 3874 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 3875 ; X64-NEXT: retq # encoding: [0xc3] 3876 %q = load i32, i32* %ptr_b 3877 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 3878 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 3879 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) 3880 ret <8 x i16> %res 3881 } 3882 3883 define <8 x i16> @test_mask_packus_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { 3884 ; X86-LABEL: test_mask_packus_epi32_rmbkz_128: 3885 ; X86: # %bb.0: 3886 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3887 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 3888 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 3889 ; X86-NEXT: vpackusdw (%eax){1to4}, 
%xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0x2b,0x00] 3890 ; X86-NEXT: retl # encoding: [0xc3] 3891 ; 3892 ; X64-LABEL: test_mask_packus_epi32_rmbkz_128: 3893 ; X64: # %bb.0: 3894 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3895 ; X64-NEXT: vpackusdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0x2b,0x07] 3896 ; X64-NEXT: retq # encoding: [0xc3] 3897 %q = load i32, i32* %ptr_b 3898 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 3899 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 3900 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask) 3901 ret <8 x i16> %res 3902 } 3903 3904 declare <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32>, <4 x i32>, <8 x i16>, i8) 3905 3906 define <16 x i16> @test_mask_packus_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { 3907 ; CHECK-LABEL: test_mask_packus_epi32_rr_256: 3908 ; CHECK: # %bb.0: 3909 ; CHECK-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0xc1] 3910 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3911 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1) 3912 ret <16 x i16> %res 3913 } 3914 3915 define <16 x i16> @test_mask_packus_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) { 3916 ; X86-LABEL: test_mask_packus_epi32_rrk_256: 3917 ; X86: # %bb.0: 3918 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 3919 ; X86-NEXT: vpackusdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0xd1] 3920 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 3921 ; X86-NEXT: retl # encoding: [0xc3] 3922 ; 3923 ; X64-LABEL: test_mask_packus_epi32_rrk_256: 3924 ; X64: # %bb.0: 3925 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3926 ; X64-NEXT: vpackusdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0xd1] 3927 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 3928 ; X64-NEXT: retq # encoding: [0xc3] 3929 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) 3930 ret <16 x i16> %res 3931 } 3932 3933 define <16 x i16> @test_mask_packus_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) { 3934 ; X86-LABEL: test_mask_packus_epi32_rrkz_256: 3935 ; X86: # %bb.0: 3936 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 3937 ; X86-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0xc1] 3938 ; X86-NEXT: retl # encoding: [0xc3] 3939 ; 3940 ; X64-LABEL: test_mask_packus_epi32_rrkz_256: 3941 ; X64: # %bb.0: 3942 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3943 ; X64-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0xc1] 3944 ; X64-NEXT: retq # encoding: [0xc3] 3945 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask) 3946 ret <16 x i16> %res 3947 } 3948 3949 define <16 x i16> @test_mask_packus_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { 3950 ; X86-LABEL: test_mask_packus_epi32_rm_256: 3951 ; X86: # %bb.0: 3952 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3953 ; X86-NEXT: vpackusdw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression 
encoding: [0xc4,0xe2,0x7d,0x2b,0x00] 3954 ; X86-NEXT: retl # encoding: [0xc3] 3955 ; 3956 ; X64-LABEL: test_mask_packus_epi32_rm_256: 3957 ; X64: # %bb.0: 3958 ; X64-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0x07] 3959 ; X64-NEXT: retq # encoding: [0xc3] 3960 %b = load <8 x i32>, <8 x i32>* %ptr_b 3961 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1) 3962 ret <16 x i16> %res 3963 } 3964 3965 define <16 x i16> @test_mask_packus_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <16 x i16> %passThru, i16 %mask) { 3966 ; X86-LABEL: test_mask_packus_epi32_rmk_256: 3967 ; X86: # %bb.0: 3968 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3969 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 3970 ; X86-NEXT: vpackusdw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0x08] 3971 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 3972 ; X86-NEXT: retl # encoding: [0xc3] 3973 ; 3974 ; X64-LABEL: test_mask_packus_epi32_rmk_256: 3975 ; X64: # %bb.0: 3976 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3977 ; X64-NEXT: vpackusdw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0x0f] 3978 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 3979 ; X64-NEXT: retq # encoding: [0xc3] 3980 %b = load <8 x i32>, <8 x i32>* %ptr_b 3981 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) 3982 ret <16 x i16> %res 3983 } 3984 3985 define <16 x i16> @test_mask_packus_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i16 %mask) { 3986 ; X86-LABEL: test_mask_packus_epi32_rmkz_256: 3987 ; X86: # %bb.0: 3988 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3989 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 3990 ; X86-NEXT: vpackusdw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0x00] 3991 ; X86-NEXT: retl # encoding: [0xc3] 3992 ; 3993 ; X64-LABEL: test_mask_packus_epi32_rmkz_256: 3994 ; X64: # %bb.0: 3995 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3996 ; X64-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0x07] 3997 ; X64-NEXT: retq # encoding: [0xc3] 3998 %b = load <8 x i32>, <8 x i32>* %ptr_b 3999 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask) 4000 ret <16 x i16> %res 4001 } 4002 4003 define <16 x i16> @test_mask_packus_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) { 4004 ; X86-LABEL: test_mask_packus_epi32_rmb_256: 4005 ; X86: # %bb.0: 4006 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4007 ; X86-NEXT: vpackusdw (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x38,0x2b,0x00] 4008 ; X86-NEXT: retl # encoding: [0xc3] 4009 ; 4010 ; X64-LABEL: test_mask_packus_epi32_rmb_256: 4011 ; X64: # %bb.0: 4012 ; X64-NEXT: vpackusdw (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x38,0x2b,0x07] 4013 ; X64-NEXT: retq # encoding: [0xc3] 4014 %q = load i32, i32* %ptr_b 4015 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 4016 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 4017 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x 
i16> zeroinitializer, i16 -1) 4018 ret <16 x i16> %res 4019 } 4020 4021 define <16 x i16> @test_mask_packus_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <16 x i16> %passThru, i16 %mask) { 4022 ; X86-LABEL: test_mask_packus_epi32_rmbk_256: 4023 ; X86: # %bb.0: 4024 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4025 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 4026 ; X86-NEXT: vpackusdw (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x39,0x2b,0x08] 4027 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 4028 ; X86-NEXT: retl # encoding: [0xc3] 4029 ; 4030 ; X64-LABEL: test_mask_packus_epi32_rmbk_256: 4031 ; X64: # %bb.0: 4032 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 4033 ; X64-NEXT: vpackusdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x39,0x2b,0x0f] 4034 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 4035 ; X64-NEXT: retq # encoding: [0xc3] 4036 %q = load i32, i32* %ptr_b 4037 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 4038 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 4039 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) 4040 ret <16 x i16> %res 4041 } 4042 4043 define <16 x i16> @test_mask_packus_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i16 %mask) { 4044 ; X86-LABEL: test_mask_packus_epi32_rmbkz_256: 4045 ; X86: # %bb.0: 4046 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4047 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 4048 ; X86-NEXT: vpackusdw (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xb9,0x2b,0x00] 4049 ; X86-NEXT: retl # encoding: [0xc3] 4050 ; 4051 ; X64-LABEL: test_mask_packus_epi32_rmbkz_256: 4052 ; X64: # %bb.0: 4053 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 4054 ; X64-NEXT: vpackusdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xb9,0x2b,0x07] 4055 ; X64-NEXT: retq # encoding: [0xc3] 4056 %q = load i32, i32* %ptr_b 4057 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 4058 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 4059 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask) 4060 ret <16 x i16> %res 4061 } 4062 4063 declare <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32>, <8 x i32>, <16 x i16>, i16) 4064 4065 define <16 x i8> @test_mask_packus_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { 4066 ; CHECK-LABEL: test_mask_packus_epi16_rr_128: 4067 ; CHECK: # %bb.0: 4068 ; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0xc1] 4069 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4070 %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1) 4071 ret <16 x i8> %res 4072 } 4073 4074 define <16 x i8> @test_mask_packus_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) { 4075 ; X86-LABEL: test_mask_packus_epi16_rrk_128: 4076 ; X86: # %bb.0: 4077 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 4078 ; X86-NEXT: vpackuswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0xd1] 4079 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression 
encoding: [0xc5,0xf9,0x6f,0xc2] 4080 ; X86-NEXT: retl # encoding: [0xc3] 4081 ; 4082 ; X64-LABEL: test_mask_packus_epi16_rrk_128: 4083 ; X64: # %bb.0: 4084 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 4085 ; X64-NEXT: vpackuswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0xd1] 4086 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 4087 ; X64-NEXT: retq # encoding: [0xc3] 4088 %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) 4089 ret <16 x i8> %res 4090 } 4091 4092 define <16 x i8> @test_mask_packus_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) { 4093 ; X86-LABEL: test_mask_packus_epi16_rrkz_128: 4094 ; X86: # %bb.0: 4095 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 4096 ; X86-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0xc1] 4097 ; X86-NEXT: retl # encoding: [0xc3] 4098 ; 4099 ; X64-LABEL: test_mask_packus_epi16_rrkz_128: 4100 ; X64: # %bb.0: 4101 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 4102 ; X64-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0xc1] 4103 ; X64-NEXT: retq # encoding: [0xc3] 4104 %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask) 4105 ret <16 x i8> %res 4106 } 4107 4108 define <16 x i8> @test_mask_packus_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { 4109 ; X86-LABEL: test_mask_packus_epi16_rm_128: 4110 ; X86: # %bb.0: 4111 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4112 ; X86-NEXT: vpackuswb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0x00] 4113 ; X86-NEXT: retl # encoding: [0xc3] 4114 ; 4115 ; X64-LABEL: test_mask_packus_epi16_rm_128: 4116 ; X64: # %bb.0: 4117 ; X64-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0x07] 4118 ; X64-NEXT: retq # encoding: [0xc3] 4119 %b = load <8 x i16>, <8 x i16>* %ptr_b 4120 %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1) 4121 ret <16 x i8> %res 4122 } 4123 4124 define <16 x i8> @test_mask_packus_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <16 x i8> %passThru, i16 %mask) { 4125 ; X86-LABEL: test_mask_packus_epi16_rmk_128: 4126 ; X86: # %bb.0: 4127 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4128 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 4129 ; X86-NEXT: vpackuswb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0x08] 4130 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 4131 ; X86-NEXT: retl # encoding: [0xc3] 4132 ; 4133 ; X64-LABEL: test_mask_packus_epi16_rmk_128: 4134 ; X64: # %bb.0: 4135 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 4136 ; X64-NEXT: vpackuswb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0x0f] 4137 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 4138 ; X64-NEXT: retq # encoding: [0xc3] 4139 %b = load <8 x i16>, <8 x i16>* %ptr_b 4140 %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) 4141 ret <16 x i8> %res 4142 } 4143 4144 define <16 x i8> @test_mask_packus_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i16 %mask) { 
4145 ; X86-LABEL: test_mask_packus_epi16_rmkz_128: 4146 ; X86: # %bb.0: 4147 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4148 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 4149 ; X86-NEXT: vpackuswb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0x00] 4150 ; X86-NEXT: retl # encoding: [0xc3] 4151 ; 4152 ; X64-LABEL: test_mask_packus_epi16_rmkz_128: 4153 ; X64: # %bb.0: 4154 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 4155 ; X64-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0x07] 4156 ; X64-NEXT: retq # encoding: [0xc3] 4157 %b = load <8 x i16>, <8 x i16>* %ptr_b 4158 %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask) 4159 ret <16 x i8> %res 4160 } 4161 4162 declare <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16>, <8 x i16>, <16 x i8>, i16) 4163 4164 define <32 x i8> @test_mask_packus_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { 4165 ; CHECK-LABEL: test_mask_packus_epi16_rr_256: 4166 ; CHECK: # %bb.0: 4167 ; CHECK-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0xc1] 4168 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4169 %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1) 4170 ret <32 x i8> %res 4171 } 4172 4173 define <32 x i8> @test_mask_packus_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) { 4174 ; X86-LABEL: test_mask_packus_epi16_rrk_256: 4175 ; X86: # %bb.0: 4176 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 4177 ; X86-NEXT: vpackuswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0xd1] 4178 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 4179 ; X86-NEXT: retl # encoding: [0xc3] 4180 ; 4181 ; X64-LABEL: test_mask_packus_epi16_rrk_256: 4182 ; X64: # %bb.0: 4183 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 4184 ; X64-NEXT: vpackuswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0xd1] 4185 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 4186 ; X64-NEXT: retq # encoding: [0xc3] 4187 %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) 4188 ret <32 x i8> %res 4189 } 4190 4191 define <32 x i8> @test_mask_packus_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i32 %mask) { 4192 ; X86-LABEL: test_mask_packus_epi16_rrkz_256: 4193 ; X86: # %bb.0: 4194 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 4195 ; X86-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0xc1] 4196 ; X86-NEXT: retl # encoding: [0xc3] 4197 ; 4198 ; X64-LABEL: test_mask_packus_epi16_rrkz_256: 4199 ; X64: # %bb.0: 4200 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 4201 ; X64-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0xc1] 4202 ; X64-NEXT: retq # encoding: [0xc3] 4203 %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask) 4204 ret <32 x i8> %res 4205 } 4206 4207 define <32 x i8> @test_mask_packus_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { 4208 ; X86-LABEL: test_mask_packus_epi16_rm_256: 4209 ; X86: # %bb.0: 4210 ; X86-NEXT: 
movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4211 ; X86-NEXT: vpackuswb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0x00] 4212 ; X86-NEXT: retl # encoding: [0xc3] 4213 ; 4214 ; X64-LABEL: test_mask_packus_epi16_rm_256: 4215 ; X64: # %bb.0: 4216 ; X64-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0x07] 4217 ; X64-NEXT: retq # encoding: [0xc3] 4218 %b = load <16 x i16>, <16 x i16>* %ptr_b 4219 %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1) 4220 ret <32 x i8> %res 4221 } 4222 4223 define <32 x i8> @test_mask_packus_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <32 x i8> %passThru, i32 %mask) { 4224 ; X86-LABEL: test_mask_packus_epi16_rmk_256: 4225 ; X86: # %bb.0: 4226 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4227 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 4228 ; X86-NEXT: vpackuswb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0x08] 4229 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 4230 ; X86-NEXT: retl # encoding: [0xc3] 4231 ; 4232 ; X64-LABEL: test_mask_packus_epi16_rmk_256: 4233 ; X64: # %bb.0: 4234 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 4235 ; X64-NEXT: vpackuswb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0x0f] 4236 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 4237 ; X64-NEXT: retq # encoding: [0xc3] 4238 %b = load <16 x i16>, <16 x i16>* %ptr_b 4239 %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) 4240 ret <32 x i8> %res 4241 } 4242 4243 define <32 x i8> @test_mask_packus_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i32 %mask) { 4244 ; X86-LABEL: test_mask_packus_epi16_rmkz_256: 4245 ; X86: # %bb.0: 4246 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4247 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 4248 ; X86-NEXT: vpackuswb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0x00] 4249 ; X86-NEXT: retl # encoding: [0xc3] 4250 ; 4251 ; X64-LABEL: test_mask_packus_epi16_rmkz_256: 4252 ; X64: # %bb.0: 4253 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 4254 ; X64-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0x07] 4255 ; X64-NEXT: retq # encoding: [0xc3] 4256 %b = load <16 x i16>, <16 x i16>* %ptr_b 4257 %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask) 4258 ret <32 x i8> %res 4259 } 4260 4261 declare <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16>, <16 x i16>, <32 x i8>, i32) 4262 4263 define <8 x i32> @test_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1) { 4264 ; X86-LABEL: test_cmp_b_256: 4265 ; X86: # %bb.0: 4266 ; X86-NEXT: pushl %ebx # encoding: [0x53] 4267 ; X86-NEXT: .cfi_def_cfa_offset 8 4268 ; X86-NEXT: pushl %edi # encoding: [0x57] 4269 ; X86-NEXT: .cfi_def_cfa_offset 12 4270 ; X86-NEXT: pushl %esi # encoding: [0x56] 4271 ; X86-NEXT: .cfi_def_cfa_offset 16 4272 ; X86-NEXT: .cfi_offset %esi, -16 4273 ; X86-NEXT: .cfi_offset %edi, -12 4274 ; X86-NEXT: .cfi_offset %ebx, -8 4275 ; X86-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1] 4276 ; X86-NEXT: kmovd %k0, %eax # 
encoding: [0xc5,0xfb,0x93,0xc0] 4277 ; X86-NEXT: vpcmpgtb %ymm0, %ymm1, %k0 # encoding: [0x62,0xf1,0x75,0x28,0x64,0xc0] 4278 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 4279 ; X86-NEXT: vpcmpleb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x02] 4280 ; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] 4281 ; X86-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04] 4282 ; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] 4283 ; X86-NEXT: vpcmpnltb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x05] 4284 ; X86-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8] 4285 ; X86-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1] 4286 ; X86-NEXT: kmovd %k0, %ebx # encoding: [0xc5,0xfb,0x93,0xd8] 4287 ; X86-NEXT: vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6] 4288 ; X86-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x01] 4289 ; X86-NEXT: vpinsrd $2, %ebx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc3,0x02] 4290 ; X86-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9] 4291 ; X86-NEXT: vpblendd $8, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x02,0xc1,0x08] 4292 ; X86-NEXT: # xmm0 = xmm0[0,1,2],xmm1[3] 4293 ; X86-NEXT: vmovd %ecx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9] 4294 ; X86-NEXT: vmovd %eax, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd0] 4295 ; X86-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9] 4296 ; X86-NEXT: # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 4297 ; X86-NEXT: vmovd %edx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd2] 4298 ; X86-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca] 4299 ; X86-NEXT: # xmm1 = xmm1[0],xmm2[0] 4300 ; X86-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01] 4301 ; X86-NEXT: popl %esi # encoding: [0x5e] 4302 ; X86-NEXT: .cfi_def_cfa_offset 12 4303 ; X86-NEXT: popl %edi # encoding: [0x5f] 4304 ; X86-NEXT: .cfi_def_cfa_offset 8 4305 ; X86-NEXT: popl %ebx # encoding: [0x5b] 4306 ; X86-NEXT: .cfi_def_cfa_offset 4 4307 ; X86-NEXT: retl # encoding: [0xc3] 4308 ; 4309 ; X64-LABEL: test_cmp_b_256: 4310 ; X64: # %bb.0: 4311 ; X64-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1] 4312 ; X64-NEXT: kmovd %k0, %r8d # encoding: [0xc5,0x7b,0x93,0xc0] 4313 ; X64-NEXT: vpcmpgtb %ymm0, %ymm1, %k0 # encoding: [0x62,0xf1,0x75,0x28,0x64,0xc0] 4314 ; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 4315 ; X64-NEXT: vpcmpleb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x02] 4316 ; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] 4317 ; X64-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04] 4318 ; X64-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] 4319 ; X64-NEXT: vpcmpnltb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x05] 4320 ; X64-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8] 4321 ; X64-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1] 4322 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 4323 ; X64-NEXT: vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6] 4324 ; X64-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x01] 4325 ; X64-NEXT: vpinsrd $2, 
%eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x02] 4326 ; X64-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9] 4327 ; X64-NEXT: vpblendd $8, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x02,0xc1,0x08] 4328 ; X64-NEXT: # xmm0 = xmm0[0,1,2],xmm1[3] 4329 ; X64-NEXT: vmovd %ecx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9] 4330 ; X64-NEXT: vmovd %r8d, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xd0] 4331 ; X64-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9] 4332 ; X64-NEXT: # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 4333 ; X64-NEXT: vmovd %edx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd2] 4334 ; X64-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca] 4335 ; X64-NEXT: # xmm1 = xmm1[0],xmm2[0] 4336 ; X64-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01] 4337 ; X64-NEXT: retq # encoding: [0xc3] 4338 %res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1) 4339 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0 4340 %res1 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 -1) 4341 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1 4342 %res2 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 -1) 4343 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2 4344 %res3 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 -1) 4345 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3 4346 %res4 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 -1) 4347 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4 4348 %res5 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 -1) 4349 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5 4350 %res6 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 -1) 4351 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6 4352 %res7 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 -1) 4353 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7 4354 ret <8 x i32> %vec7 4355 } 4356 4357 define <8 x i32> @test_mask_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask) { 4358 ; X86-LABEL: test_mask_cmp_b_256: 4359 ; X86: # %bb.0: 4360 ; X86-NEXT: pushl %ebp # encoding: [0x55] 4361 ; X86-NEXT: .cfi_def_cfa_offset 8 4362 ; X86-NEXT: pushl %ebx # encoding: [0x53] 4363 ; X86-NEXT: .cfi_def_cfa_offset 12 4364 ; X86-NEXT: pushl %edi # encoding: [0x57] 4365 ; X86-NEXT: .cfi_def_cfa_offset 16 4366 ; X86-NEXT: pushl %esi # encoding: [0x56] 4367 ; X86-NEXT: .cfi_def_cfa_offset 20 4368 ; X86-NEXT: .cfi_offset %esi, -20 4369 ; X86-NEXT: .cfi_offset %edi, -16 4370 ; X86-NEXT: .cfi_offset %ebx, -12 4371 ; X86-NEXT: .cfi_offset %ebp, -8 4372 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14] 4373 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 4374 ; X86-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x74,0xc1] 4375 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 4376 ; X86-NEXT: vpcmpgtb %ymm0, %ymm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x64,0xc0] 4377 ; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] 4378 ; X86-NEXT: vpcmpleb %ymm1, %ymm0, %k0 {%k1} # encoding: 
[0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x02] 4379 ; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] 4380 ; X86-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04] 4381 ; X86-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8] 4382 ; X86-NEXT: vpcmpnltb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x05] 4383 ; X86-NEXT: kmovd %k0, %ebx # encoding: [0xc5,0xfb,0x93,0xd8] 4384 ; X86-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x64,0xc1] 4385 ; X86-NEXT: kmovd %k0, %ebp # encoding: [0xc5,0xfb,0x93,0xe8] 4386 ; X86-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7] 4387 ; X86-NEXT: vpinsrd $1, %ebx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc3,0x01] 4388 ; X86-NEXT: vpinsrd $2, %ebp, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc5,0x02] 4389 ; X86-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x03] 4390 ; X86-NEXT: vmovd %edx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xca] 4391 ; X86-NEXT: vmovd %ecx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd1] 4392 ; X86-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9] 4393 ; X86-NEXT: # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 4394 ; X86-NEXT: vmovd %esi, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd6] 4395 ; X86-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca] 4396 ; X86-NEXT: # xmm1 = xmm1[0],xmm2[0] 4397 ; X86-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01] 4398 ; X86-NEXT: popl %esi # encoding: [0x5e] 4399 ; X86-NEXT: .cfi_def_cfa_offset 16 4400 ; X86-NEXT: popl %edi # encoding: [0x5f] 4401 ; X86-NEXT: .cfi_def_cfa_offset 12 4402 ; X86-NEXT: popl %ebx # encoding: [0x5b] 4403 ; X86-NEXT: .cfi_def_cfa_offset 8 4404 ; X86-NEXT: popl %ebp # encoding: [0x5d] 4405 ; X86-NEXT: .cfi_def_cfa_offset 4 4406 ; X86-NEXT: retl # encoding: [0xc3] 4407 ; 4408 ; X64-LABEL: test_mask_cmp_b_256: 4409 ; X64: # %bb.0: 4410 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 4411 ; X64-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x74,0xc1] 4412 ; X64-NEXT: kmovd %k0, %r8d # encoding: [0xc5,0x7b,0x93,0xc0] 4413 ; X64-NEXT: vpcmpgtb %ymm0, %ymm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x64,0xc0] 4414 ; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 4415 ; X64-NEXT: vpcmpleb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x02] 4416 ; X64-NEXT: kmovd %k0, %r9d # encoding: [0xc5,0x7b,0x93,0xc8] 4417 ; X64-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04] 4418 ; X64-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] 4419 ; X64-NEXT: vpcmpnltb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x05] 4420 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 4421 ; X64-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x64,0xc1] 4422 ; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] 4423 ; X64-NEXT: vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6] 4424 ; X64-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x01] 4425 ; X64-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02] 4426 ; X64-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03] 4427 
; X64-NEXT: vmovd %ecx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9] 4428 ; X64-NEXT: vmovd %r8d, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xd0] 4429 ; X64-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9] 4430 ; X64-NEXT: # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 4431 ; X64-NEXT: vmovd %r9d, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xd1] 4432 ; X64-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca] 4433 ; X64-NEXT: # xmm1 = xmm1[0],xmm2[0] 4434 ; X64-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01] 4435 ; X64-NEXT: retq # encoding: [0xc3] 4436 %res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask) 4437 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0 4438 %res1 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 %mask) 4439 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1 4440 %res2 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 %mask) 4441 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2 4442 %res3 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 %mask) 4443 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3 4444 %res4 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 %mask) 4445 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4 4446 %res5 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 %mask) 4447 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5 4448 %res6 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 %mask) 4449 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6 4450 %res7 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 %mask) 4451 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7 4452 ret <8 x i32> %vec7 4453 } 4454 4455 declare i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8>, <32 x i8>, i32, i32) nounwind readnone 4456 4457 define <8 x i32> @test_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1) { 4458 ; X86-LABEL: test_ucmp_b_256: 4459 ; X86: # %bb.0: 4460 ; X86-NEXT: pushl %ebx # encoding: [0x53] 4461 ; X86-NEXT: .cfi_def_cfa_offset 8 4462 ; X86-NEXT: pushl %edi # encoding: [0x57] 4463 ; X86-NEXT: .cfi_def_cfa_offset 12 4464 ; X86-NEXT: pushl %esi # encoding: [0x56] 4465 ; X86-NEXT: .cfi_def_cfa_offset 16 4466 ; X86-NEXT: .cfi_offset %esi, -16 4467 ; X86-NEXT: .cfi_offset %edi, -12 4468 ; X86-NEXT: .cfi_offset %ebx, -8 4469 ; X86-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1] 4470 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 4471 ; X86-NEXT: vpcmpltub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x01] 4472 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 4473 ; X86-NEXT: vpcmpleub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x02] 4474 ; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] 4475 ; X86-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04] 4476 ; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] 4477 ; X86-NEXT: vpcmpnltub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x05] 4478 ; X86-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8] 4479 ; X86-NEXT: vpcmpnleub %ymm1, %ymm0, %k0 # 
encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x06] 4480 ; X86-NEXT: kmovd %k0, %ebx # encoding: [0xc5,0xfb,0x93,0xd8] 4481 ; X86-NEXT: vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6] 4482 ; X86-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x01] 4483 ; X86-NEXT: vpinsrd $2, %ebx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc3,0x02] 4484 ; X86-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9] 4485 ; X86-NEXT: vpblendd $8, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x02,0xc1,0x08] 4486 ; X86-NEXT: # xmm0 = xmm0[0,1,2],xmm1[3] 4487 ; X86-NEXT: vmovd %ecx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9] 4488 ; X86-NEXT: vmovd %eax, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd0] 4489 ; X86-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9] 4490 ; X86-NEXT: # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 4491 ; X86-NEXT: vmovd %edx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd2] 4492 ; X86-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca] 4493 ; X86-NEXT: # xmm1 = xmm1[0],xmm2[0] 4494 ; X86-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01] 4495 ; X86-NEXT: popl %esi # encoding: [0x5e] 4496 ; X86-NEXT: .cfi_def_cfa_offset 12 4497 ; X86-NEXT: popl %edi # encoding: [0x5f] 4498 ; X86-NEXT: .cfi_def_cfa_offset 8 4499 ; X86-NEXT: popl %ebx # encoding: [0x5b] 4500 ; X86-NEXT: .cfi_def_cfa_offset 4 4501 ; X86-NEXT: retl # encoding: [0xc3] 4502 ; 4503 ; X64-LABEL: test_ucmp_b_256: 4504 ; X64: # %bb.0: 4505 ; X64-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1] 4506 ; X64-NEXT: kmovd %k0, %r8d # encoding: [0xc5,0x7b,0x93,0xc0] 4507 ; X64-NEXT: vpcmpltub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x01] 4508 ; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 4509 ; X64-NEXT: vpcmpleub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x02] 4510 ; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] 4511 ; X64-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04] 4512 ; X64-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] 4513 ; X64-NEXT: vpcmpnltub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x05] 4514 ; X64-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8] 4515 ; X64-NEXT: vpcmpnleub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x06] 4516 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 4517 ; X64-NEXT: vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6] 4518 ; X64-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x01] 4519 ; X64-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x02] 4520 ; X64-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9] 4521 ; X64-NEXT: vpblendd $8, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x02,0xc1,0x08] 4522 ; X64-NEXT: # xmm0 = xmm0[0,1,2],xmm1[3] 4523 ; X64-NEXT: vmovd %ecx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9] 4524 ; X64-NEXT: vmovd %r8d, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xd0] 4525 ; X64-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9] 4526 ; X64-NEXT: # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 4527 ; X64-NEXT: vmovd %edx, %xmm2 # EVEX TO VEX 
Compression encoding: [0xc5,0xf9,0x6e,0xd2] 4528 ; X64-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca] 4529 ; X64-NEXT: # xmm1 = xmm1[0],xmm2[0] 4530 ; X64-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01] 4531 ; X64-NEXT: retq # encoding: [0xc3] 4532 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1) 4533 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0 4534 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 -1) 4535 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1 4536 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 -1) 4537 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2 4538 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 -1) 4539 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3 4540 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 -1) 4541 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4 4542 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 -1) 4543 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5 4544 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 -1) 4545 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6 4546 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 -1) 4547 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7 4548 ret <8 x i32> %vec7 4549 } 4550 4551 define <8 x i32> @test_mask_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask) { 4552 ; X86-LABEL: test_mask_ucmp_b_256: 4553 ; X86: # %bb.0: 4554 ; X86-NEXT: pushl %ebp # encoding: [0x55] 4555 ; X86-NEXT: .cfi_def_cfa_offset 8 4556 ; X86-NEXT: pushl %ebx # encoding: [0x53] 4557 ; X86-NEXT: .cfi_def_cfa_offset 12 4558 ; X86-NEXT: pushl %edi # encoding: [0x57] 4559 ; X86-NEXT: .cfi_def_cfa_offset 16 4560 ; X86-NEXT: pushl %esi # encoding: [0x56] 4561 ; X86-NEXT: .cfi_def_cfa_offset 20 4562 ; X86-NEXT: .cfi_offset %esi, -20 4563 ; X86-NEXT: .cfi_offset %edi, -16 4564 ; X86-NEXT: .cfi_offset %ebx, -12 4565 ; X86-NEXT: .cfi_offset %ebp, -8 4566 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14] 4567 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 4568 ; X86-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x74,0xc1] 4569 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 4570 ; X86-NEXT: vpcmpltub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x01] 4571 ; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] 4572 ; X86-NEXT: vpcmpleub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x02] 4573 ; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] 4574 ; X86-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04] 4575 ; X86-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8] 4576 ; X86-NEXT: vpcmpnltub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x05] 4577 ; X86-NEXT: kmovd %k0, %ebx # encoding: [0xc5,0xfb,0x93,0xd8] 4578 ; X86-NEXT: vpcmpnleub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x06] 4579 ; X86-NEXT: kmovd %k0, %ebp # encoding: [0xc5,0xfb,0x93,0xe8] 4580 ; X86-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: 
[0xc5,0xf9,0x6e,0xc7] 4581 ; X86-NEXT: vpinsrd $1, %ebx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc3,0x01] 4582 ; X86-NEXT: vpinsrd $2, %ebp, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc5,0x02] 4583 ; X86-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x03] 4584 ; X86-NEXT: vmovd %edx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xca] 4585 ; X86-NEXT: vmovd %ecx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd1] 4586 ; X86-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9] 4587 ; X86-NEXT: # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 4588 ; X86-NEXT: vmovd %esi, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd6] 4589 ; X86-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca] 4590 ; X86-NEXT: # xmm1 = xmm1[0],xmm2[0] 4591 ; X86-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01] 4592 ; X86-NEXT: popl %esi # encoding: [0x5e] 4593 ; X86-NEXT: .cfi_def_cfa_offset 16 4594 ; X86-NEXT: popl %edi # encoding: [0x5f] 4595 ; X86-NEXT: .cfi_def_cfa_offset 12 4596 ; X86-NEXT: popl %ebx # encoding: [0x5b] 4597 ; X86-NEXT: .cfi_def_cfa_offset 8 4598 ; X86-NEXT: popl %ebp # encoding: [0x5d] 4599 ; X86-NEXT: .cfi_def_cfa_offset 4 4600 ; X86-NEXT: retl # encoding: [0xc3] 4601 ; 4602 ; X64-LABEL: test_mask_ucmp_b_256: 4603 ; X64: # %bb.0: 4604 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 4605 ; X64-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x74,0xc1] 4606 ; X64-NEXT: kmovd %k0, %r8d # encoding: [0xc5,0x7b,0x93,0xc0] 4607 ; X64-NEXT: vpcmpltub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x01] 4608 ; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 4609 ; X64-NEXT: vpcmpleub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x02] 4610 ; X64-NEXT: kmovd %k0, %r9d # encoding: [0xc5,0x7b,0x93,0xc8] 4611 ; X64-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04] 4612 ; X64-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] 4613 ; X64-NEXT: vpcmpnltub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x05] 4614 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 4615 ; X64-NEXT: vpcmpnleub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x06] 4616 ; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] 4617 ; X64-NEXT: vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6] 4618 ; X64-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x01] 4619 ; X64-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02] 4620 ; X64-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03] 4621 ; X64-NEXT: vmovd %ecx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9] 4622 ; X64-NEXT: vmovd %r8d, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xd0] 4623 ; X64-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9] 4624 ; X64-NEXT: # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 4625 ; X64-NEXT: vmovd %r9d, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xd1] 4626 ; X64-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca] 4627 ; X64-NEXT: # xmm1 = xmm1[0],xmm2[0] 4628 ; X64-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 # 
EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01] 4629 ; X64-NEXT: retq # encoding: [0xc3] 4630 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask) 4631 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0 4632 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 %mask) 4633 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1 4634 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 %mask) 4635 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2 4636 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 %mask) 4637 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3 4638 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 %mask) 4639 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4 4640 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 %mask) 4641 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5 4642 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 %mask) 4643 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6 4644 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 %mask) 4645 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7 4646 ret <8 x i32> %vec7 4647 } 4648 4649 declare i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8>, <32 x i8>, i32, i32) nounwind readnone 4650 4651 define <8 x i16> @test_cmp_w_256(<16 x i16> %a0, <16 x i16> %a1) { 4652 ; CHECK-LABEL: test_cmp_w_256: 4653 ; CHECK: # %bb.0: 4654 ; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1] 4655 ; CHECK-NEXT: vpcmpgtw %ymm0, %ymm1, %k1 # encoding: [0x62,0xf1,0x75,0x28,0x65,0xc8] 4656 ; CHECK-NEXT: vpcmplew %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xd1,0x02] 4657 ; CHECK-NEXT: vpcmpneqw %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xd9,0x04] 4658 ; CHECK-NEXT: vpcmpnltw %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xe1,0x05] 4659 ; CHECK-NEXT: vpcmpgtw %ymm1, %ymm0, %k5 # encoding: [0x62,0xf1,0x7d,0x28,0x65,0xe9] 4660 ; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 4661 ; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0] 4662 ; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x00] 4663 ; CHECK-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1] 4664 ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] 4665 ; CHECK-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2] 4666 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] 4667 ; CHECK-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3] 4668 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] 4669 ; CHECK-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4] 4670 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] 4671 ; CHECK-NEXT: kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5] 4672 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] 4673 ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9] 4674 ; CHECK-NEXT: vpblendw 
$128, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x80] 4675 ; CHECK-NEXT: # xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 4676 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 4677 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4678 %res0 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 -1) 4679 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 4680 %res1 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 -1) 4681 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 4682 %res2 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 -1) 4683 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 4684 %res3 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 -1) 4685 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 4686 %res4 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 -1) 4687 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 4688 %res5 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 -1) 4689 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 4690 %res6 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 -1) 4691 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 4692 %res7 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 -1) 4693 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 4694 ret <8 x i16> %vec7 4695 } 4696 4697 define <8 x i16> @test_mask_cmp_w_256(<16 x i16> %a0, <16 x i16> %a1, i16 %mask) { 4698 ; X86-LABEL: test_mask_cmp_w_256: 4699 ; X86: # %bb.0: 4700 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4701 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 4702 ; X86-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x75,0xc1] 4703 ; X86-NEXT: vpcmpgtw %ymm0, %ymm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x65,0xd0] 4704 ; X86-NEXT: vpcmplew %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xd9,0x02] 4705 ; X86-NEXT: vpcmpneqw %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe1,0x04] 4706 ; X86-NEXT: vpcmpnltw %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe9,0x05] 4707 ; X86-NEXT: vpcmpgtw %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x65,0xc9] 4708 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 4709 ; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0] 4710 ; X86-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x00] 4711 ; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca] 4712 ; X86-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x01] 4713 ; X86-NEXT: kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb] 4714 ; X86-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x02] 4715 ; X86-NEXT: kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc] 4716 ; X86-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04] 4717 ; X86-NEXT: kmovd %k5, %ecx # encoding: [0xc5,0xfb,0x93,0xcd] 4718 ; X86-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x05] 4719 ; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9] 4720 ; X86-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 # 
EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x06] 4721 ; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07] 4722 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 4723 ; X86-NEXT: retl # encoding: [0xc3] 4724 ; 4725 ; X64-LABEL: test_mask_cmp_w_256: 4726 ; X64: # %bb.0: 4727 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 4728 ; X64-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x75,0xc1] 4729 ; X64-NEXT: vpcmpgtw %ymm0, %ymm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x65,0xd0] 4730 ; X64-NEXT: vpcmplew %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xd9,0x02] 4731 ; X64-NEXT: vpcmpneqw %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe1,0x04] 4732 ; X64-NEXT: vpcmpnltw %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe9,0x05] 4733 ; X64-NEXT: vpcmpgtw %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x65,0xc9] 4734 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 4735 ; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0] 4736 ; X64-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x00] 4737 ; X64-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2] 4738 ; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] 4739 ; X64-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3] 4740 ; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] 4741 ; X64-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4] 4742 ; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] 4743 ; X64-NEXT: kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5] 4744 ; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] 4745 ; X64-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1] 4746 ; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] 4747 ; X64-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07] 4748 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 4749 ; X64-NEXT: retq # encoding: [0xc3] 4750 %res0 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 %mask) 4751 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 4752 %res1 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 %mask) 4753 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 4754 %res2 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 %mask) 4755 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 4756 %res3 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 %mask) 4757 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 4758 %res4 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 %mask) 4759 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 4760 %res5 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 %mask) 4761 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 4762 %res6 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 %mask) 4763 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 4764 %res7 = call i16 
@llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 %mask) 4765 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 4766 ret <8 x i16> %vec7 4767 } 4768 4769 declare i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16>, <16 x i16>, i32, i16) nounwind readnone 4770 4771 define <8 x i16> @test_ucmp_w_256(<16 x i16> %a0, <16 x i16> %a1) { 4772 ; CHECK-LABEL: test_ucmp_w_256: 4773 ; CHECK: # %bb.0: 4774 ; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1] 4775 ; CHECK-NEXT: vpcmpltuw %ymm1, %ymm0, %k1 # encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xc9,0x01] 4776 ; CHECK-NEXT: vpcmpleuw %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xd1,0x02] 4777 ; CHECK-NEXT: vpcmpneqw %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xd9,0x04] 4778 ; CHECK-NEXT: vpcmpnltuw %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xe1,0x05] 4779 ; CHECK-NEXT: vpcmpnleuw %ymm1, %ymm0, %k5 # encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xe9,0x06] 4780 ; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 4781 ; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0] 4782 ; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x00] 4783 ; CHECK-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1] 4784 ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] 4785 ; CHECK-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2] 4786 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] 4787 ; CHECK-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3] 4788 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] 4789 ; CHECK-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4] 4790 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] 4791 ; CHECK-NEXT: kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5] 4792 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] 4793 ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9] 4794 ; CHECK-NEXT: vpblendw $128, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x80] 4795 ; CHECK-NEXT: # xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 4796 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 4797 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4798 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 -1) 4799 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 4800 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 -1) 4801 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 4802 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 -1) 4803 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 4804 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 -1) 4805 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 4806 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 -1) 4807 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 4808 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 -1) 4809 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 4810 %res6 = call i16 
@llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 -1) 4811 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 4812 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 -1) 4813 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 4814 ret <8 x i16> %vec7 4815 } 4816 4817 define <8 x i16> @test_mask_ucmp_w_256(<16 x i16> %a0, <16 x i16> %a1, i16 %mask) { 4818 ; X86-LABEL: test_mask_ucmp_w_256: 4819 ; X86: # %bb.0: 4820 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4821 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 4822 ; X86-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x75,0xc1] 4823 ; X86-NEXT: vpcmpltuw %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xd1,0x01] 4824 ; X86-NEXT: vpcmpleuw %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xd9,0x02] 4825 ; X86-NEXT: vpcmpneqw %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe1,0x04] 4826 ; X86-NEXT: vpcmpnltuw %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xe9,0x05] 4827 ; X86-NEXT: vpcmpnleuw %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xc9,0x06] 4828 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 4829 ; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0] 4830 ; X86-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x00] 4831 ; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca] 4832 ; X86-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x01] 4833 ; X86-NEXT: kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb] 4834 ; X86-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x02] 4835 ; X86-NEXT: kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc] 4836 ; X86-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04] 4837 ; X86-NEXT: kmovd %k5, %ecx # encoding: [0xc5,0xfb,0x93,0xcd] 4838 ; X86-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x05] 4839 ; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9] 4840 ; X86-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x06] 4841 ; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07] 4842 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 4843 ; X86-NEXT: retl # encoding: [0xc3] 4844 ; 4845 ; X64-LABEL: test_mask_ucmp_w_256: 4846 ; X64: # %bb.0: 4847 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 4848 ; X64-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x75,0xc1] 4849 ; X64-NEXT: vpcmpltuw %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xd1,0x01] 4850 ; X64-NEXT: vpcmpleuw %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xd9,0x02] 4851 ; X64-NEXT: vpcmpneqw %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe1,0x04] 4852 ; X64-NEXT: vpcmpnltuw %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xe9,0x05] 4853 ; X64-NEXT: vpcmpnleuw %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xc9,0x06] 4854 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 4855 ; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0] 4856 ; X64-NEXT: vpinsrw $0, %eax, %xmm0, 
%xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x00] 4857 ; X64-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2] 4858 ; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] 4859 ; X64-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3] 4860 ; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] 4861 ; X64-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4] 4862 ; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] 4863 ; X64-NEXT: kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5] 4864 ; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] 4865 ; X64-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1] 4866 ; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] 4867 ; X64-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07] 4868 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 4869 ; X64-NEXT: retq # encoding: [0xc3] 4870 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 %mask) 4871 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 4872 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 %mask) 4873 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 4874 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 %mask) 4875 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 4876 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 %mask) 4877 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 4878 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 %mask) 4879 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 4880 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 %mask) 4881 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 4882 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 %mask) 4883 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 4884 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 %mask) 4885 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 4886 ret <8 x i16> %vec7 4887 } 4888 4889 declare i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16>, <16 x i16>, i32, i16) nounwind readnone 4890 4891 define <8 x i16> @test_cmp_b_128(<16 x i8> %a0, <16 x i8> %a1) { 4892 ; CHECK-LABEL: test_cmp_b_128: 4893 ; CHECK: # %bb.0: 4894 ; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1] 4895 ; CHECK-NEXT: vpcmpgtb %xmm0, %xmm1, %k1 # encoding: [0x62,0xf1,0x75,0x08,0x64,0xc8] 4896 ; CHECK-NEXT: vpcmpleb %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xd1,0x02] 4897 ; CHECK-NEXT: vpcmpneqb %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xd9,0x04] 4898 ; CHECK-NEXT: vpcmpnltb %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xe1,0x05] 4899 ; CHECK-NEXT: vpcmpgtb %xmm1, %xmm0, %k5 # encoding: [0x62,0xf1,0x7d,0x08,0x64,0xe9] 4900 ; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 4901 ; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0] 4902 ; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # EVEX 
TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x00] 4903 ; CHECK-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1] 4904 ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] 4905 ; CHECK-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2] 4906 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] 4907 ; CHECK-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3] 4908 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] 4909 ; CHECK-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4] 4910 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] 4911 ; CHECK-NEXT: kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5] 4912 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] 4913 ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9] 4914 ; CHECK-NEXT: vpblendw $128, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x80] 4915 ; CHECK-NEXT: # xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 4916 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4917 %res0 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 -1) 4918 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 4919 %res1 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 -1) 4920 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 4921 %res2 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 -1) 4922 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 4923 %res3 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 -1) 4924 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 4925 %res4 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 -1) 4926 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 4927 %res5 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 -1) 4928 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 4929 %res6 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 -1) 4930 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 4931 %res7 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 -1) 4932 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 4933 ret <8 x i16> %vec7 4934 } 4935 4936 define <8 x i16> @test_mask_cmp_b_128(<16 x i8> %a0, <16 x i8> %a1, i16 %mask) { 4937 ; X86-LABEL: test_mask_cmp_b_128: 4938 ; X86: # %bb.0: 4939 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4940 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 4941 ; X86-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x74,0xc1] 4942 ; X86-NEXT: vpcmpgtb %xmm0, %xmm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x64,0xd0] 4943 ; X86-NEXT: vpcmpleb %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xd9,0x02] 4944 ; X86-NEXT: vpcmpneqb %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe1,0x04] 4945 ; X86-NEXT: vpcmpnltb %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe9,0x05] 4946 ; X86-NEXT: vpcmpgtb %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x64,0xc9] 4947 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 4948 ; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0 # 
EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0] 4949 ; X86-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x00] 4950 ; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca] 4951 ; X86-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x01] 4952 ; X86-NEXT: kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb] 4953 ; X86-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x02] 4954 ; X86-NEXT: kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc] 4955 ; X86-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04] 4956 ; X86-NEXT: kmovd %k5, %ecx # encoding: [0xc5,0xfb,0x93,0xcd] 4957 ; X86-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x05] 4958 ; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9] 4959 ; X86-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x06] 4960 ; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07] 4961 ; X86-NEXT: retl # encoding: [0xc3] 4962 ; 4963 ; X64-LABEL: test_mask_cmp_b_128: 4964 ; X64: # %bb.0: 4965 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 4966 ; X64-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x74,0xc1] 4967 ; X64-NEXT: vpcmpgtb %xmm0, %xmm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x64,0xd0] 4968 ; X64-NEXT: vpcmpleb %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xd9,0x02] 4969 ; X64-NEXT: vpcmpneqb %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe1,0x04] 4970 ; X64-NEXT: vpcmpnltb %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe9,0x05] 4971 ; X64-NEXT: vpcmpgtb %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x64,0xc9] 4972 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 4973 ; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0] 4974 ; X64-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x00] 4975 ; X64-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2] 4976 ; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] 4977 ; X64-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3] 4978 ; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] 4979 ; X64-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4] 4980 ; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] 4981 ; X64-NEXT: kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5] 4982 ; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] 4983 ; X64-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1] 4984 ; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] 4985 ; X64-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07] 4986 ; X64-NEXT: retq # encoding: [0xc3] 4987 %res0 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 %mask) 4988 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 4989 %res1 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 %mask) 4990 %vec1 = insertelement <8 x i16> %vec0, 
i16 %res1, i32 1 4991 %res2 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 %mask) 4992 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 4993 %res3 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 %mask) 4994 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 4995 %res4 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 %mask) 4996 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 4997 %res5 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 %mask) 4998 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 4999 %res6 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 %mask) 5000 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 5001 %res7 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 %mask) 5002 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 5003 ret <8 x i16> %vec7 5004 } 5005 5006 declare i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8>, <16 x i8>, i32, i16) nounwind readnone 5007 5008 define <8 x i16> @test_ucmp_b_128(<16 x i8> %a0, <16 x i8> %a1) { 5009 ; CHECK-LABEL: test_ucmp_b_128: 5010 ; CHECK: # %bb.0: 5011 ; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1] 5012 ; CHECK-NEXT: vpcmpltub %xmm1, %xmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xc9,0x01] 5013 ; CHECK-NEXT: vpcmpleub %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xd1,0x02] 5014 ; CHECK-NEXT: vpcmpneqb %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xd9,0x04] 5015 ; CHECK-NEXT: vpcmpnltub %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xe1,0x05] 5016 ; CHECK-NEXT: vpcmpnleub %xmm1, %xmm0, %k5 # encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xe9,0x06] 5017 ; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 5018 ; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0] 5019 ; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x00] 5020 ; CHECK-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1] 5021 ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] 5022 ; CHECK-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2] 5023 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] 5024 ; CHECK-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3] 5025 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] 5026 ; CHECK-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4] 5027 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] 5028 ; CHECK-NEXT: kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5] 5029 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] 5030 ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9] 5031 ; CHECK-NEXT: vpblendw $128, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x80] 5032 ; CHECK-NEXT: # xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 5033 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5034 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 -1) 5035 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 5036 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, 
<16 x i8> %a1, i32 1, i16 -1) 5037 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 5038 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 -1) 5039 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 5040 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 -1) 5041 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 5042 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 -1) 5043 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 5044 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 -1) 5045 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 5046 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 -1) 5047 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 5048 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 -1) 5049 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 5050 ret <8 x i16> %vec7 5051 } 5052 5053 define <8 x i16> @test_mask_ucmp_b_128(<16 x i8> %a0, <16 x i8> %a1, i16 %mask) { 5054 ; X86-LABEL: test_mask_ucmp_b_128: 5055 ; X86: # %bb.0: 5056 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5057 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 5058 ; X86-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x74,0xc1] 5059 ; X86-NEXT: vpcmpltub %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xd1,0x01] 5060 ; X86-NEXT: vpcmpleub %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xd9,0x02] 5061 ; X86-NEXT: vpcmpneqb %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe1,0x04] 5062 ; X86-NEXT: vpcmpnltub %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xe9,0x05] 5063 ; X86-NEXT: vpcmpnleub %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xc9,0x06] 5064 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 5065 ; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0] 5066 ; X86-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x00] 5067 ; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca] 5068 ; X86-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x01] 5069 ; X86-NEXT: kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb] 5070 ; X86-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x02] 5071 ; X86-NEXT: kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc] 5072 ; X86-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04] 5073 ; X86-NEXT: kmovd %k5, %ecx # encoding: [0xc5,0xfb,0x93,0xcd] 5074 ; X86-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x05] 5075 ; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9] 5076 ; X86-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x06] 5077 ; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07] 5078 ; X86-NEXT: retl # encoding: [0xc3] 5079 ; 5080 ; X64-LABEL: test_mask_ucmp_b_128: 5081 ; X64: # %bb.0: 5082 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 5083 ; X64-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x74,0xc1] 5084 ; X64-NEXT: 
vpcmpltub %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xd1,0x01] 5085 ; X64-NEXT: vpcmpleub %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xd9,0x02] 5086 ; X64-NEXT: vpcmpneqb %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe1,0x04] 5087 ; X64-NEXT: vpcmpnltub %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xe9,0x05] 5088 ; X64-NEXT: vpcmpnleub %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xc9,0x06] 5089 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 5090 ; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0] 5091 ; X64-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x00] 5092 ; X64-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2] 5093 ; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] 5094 ; X64-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3] 5095 ; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] 5096 ; X64-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4] 5097 ; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] 5098 ; X64-NEXT: kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5] 5099 ; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] 5100 ; X64-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1] 5101 ; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] 5102 ; X64-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07] 5103 ; X64-NEXT: retq # encoding: [0xc3] 5104 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 %mask) 5105 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 5106 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 %mask) 5107 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 5108 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 %mask) 5109 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 5110 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 %mask) 5111 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 5112 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 %mask) 5113 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 5114 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 %mask) 5115 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 5116 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 %mask) 5117 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 5118 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 %mask) 5119 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 5120 ret <8 x i16> %vec7 5121 } 5122 5123 declare i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8>, <16 x i8>, i32, i16) nounwind readnone 5124 5125 define <8 x i8> @test_cmp_w_128(<8 x i16> %a0, <8 x i16> %a1) { 5126 ; CHECK-LABEL: test_cmp_w_128: 5127 ; CHECK: # %bb.0: 5128 ; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1] 5129 ; CHECK-NEXT: vpcmpgtw %xmm0, %xmm1, %k1 # encoding: 
[0x62,0xf1,0x75,0x08,0x65,0xc8] 5130 ; CHECK-NEXT: vpcmplew %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xd1,0x02] 5131 ; CHECK-NEXT: vpcmpneqw %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xd9,0x04] 5132 ; CHECK-NEXT: vpcmpnltw %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xe1,0x05] 5133 ; CHECK-NEXT: vpcmpgtw %xmm1, %xmm0, %k5 # encoding: [0x62,0xf1,0x7d,0x08,0x65,0xe9] 5134 ; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 5135 ; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0] 5136 ; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x00] 5137 ; CHECK-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1] 5138 ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] 5139 ; CHECK-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2] 5140 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] 5141 ; CHECK-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3] 5142 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] 5143 ; CHECK-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4] 5144 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] 5145 ; CHECK-NEXT: kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5] 5146 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] 5147 ; CHECK-NEXT: movl $255, %eax # encoding: [0xb8,0xff,0x00,0x00,0x00] 5148 ; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07] 5149 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5150 %res0 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 -1) 5151 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 5152 %res1 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 -1) 5153 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 5154 %res2 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 -1) 5155 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 5156 %res3 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 -1) 5157 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 5158 %res4 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 -1) 5159 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 5160 %res5 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 -1) 5161 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 5162 %res6 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 -1) 5163 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 5164 %res7 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 -1) 5165 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 5166 ret <8 x i8> %vec7 5167 } 5168 5169 define <8 x i8> @test_mask_cmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) { 5170 ; X86-LABEL: test_mask_cmp_w_128: 5171 ; X86: # %bb.0: 5172 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] 5173 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 5174 ; X86-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x75,0xc1] 5175 ; X86-NEXT: vpcmpgtw %xmm0, 
%xmm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x65,0xd0] 5176 ; X86-NEXT: vpcmplew %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd9,0x02] 5177 ; X86-NEXT: vpcmpneqw %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xe1,0x04] 5178 ; X86-NEXT: vpcmpnltw %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xe9,0x05] 5179 ; X86-NEXT: vpcmpgtw %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x65,0xc9] 5180 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 5181 ; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0] 5182 ; X86-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x00] 5183 ; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca] 5184 ; X86-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x01] 5185 ; X86-NEXT: kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb] 5186 ; X86-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x02] 5187 ; X86-NEXT: kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc] 5188 ; X86-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04] 5189 ; X86-NEXT: kmovd %k5, %ecx # encoding: [0xc5,0xfb,0x93,0xcd] 5190 ; X86-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x05] 5191 ; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9] 5192 ; X86-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x06] 5193 ; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07] 5194 ; X86-NEXT: retl # encoding: [0xc3] 5195 ; 5196 ; X64-LABEL: test_mask_cmp_w_128: 5197 ; X64: # %bb.0: 5198 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 5199 ; X64-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x75,0xc1] 5200 ; X64-NEXT: vpcmpgtw %xmm0, %xmm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x65,0xd0] 5201 ; X64-NEXT: vpcmplew %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd9,0x02] 5202 ; X64-NEXT: vpcmpneqw %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xe1,0x04] 5203 ; X64-NEXT: vpcmpnltw %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xe9,0x05] 5204 ; X64-NEXT: vpcmpgtw %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x65,0xc9] 5205 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 5206 ; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0] 5207 ; X64-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x00] 5208 ; X64-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2] 5209 ; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] 5210 ; X64-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3] 5211 ; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] 5212 ; X64-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4] 5213 ; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] 5214 ; X64-NEXT: kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5] 5215 ; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] 5216 ; X64-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1] 5217 ; 
X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] 5218 ; X64-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07] 5219 ; X64-NEXT: retq # encoding: [0xc3] 5220 %res0 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 %mask) 5221 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 5222 %res1 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 %mask) 5223 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 5224 %res2 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 %mask) 5225 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 5226 %res3 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 %mask) 5227 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 5228 %res4 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 %mask) 5229 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 5230 %res5 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 %mask) 5231 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 5232 %res6 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 %mask) 5233 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 5234 %res7 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 %mask) 5235 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 5236 ret <8 x i8> %vec7 5237 } 5238 5239 declare i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16>, <8 x i16>, i32, i8) nounwind readnone 5240 5241 define <8 x i8> @test_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1) { 5242 ; CHECK-LABEL: test_ucmp_w_128: 5243 ; CHECK: # %bb.0: 5244 ; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1] 5245 ; CHECK-NEXT: vpcmpltuw %xmm1, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xc9,0x01] 5246 ; CHECK-NEXT: vpcmpleuw %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xd1,0x02] 5247 ; CHECK-NEXT: vpcmpneqw %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xd9,0x04] 5248 ; CHECK-NEXT: vpcmpnltuw %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xe1,0x05] 5249 ; CHECK-NEXT: vpcmpnleuw %xmm1, %xmm0, %k5 # encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xe9,0x06] 5250 ; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 5251 ; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0] 5252 ; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x00] 5253 ; CHECK-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1] 5254 ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] 5255 ; CHECK-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2] 5256 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] 5257 ; CHECK-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3] 5258 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] 5259 ; CHECK-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4] 5260 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] 5261 ; CHECK-NEXT: kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5] 5262 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: 
[0xc5,0xf9,0xc4,0xc0,0x06] 5263 ; CHECK-NEXT: movl $255, %eax # encoding: [0xb8,0xff,0x00,0x00,0x00] 5264 ; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07] 5265 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5266 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 -1) 5267 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 5268 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 -1) 5269 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 5270 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 -1) 5271 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 5272 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 -1) 5273 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 5274 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 -1) 5275 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 5276 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 -1) 5277 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 5278 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 -1) 5279 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 5280 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 -1) 5281 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 5282 ret <8 x i8> %vec7 5283 } 5284 5285 define <8 x i8> @test_mask_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) { 5286 ; X86-LABEL: test_mask_ucmp_w_128: 5287 ; X86: # %bb.0: 5288 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] 5289 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 5290 ; X86-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x75,0xc1] 5291 ; X86-NEXT: vpcmpltuw %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xd1,0x01] 5292 ; X86-NEXT: vpcmpleuw %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xd9,0x02] 5293 ; X86-NEXT: vpcmpneqw %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xe1,0x04] 5294 ; X86-NEXT: vpcmpnltuw %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xe9,0x05] 5295 ; X86-NEXT: vpcmpnleuw %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc9,0x06] 5296 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 5297 ; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0] 5298 ; X86-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x00] 5299 ; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca] 5300 ; X86-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x01] 5301 ; X86-NEXT: kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb] 5302 ; X86-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x02] 5303 ; X86-NEXT: kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc] 5304 ; X86-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04] 5305 ; X86-NEXT: kmovd %k5, %ecx # encoding: [0xc5,0xfb,0x93,0xcd] 5306 ; X86-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x05] 5307 ; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9] 5308 ; X86-NEXT: vpinsrw $6, %ecx, %xmm0, 
%xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x06] 5309 ; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07] 5310 ; X86-NEXT: retl # encoding: [0xc3] 5311 ; 5312 ; X64-LABEL: test_mask_ucmp_w_128: 5313 ; X64: # %bb.0: 5314 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 5315 ; X64-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x75,0xc1] 5316 ; X64-NEXT: vpcmpltuw %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xd1,0x01] 5317 ; X64-NEXT: vpcmpleuw %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xd9,0x02] 5318 ; X64-NEXT: vpcmpneqw %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xe1,0x04] 5319 ; X64-NEXT: vpcmpnltuw %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xe9,0x05] 5320 ; X64-NEXT: vpcmpnleuw %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc9,0x06] 5321 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 5322 ; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0] 5323 ; X64-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x00] 5324 ; X64-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2] 5325 ; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] 5326 ; X64-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3] 5327 ; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] 5328 ; X64-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4] 5329 ; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] 5330 ; X64-NEXT: kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5] 5331 ; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] 5332 ; X64-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1] 5333 ; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] 5334 ; X64-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07] 5335 ; X64-NEXT: retq # encoding: [0xc3] 5336 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 %mask) 5337 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 5338 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 %mask) 5339 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 5340 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 %mask) 5341 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 5342 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 %mask) 5343 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 5344 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 %mask) 5345 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 5346 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 %mask) 5347 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 5348 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 %mask) 5349 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 5350 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 %mask) 5351 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 
5352 ret <8 x i8> %vec7 5353 } 5354 5355 declare i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16>, <8 x i16>, i32, i8) nounwind readnone 5356 5357 define <16 x i8>@mm_mask_avg_epu8(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) { 5358 ; X86-LABEL: mm_mask_avg_epu8: 5359 ; X86: # %bb.0: 5360 ; X86-NEXT: vpavgb %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe0,0xd9] 5361 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 5362 ; X86-NEXT: vpavgb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe0,0xd1] 5363 ; X86-NEXT: vpaddb %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc3] 5364 ; X86-NEXT: retl # encoding: [0xc3] 5365 ; 5366 ; X64-LABEL: mm_mask_avg_epu8: 5367 ; X64: # %bb.0: 5368 ; X64-NEXT: vpavgb %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe0,0xd9] 5369 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 5370 ; X64-NEXT: vpavgb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe0,0xd1] 5371 ; X64-NEXT: vpaddb %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc3] 5372 ; X64-NEXT: retq # encoding: [0xc3] 5373 %res = call <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) 5374 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1) 5375 %res2 = add <16 x i8> %res, %res1 5376 ret <16 x i8> %res2 5377 } 5378 5379 declare <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8>, <16 x i8>, i16) 5380 5381 define <16 x i8>@test_int_x86_avx512_mask_pabs_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) { 5382 ; X86-LABEL: test_int_x86_avx512_mask_pabs_b_128: 5383 ; X86: # %bb.0: 5384 ; X86-NEXT: vpabsb %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1c,0xd0] 5385 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 5386 ; X86-NEXT: vpabsb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x1c,0xc8] 5387 ; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc2] 5388 ; X86-NEXT: retl # encoding: [0xc3] 5389 ; 5390 ; X64-LABEL: test_int_x86_avx512_mask_pabs_b_128: 5391 ; X64: # %bb.0: 5392 ; X64-NEXT: vpabsb %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1c,0xd0] 5393 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 5394 ; X64-NEXT: vpabsb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x1c,0xc8] 5395 ; X64-NEXT: vpaddb %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc2] 5396 ; X64-NEXT: retq # encoding: [0xc3] 5397 %res = call <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) 5398 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 -1) 5399 %res2 = add <16 x i8> %res, %res1 5400 ret <16 x i8> %res2 5401 } 5402 5403 declare <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) 5404 5405 define <32 x i8>@mm256_mask_avg_epu8(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { 5406 ; X86-LABEL: mm256_mask_avg_epu8: 5407 ; X86: # %bb.0: 5408 ; X86-NEXT: vpavgb %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe0,0xd9] 5409 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 5410 ; X86-NEXT: vpavgb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe0,0xd1] 5411 ; X86-NEXT: vpaddb %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression 
encoding: [0xc5,0xed,0xfc,0xc3] 5412 ; X86-NEXT: retl # encoding: [0xc3] 5413 ; 5414 ; X64-LABEL: mm256_mask_avg_epu8: 5415 ; X64: # %bb.0: 5416 ; X64-NEXT: vpavgb %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe0,0xd9] 5417 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 5418 ; X64-NEXT: vpavgb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe0,0xd1] 5419 ; X64-NEXT: vpaddb %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc3] 5420 ; X64-NEXT: retq # encoding: [0xc3] 5421 %res = call <32 x i8> @llvm.x86.avx512.mask.pavg.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) 5422 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pavg.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) 5423 %res2 = add <32 x i8> %res, %res1 5424 ret <32 x i8> %res2 5425 } 5426 5427 declare <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8>, <32 x i8>, i32) 5428 5429 define <32 x i8>@test_int_x86_avx512_mask_pabs_b_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) { 5430 ; X86-LABEL: test_int_x86_avx512_mask_pabs_b_256: 5431 ; X86: # %bb.0: 5432 ; X86-NEXT: vpabsb %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1c,0xd0] 5433 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 5434 ; X86-NEXT: vpabsb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1c,0xc8] 5435 ; X86-NEXT: vpaddb %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc2] 5436 ; X86-NEXT: retl # encoding: [0xc3] 5437 ; 5438 ; X64-LABEL: test_int_x86_avx512_mask_pabs_b_256: 5439 ; X64: # %bb.0: 5440 ; X64-NEXT: vpabsb %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1c,0xd0] 5441 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 5442 ; X64-NEXT: vpabsb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1c,0xc8] 5443 ; X64-NEXT: vpaddb %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc2] 5444 ; X64-NEXT: retq # encoding: [0xc3] 5445 %res = call <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) 5446 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 -1) 5447 %res2 = add <32 x i8> %res, %res1 5448 ret <32 x i8> %res2 5449 } 5450 5451 declare <32 x i8> @llvm.x86.avx512.mask.pavg.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 5452 5453 define <8 x i16>@mm_mask_avg_epu16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 5454 ; X86-LABEL: mm_mask_avg_epu16: 5455 ; X86: # %bb.0: 5456 ; X86-NEXT: vpavgw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe3,0xd9] 5457 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5458 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 5459 ; X86-NEXT: vpavgw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe3,0xd1] 5460 ; X86-NEXT: vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] 5461 ; X86-NEXT: retl # encoding: [0xc3] 5462 ; 5463 ; X64-LABEL: mm_mask_avg_epu16: 5464 ; X64: # %bb.0: 5465 ; X64-NEXT: vpavgw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe3,0xd9] 5466 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 5467 ; X64-NEXT: vpavgw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe3,0xd1] 5468 ; X64-NEXT: vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] 5469 ; X64-NEXT: retq # encoding: [0xc3] 5470 %res = call <8 x i16> 
@llvm.x86.avx512.mask.pavg.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 5471 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pavg.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 5472 %res2 = add <8 x i16> %res, %res1 5473 ret <8 x i16> %res2 5474 } 5475 5476 declare <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16>, <8 x i16>, i8) 5477 5478 define <8 x i16>@test_int_x86_avx512_mask_pabs_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) { 5479 ; X86-LABEL: test_int_x86_avx512_mask_pabs_w_128: 5480 ; X86: # %bb.0: 5481 ; X86-NEXT: vpabsw %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1d,0xd0] 5482 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5483 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 5484 ; X86-NEXT: vpabsw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x1d,0xc8] 5485 ; X86-NEXT: vpaddw %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc2] 5486 ; X86-NEXT: retl # encoding: [0xc3] 5487 ; 5488 ; X64-LABEL: test_int_x86_avx512_mask_pabs_w_128: 5489 ; X64: # %bb.0: 5490 ; X64-NEXT: vpabsw %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1d,0xd0] 5491 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 5492 ; X64-NEXT: vpabsw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x1d,0xc8] 5493 ; X64-NEXT: vpaddw %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc2] 5494 ; X64-NEXT: retq # encoding: [0xc3] 5495 %res = call <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) 5496 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 -1) 5497 %res2 = add <8 x i16> %res, %res1 5498 ret <8 x i16> %res2 5499 } 5500 5501 declare <8 x i16> @llvm.x86.avx512.mask.pavg.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 5502 5503 define <16 x i16>@mm256_mask_avg_epu16(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 5504 ; X86-LABEL: mm256_mask_avg_epu16: 5505 ; X86: # %bb.0: 5506 ; X86-NEXT: vpavgw %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe3,0xd9] 5507 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 5508 ; X86-NEXT: vpavgw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe3,0xd1] 5509 ; X86-NEXT: vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3] 5510 ; X86-NEXT: retl # encoding: [0xc3] 5511 ; 5512 ; X64-LABEL: mm256_mask_avg_epu16: 5513 ; X64: # %bb.0: 5514 ; X64-NEXT: vpavgw %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe3,0xd9] 5515 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 5516 ; X64-NEXT: vpavgw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe3,0xd1] 5517 ; X64-NEXT: vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3] 5518 ; X64-NEXT: retq # encoding: [0xc3] 5519 %res = call <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 5520 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 5521 %res2 = add <16 x i16> %res, %res1 5522 ret <16 x i16> %res2 5523 } 5524 5525 declare <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16>, <16 x i16>, i16) 5526 5527 define <16 x i16>@test_int_x86_avx512_mask_pabs_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) { 5528 ; X86-LABEL: test_int_x86_avx512_mask_pabs_w_256: 5529 ; X86: # %bb.0: 5530 ; X86-NEXT: vpabsw %ymm0, %ymm2 
# EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1d,0xd0] 5531 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 5532 ; X86-NEXT: vpabsw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1d,0xc8] 5533 ; X86-NEXT: vpaddw %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc2] 5534 ; X86-NEXT: retl # encoding: [0xc3] 5535 ; 5536 ; X64-LABEL: test_int_x86_avx512_mask_pabs_w_256: 5537 ; X64: # %bb.0: 5538 ; X64-NEXT: vpabsw %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1d,0xd0] 5539 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 5540 ; X64-NEXT: vpabsw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1d,0xc8] 5541 ; X64-NEXT: vpaddw %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc2] 5542 ; X64-NEXT: retq # encoding: [0xc3] 5543 %res = call <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) 5544 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 -1) 5545 %res2 = add <16 x i16> %res, %res1 5546 ret <16 x i16> %res2 5547 } 5548 5549 declare <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 5550 5551 declare i16 @llvm.x86.avx512.ptestm.b.128(<16 x i8>, <16 x i8>, i16) 5552 5553 define i16@test_int_x86_avx512_ptestm_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) { 5554 ; X86-LABEL: test_int_x86_avx512_ptestm_b_128: 5555 ; X86: # %bb.0: 5556 ; X86-NEXT: vptestmb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x08,0x26,0xc1] 5557 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 5558 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] 5559 ; X86-NEXT: andw %cx, %ax # encoding: [0x66,0x21,0xc8] 5560 ; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 5561 ; X86-NEXT: # kill: def $ax killed $ax killed $eax 5562 ; X86-NEXT: retl # encoding: [0xc3] 5563 ; 5564 ; X64-LABEL: test_int_x86_avx512_ptestm_b_128: 5565 ; X64: # %bb.0: 5566 ; X64-NEXT: vptestmb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x08,0x26,0xc1] 5567 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 5568 ; X64-NEXT: andl %eax, %edi # encoding: [0x21,0xc7] 5569 ; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8] 5570 ; X64-NEXT: # kill: def $ax killed $ax killed $eax 5571 ; X64-NEXT: retq # encoding: [0xc3] 5572 %res = call i16 @llvm.x86.avx512.ptestm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) 5573 %res1 = call i16 @llvm.x86.avx512.ptestm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16-1) 5574 %res2 = add i16 %res, %res1 5575 ret i16 %res2 5576 } 5577 5578 declare i32 @llvm.x86.avx512.ptestm.b.256(<32 x i8>, <32 x i8>, i32) 5579 5580 define i32@test_int_x86_avx512_ptestm_b_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) { 5581 ; X86-LABEL: test_int_x86_avx512_ptestm_b_256: 5582 ; X86: # %bb.0: 5583 ; X86-NEXT: vptestmb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7d,0x28,0x26,0xc1] 5584 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 5585 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5586 ; X86-NEXT: andl %ecx, %eax # encoding: [0x21,0xc8] 5587 ; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 5588 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 5589 ; X86-NEXT: retl # encoding: [0xc3] 5590 ; 5591 ; X64-LABEL: test_int_x86_avx512_ptestm_b_256: 5592 ; X64: # %bb.0: 5593 ; X64-NEXT: vptestmb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7d,0x28,0x26,0xc1] 5594 ; X64-NEXT: kmovd %k0, %eax # encoding: 
[0xc5,0xfb,0x93,0xc0] 5595 ; X64-NEXT: andl %eax, %edi # encoding: [0x21,0xc7] 5596 ; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8] 5597 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 5598 ; X64-NEXT: retq # encoding: [0xc3] 5599 %res = call i32 @llvm.x86.avx512.ptestm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) 5600 %res1 = call i32 @llvm.x86.avx512.ptestm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32-1) 5601 %res2 = add i32 %res, %res1 5602 ret i32 %res2 5603 } 5604 5605 declare i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16>, <8 x i16>, i8) 5606 5607 define i8@test_int_x86_avx512_ptestm_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) { 5608 ; X86-LABEL: test_int_x86_avx512_ptestm_w_128: 5609 ; X86: # %bb.0: 5610 ; X86-NEXT: vptestmw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x26,0xc1] 5611 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 5612 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04] 5613 ; X86-NEXT: andb %cl, %al # encoding: [0x20,0xc8] 5614 ; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8] 5615 ; X86-NEXT: retl # encoding: [0xc3] 5616 ; 5617 ; X64-LABEL: test_int_x86_avx512_ptestm_w_128: 5618 ; X64: # %bb.0: 5619 ; X64-NEXT: vptestmw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x26,0xc1] 5620 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 5621 ; X64-NEXT: andb %al, %dil # encoding: [0x40,0x20,0xc7] 5622 ; X64-NEXT: addb %dil, %al # encoding: [0x40,0x00,0xf8] 5623 ; X64-NEXT: # kill: def $al killed $al killed $eax 5624 ; X64-NEXT: retq # encoding: [0xc3] 5625 %res = call i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) 5626 %res1 = call i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8-1) 5627 %res2 = add i8 %res, %res1 5628 ret i8 %res2 5629 } 5630 5631 declare i16 @llvm.x86.avx512.ptestm.w.256(<16 x i16>, <16 x i16>, i16) 5632 5633 define i16@test_int_x86_avx512_ptestm_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) { 5634 ; X86-LABEL: test_int_x86_avx512_ptestm_w_256: 5635 ; X86: # %bb.0: 5636 ; X86-NEXT: vptestmw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x26,0xc1] 5637 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 5638 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] 5639 ; X86-NEXT: andw %cx, %ax # encoding: [0x66,0x21,0xc8] 5640 ; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 5641 ; X86-NEXT: # kill: def $ax killed $ax killed $eax 5642 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 5643 ; X86-NEXT: retl # encoding: [0xc3] 5644 ; 5645 ; X64-LABEL: test_int_x86_avx512_ptestm_w_256: 5646 ; X64: # %bb.0: 5647 ; X64-NEXT: vptestmw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x26,0xc1] 5648 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 5649 ; X64-NEXT: andl %eax, %edi # encoding: [0x21,0xc7] 5650 ; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8] 5651 ; X64-NEXT: # kill: def $ax killed $ax killed $eax 5652 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 5653 ; X64-NEXT: retq # encoding: [0xc3] 5654 %res = call i16 @llvm.x86.avx512.ptestm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) 5655 %res1 = call i16 @llvm.x86.avx512.ptestm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16-1) 5656 %res2 = add i16 %res, %res1 5657 ret i16 %res2 5658 } 5659 5660 declare i16 @llvm.x86.avx512.ptestnm.b.128(<16 x i8>, <16 x i8>, i16) 5661 5662 define i16@test_int_x86_avx512_ptestnm_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) { 5663 ; X86-LABEL: test_int_x86_avx512_ptestnm_b_128: 5664 ; X86: # %bb.0: 5665 ; 
X86-NEXT: vptestnmb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x08,0x26,0xc1] 5666 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 5667 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] 5668 ; X86-NEXT: andw %cx, %ax # encoding: [0x66,0x21,0xc8] 5669 ; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 5670 ; X86-NEXT: # kill: def $ax killed $ax killed $eax 5671 ; X86-NEXT: retl # encoding: [0xc3] 5672 ; 5673 ; X64-LABEL: test_int_x86_avx512_ptestnm_b_128: 5674 ; X64: # %bb.0: 5675 ; X64-NEXT: vptestnmb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x08,0x26,0xc1] 5676 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 5677 ; X64-NEXT: andl %eax, %edi # encoding: [0x21,0xc7] 5678 ; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8] 5679 ; X64-NEXT: # kill: def $ax killed $ax killed $eax 5680 ; X64-NEXT: retq # encoding: [0xc3] 5681 %res = call i16 @llvm.x86.avx512.ptestnm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) 5682 %res1 = call i16 @llvm.x86.avx512.ptestnm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16-1) 5683 %res2 = add i16 %res, %res1 5684 ret i16 %res2 5685 } 5686 5687 declare i32 @llvm.x86.avx512.ptestnm.b.256(<32 x i8>, <32 x i8>, i32) 5688 5689 define i32@test_int_x86_avx512_ptestnm_b_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) { 5690 ; X86-LABEL: test_int_x86_avx512_ptestnm_b_256: 5691 ; X86: # %bb.0: 5692 ; X86-NEXT: vptestnmb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7e,0x28,0x26,0xc1] 5693 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 5694 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5695 ; X86-NEXT: andl %ecx, %eax # encoding: [0x21,0xc8] 5696 ; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 5697 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 5698 ; X86-NEXT: retl # encoding: [0xc3] 5699 ; 5700 ; X64-LABEL: test_int_x86_avx512_ptestnm_b_256: 5701 ; X64: # %bb.0: 5702 ; X64-NEXT: vptestnmb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7e,0x28,0x26,0xc1] 5703 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 5704 ; X64-NEXT: andl %eax, %edi # encoding: [0x21,0xc7] 5705 ; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8] 5706 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 5707 ; X64-NEXT: retq # encoding: [0xc3] 5708 %res = call i32 @llvm.x86.avx512.ptestnm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) 5709 %res1 = call i32 @llvm.x86.avx512.ptestnm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32-1) 5710 %res2 = add i32 %res, %res1 5711 ret i32 %res2 5712 } 5713 5714 declare i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16>, <8 x i16>, i8 %x2) 5715 5716 define i8@test_int_x86_avx512_ptestnm_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) { 5717 ; X86-LABEL: test_int_x86_avx512_ptestnm_w_128: 5718 ; X86: # %bb.0: 5719 ; X86-NEXT: vptestnmw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x26,0xc1] 5720 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 5721 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04] 5722 ; X86-NEXT: andb %cl, %al # encoding: [0x20,0xc8] 5723 ; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8] 5724 ; X86-NEXT: retl # encoding: [0xc3] 5725 ; 5726 ; X64-LABEL: test_int_x86_avx512_ptestnm_w_128: 5727 ; X64: # %bb.0: 5728 ; X64-NEXT: vptestnmw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x26,0xc1] 5729 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 5730 ; X64-NEXT: andb %al, %dil # encoding: [0x40,0x20,0xc7] 5731 ; X64-NEXT: addb %dil, %al # encoding: [0x40,0x00,0xf8] 5732 ; X64-NEXT: # kill: def $al 
killed $al killed $eax 5733 ; X64-NEXT: retq # encoding: [0xc3] 5734 %res = call i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) 5735 %res1 = call i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8-1) 5736 %res2 = add i8 %res, %res1 5737 ret i8 %res2 5738 } 5739 5740 declare i16 @llvm.x86.avx512.ptestnm.w.256(<16 x i16>, <16 x i16>, i16 %x2) 5741 5742 define i16@test_int_x86_avx512_ptestnm_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) { 5743 ; X86-LABEL: test_int_x86_avx512_ptestnm_w_256: 5744 ; X86: # %bb.0: 5745 ; X86-NEXT: vptestnmw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x26,0xc1] 5746 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 5747 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] 5748 ; X86-NEXT: andw %cx, %ax # encoding: [0x66,0x21,0xc8] 5749 ; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 5750 ; X86-NEXT: # kill: def $ax killed $ax killed $eax 5751 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 5752 ; X86-NEXT: retl # encoding: [0xc3] 5753 ; 5754 ; X64-LABEL: test_int_x86_avx512_ptestnm_w_256: 5755 ; X64: # %bb.0: 5756 ; X64-NEXT: vptestnmw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x26,0xc1] 5757 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 5758 ; X64-NEXT: andl %eax, %edi # encoding: [0x21,0xc7] 5759 ; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8] 5760 ; X64-NEXT: # kill: def $ax killed $ax killed $eax 5761 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 5762 ; X64-NEXT: retq # encoding: [0xc3] 5763 %res = call i16 @llvm.x86.avx512.ptestnm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) 5764 %res1 = call i16 @llvm.x86.avx512.ptestnm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16-1) 5765 %res2 = add i16 %res, %res1 5766 ret i16 %res2 5767 } 5768 5769 declare i16 @llvm.x86.avx512.cvtb2mask.128(<16 x i8>) 5770 5771 define i16@test_int_x86_avx512_cvtb2mask_128(<16 x i8> %x0) { 5772 ; CHECK-LABEL: test_int_x86_avx512_cvtb2mask_128: 5773 ; CHECK: # %bb.0: 5774 ; CHECK-NEXT: vpmovb2m %xmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x08,0x29,0xc0] 5775 ; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 5776 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax 5777 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5778 %res = call i16 @llvm.x86.avx512.cvtb2mask.128(<16 x i8> %x0) 5779 ret i16 %res 5780 } 5781 5782 declare i32 @llvm.x86.avx512.cvtb2mask.256(<32 x i8>) 5783 5784 define i32@test_int_x86_avx512_cvtb2mask_256(<32 x i8> %x0) { 5785 ; CHECK-LABEL: test_int_x86_avx512_cvtb2mask_256: 5786 ; CHECK: # %bb.0: 5787 ; CHECK-NEXT: vpmovb2m %ymm0, %k0 # encoding: [0x62,0xf2,0x7e,0x28,0x29,0xc0] 5788 ; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 5789 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 5790 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5791 %res = call i32 @llvm.x86.avx512.cvtb2mask.256(<32 x i8> %x0) 5792 ret i32 %res 5793 } 5794 5795 declare i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16>) 5796 5797 define i8@test_int_x86_avx512_cvtw2mask_128(<8 x i16> %x0) { 5798 ; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_128: 5799 ; CHECK: # %bb.0: 5800 ; CHECK-NEXT: vpmovw2m %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x29,0xc0] 5801 ; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 5802 ; CHECK-NEXT: # kill: def $al killed $al killed $eax 5803 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5804 %res = call i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16> %x0) 5805 ret i8 %res 5806 } 5807 5808 declare i16 
@llvm.x86.avx512.cvtw2mask.256(<16 x i16>) 5809 5810 define i16@test_int_x86_avx512_cvtw2mask_256(<16 x i16> %x0) { 5811 ; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_256: 5812 ; CHECK: # %bb.0: 5813 ; CHECK-NEXT: vpmovw2m %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x29,0xc0] 5814 ; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 5815 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax 5816 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 5817 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5818 %res = call i16 @llvm.x86.avx512.cvtw2mask.256(<16 x i16> %x0) 5819 ret i16 %res 5820 } 5821 5822 declare <8 x i16> @llvm.x86.avx512.mask.pmulhu.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 5823 5824 define <8 x i16>@test_int_x86_avx512_mask_pmulhu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 5825 ; X86-LABEL: test_int_x86_avx512_mask_pmulhu_w_128: 5826 ; X86: # %bb.0: 5827 ; X86-NEXT: vpmulhuw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe4,0xd9] 5828 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5829 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 5830 ; X86-NEXT: vpmulhuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe4,0xd1] 5831 ; X86-NEXT: vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] 5832 ; X86-NEXT: retl # encoding: [0xc3] 5833 ; 5834 ; X64-LABEL: test_int_x86_avx512_mask_pmulhu_w_128: 5835 ; X64: # %bb.0: 5836 ; X64-NEXT: vpmulhuw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe4,0xd9] 5837 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 5838 ; X64-NEXT: vpmulhuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe4,0xd1] 5839 ; X64-NEXT: vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] 5840 ; X64-NEXT: retq # encoding: [0xc3] 5841 %res = call <8 x i16> @llvm.x86.avx512.mask.pmulhu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 5842 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmulhu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 5843 %res2 = add <8 x i16> %res, %res1 5844 ret <8 x i16> %res2 5845 } 5846 5847 declare <16 x i16> @llvm.x86.avx512.mask.pmulhu.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 5848 5849 define <16 x i16>@test_int_x86_avx512_mask_pmulhu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 5850 ; X86-LABEL: test_int_x86_avx512_mask_pmulhu_w_256: 5851 ; X86: # %bb.0: 5852 ; X86-NEXT: vpmulhuw %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe4,0xd9] 5853 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 5854 ; X86-NEXT: vpmulhuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe4,0xd1] 5855 ; X86-NEXT: vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3] 5856 ; X86-NEXT: retl # encoding: [0xc3] 5857 ; 5858 ; X64-LABEL: test_int_x86_avx512_mask_pmulhu_w_256: 5859 ; X64: # %bb.0: 5860 ; X64-NEXT: vpmulhuw %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe4,0xd9] 5861 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 5862 ; X64-NEXT: vpmulhuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe4,0xd1] 5863 ; X64-NEXT: vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3] 5864 ; X64-NEXT: retq # encoding: [0xc3] 5865 %res = call <16 x i16> @llvm.x86.avx512.mask.pmulhu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 
5866 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmulhu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 5867 %res2 = add <16 x i16> %res, %res1 5868 ret <16 x i16> %res2 5869 } 5870 5871 declare <8 x i16> @llvm.x86.avx512.mask.pmulh.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 5872 5873 define <8 x i16>@test_int_x86_avx512_mask_pmulh_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 5874 ; X86-LABEL: test_int_x86_avx512_mask_pmulh_w_128: 5875 ; X86: # %bb.0: 5876 ; X86-NEXT: vpmulhw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe5,0xd9] 5877 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5878 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 5879 ; X86-NEXT: vpmulhw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe5,0xd1] 5880 ; X86-NEXT: vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] 5881 ; X86-NEXT: retl # encoding: [0xc3] 5882 ; 5883 ; X64-LABEL: test_int_x86_avx512_mask_pmulh_w_128: 5884 ; X64: # %bb.0: 5885 ; X64-NEXT: vpmulhw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe5,0xd9] 5886 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 5887 ; X64-NEXT: vpmulhw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe5,0xd1] 5888 ; X64-NEXT: vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] 5889 ; X64-NEXT: retq # encoding: [0xc3] 5890 %res = call <8 x i16> @llvm.x86.avx512.mask.pmulh.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 5891 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmulh.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 5892 %res2 = add <8 x i16> %res, %res1 5893 ret <8 x i16> %res2 5894 } 5895 5896 declare <16 x i16> @llvm.x86.avx512.mask.pmulh.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 5897 5898 define <16 x i16>@test_int_x86_avx512_mask_pmulh_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 5899 ; X86-LABEL: test_int_x86_avx512_mask_pmulh_w_256: 5900 ; X86: # %bb.0: 5901 ; X86-NEXT: vpmulhw %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe5,0xd9] 5902 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 5903 ; X86-NEXT: vpmulhw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe5,0xd1] 5904 ; X86-NEXT: vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3] 5905 ; X86-NEXT: retl # encoding: [0xc3] 5906 ; 5907 ; X64-LABEL: test_int_x86_avx512_mask_pmulh_w_256: 5908 ; X64: # %bb.0: 5909 ; X64-NEXT: vpmulhw %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe5,0xd9] 5910 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 5911 ; X64-NEXT: vpmulhw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe5,0xd1] 5912 ; X64-NEXT: vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3] 5913 ; X64-NEXT: retq # encoding: [0xc3] 5914 %res = call <16 x i16> @llvm.x86.avx512.mask.pmulh.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 5915 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmulh.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 5916 %res2 = add <16 x i16> %res, %res1 5917 ret <16 x i16> %res2 5918 } 5919 5920 declare <8 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 5921 5922 define <8 x i16>@test_int_x86_avx512_mask_pmulhr_sw_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 5923 ; X86-LABEL: 
test_int_x86_avx512_mask_pmulhr_sw_128: 5924 ; X86: # %bb.0: 5925 ; X86-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0b,0xd9] 5926 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5927 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 5928 ; X86-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x0b,0xd1] 5929 ; X86-NEXT: vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] 5930 ; X86-NEXT: retl # encoding: [0xc3] 5931 ; 5932 ; X64-LABEL: test_int_x86_avx512_mask_pmulhr_sw_128: 5933 ; X64: # %bb.0: 5934 ; X64-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0b,0xd9] 5935 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 5936 ; X64-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x0b,0xd1] 5937 ; X64-NEXT: vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] 5938 ; X64-NEXT: retq # encoding: [0xc3] 5939 %res = call <8 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 5940 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 5941 %res2 = add <8 x i16> %res, %res1 5942 ret <8 x i16> %res2 5943 } 5944 5945 declare <16 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 5946 5947 define <16 x i16>@test_int_x86_avx512_mask_pmulhr_sw_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 5948 ; X86-LABEL: test_int_x86_avx512_mask_pmulhr_sw_256: 5949 ; X86: # %bb.0: 5950 ; X86-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0b,0xd9] 5951 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 5952 ; X86-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x0b,0xd1] 5953 ; X86-NEXT: vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3] 5954 ; X86-NEXT: retl # encoding: [0xc3] 5955 ; 5956 ; X64-LABEL: test_int_x86_avx512_mask_pmulhr_sw_256: 5957 ; X64: # %bb.0: 5958 ; X64-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0b,0xd9] 5959 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 5960 ; X64-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x0b,0xd1] 5961 ; X64-NEXT: vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3] 5962 ; X64-NEXT: retq # encoding: [0xc3] 5963 %res = call <16 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 5964 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 5965 %res2 = add <16 x i16> %res, %res1 5966 ret <16 x i16> %res2 5967 } 5968 5969 declare <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8>, <16 x i8>, <8 x i16>, i8) 5970 5971 define <8 x i16>@test_int_x86_avx512_mask_pmaddubs_w_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 %x3) { 5972 ; X86-LABEL: test_int_x86_avx512_mask_pmaddubs_w_128: 5973 ; X86: # %bb.0: 5974 ; X86-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x04,0xd9] 5975 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5976 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 5977 ; X86-NEXT: vpmaddubsw %xmm1, %xmm0, 
%xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x04,0xd1] 5978 ; X86-NEXT: vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] 5979 ; X86-NEXT: retl # encoding: [0xc3] 5980 ; 5981 ; X64-LABEL: test_int_x86_avx512_mask_pmaddubs_w_128: 5982 ; X64: # %bb.0: 5983 ; X64-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x04,0xd9] 5984 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 5985 ; X64-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x04,0xd1] 5986 ; X64-NEXT: vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] 5987 ; X64-NEXT: retq # encoding: [0xc3] 5988 %res = call <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 %x3) 5989 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 -1) 5990 %res2 = add <8 x i16> %res, %res1 5991 ret <8 x i16> %res2 5992 } 5993 5994 declare <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8>, <32 x i8>, <16 x i16>, i16) 5995 5996 define <16 x i16>@test_int_x86_avx512_mask_pmaddubs_w_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 %x3) { 5997 ; X86-LABEL: test_int_x86_avx512_mask_pmaddubs_w_256: 5998 ; X86: # %bb.0: 5999 ; X86-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x04,0xd9] 6000 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 6001 ; X86-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x04,0xd1] 6002 ; X86-NEXT: vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3] 6003 ; X86-NEXT: retl # encoding: [0xc3] 6004 ; 6005 ; X64-LABEL: test_int_x86_avx512_mask_pmaddubs_w_256: 6006 ; X64: # %bb.0: 6007 ; X64-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x04,0xd9] 6008 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6009 ; X64-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x04,0xd1] 6010 ; X64-NEXT: vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3] 6011 ; X64-NEXT: retq # encoding: [0xc3] 6012 %res = call <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 %x3) 6013 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 -1) 6014 %res2 = add <16 x i16> %res, %res1 6015 ret <16 x i16> %res2 6016 } 6017 6018 declare <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16>, <8 x i16>, <4 x i32>, i8) 6019 6020 define <4 x i32>@test_int_x86_avx512_mask_pmaddw_d_128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3) { 6021 ; X86-LABEL: test_int_x86_avx512_mask_pmaddw_d_128: 6022 ; X86: # %bb.0: 6023 ; X86-NEXT: vpmaddwd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf5,0xd9] 6024 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6025 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 6026 ; X86-NEXT: vpmaddwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf5,0xd1] 6027 ; X86-NEXT: vpaddd %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc3] 6028 ; X86-NEXT: retl # encoding: [0xc3] 6029 ; 6030 ; X64-LABEL: test_int_x86_avx512_mask_pmaddw_d_128: 6031 ; X64: # %bb.0: 6032 ; X64-NEXT: vpmaddwd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: 
[0xc5,0xf9,0xf5,0xd9] 6033 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6034 ; X64-NEXT: vpmaddwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf5,0xd1] 6035 ; X64-NEXT: vpaddd %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc3] 6036 ; X64-NEXT: retq # encoding: [0xc3] 6037 %res = call <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3) 6038 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 -1) 6039 %res2 = add <4 x i32> %res, %res1 6040 ret <4 x i32> %res2 6041 } 6042 6043 declare <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16>, <16 x i16>, <8 x i32>, i8) 6044 6045 define <8 x i32>@test_int_x86_avx512_mask_pmaddw_d_256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 %x3) { 6046 ; X86-LABEL: test_int_x86_avx512_mask_pmaddw_d_256: 6047 ; X86: # %bb.0: 6048 ; X86-NEXT: vpmaddwd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf5,0xd9] 6049 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6050 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 6051 ; X86-NEXT: vpmaddwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf5,0xd1] 6052 ; X86-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3] 6053 ; X86-NEXT: retl # encoding: [0xc3] 6054 ; 6055 ; X64-LABEL: test_int_x86_avx512_mask_pmaddw_d_256: 6056 ; X64: # %bb.0: 6057 ; X64-NEXT: vpmaddwd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf5,0xd9] 6058 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6059 ; X64-NEXT: vpmaddwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf5,0xd1] 6060 ; X64-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3] 6061 ; X64-NEXT: retq # encoding: [0xc3] 6062 %res = call <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 %x3) 6063 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 -1) 6064 %res2 = add <8 x i32> %res, %res1 6065 ret <8 x i32> %res2 6066 } 6067 6068 declare <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 6069 6070 define <8 x i16>@test_int_x86_avx512_mask_permvar_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 6071 ; X86-LABEL: test_int_x86_avx512_mask_permvar_hi_128: 6072 ; X86: # %bb.0: 6073 ; X86-NEXT: vpermw %xmm0, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0x8d,0xd8] 6074 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6075 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 6076 ; X86-NEXT: vpermw %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x8d,0xd0] 6077 ; X86-NEXT: vpermw %xmm0, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x8d,0xc0] 6078 ; X86-NEXT: vpaddw %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3] 6079 ; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] 6080 ; X86-NEXT: retl # encoding: [0xc3] 6081 ; 6082 ; X64-LABEL: test_int_x86_avx512_mask_permvar_hi_128: 6083 ; X64: # %bb.0: 6084 ; X64-NEXT: vpermw %xmm0, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0x8d,0xd8] 6085 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6086 ; X64-NEXT: vpermw %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x8d,0xd0] 6087 ; 
X64-NEXT: vpermw %xmm0, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x8d,0xc0] 6088 ; X64-NEXT: vpaddw %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3] 6089 ; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] 6090 ; X64-NEXT: retq # encoding: [0xc3] 6091 %res = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 6092 %res1 = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3) 6093 %res2 = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 6094 %res3 = add <8 x i16> %res, %res1 6095 %res4 = add <8 x i16> %res3, %res2 6096 ret <8 x i16> %res4 6097 } 6098 6099 declare <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 6100 6101 define <16 x i16>@test_int_x86_avx512_mask_permvar_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 6102 ; X86-LABEL: test_int_x86_avx512_mask_permvar_hi_256: 6103 ; X86: # %bb.0: 6104 ; X86-NEXT: vpermw %ymm0, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x8d,0xd8] 6105 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 6106 ; X86-NEXT: vpermw %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x8d,0xd0] 6107 ; X86-NEXT: vpermw %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x8d,0xc0] 6108 ; X86-NEXT: vpaddw %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3] 6109 ; X86-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] 6110 ; X86-NEXT: retl # encoding: [0xc3] 6111 ; 6112 ; X64-LABEL: test_int_x86_avx512_mask_permvar_hi_256: 6113 ; X64: # %bb.0: 6114 ; X64-NEXT: vpermw %ymm0, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x8d,0xd8] 6115 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6116 ; X64-NEXT: vpermw %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x8d,0xd0] 6117 ; X64-NEXT: vpermw %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x8d,0xc0] 6118 ; X64-NEXT: vpaddw %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3] 6119 ; X64-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] 6120 ; X64-NEXT: retq # encoding: [0xc3] 6121 %res = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 6122 %res1 = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3) 6123 %res2 = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 6124 %res3 = add <16 x i16> %res, %res1 6125 %res4 = add <16 x i16> %res3, %res2 6126 ret <16 x i16> %res4 6127 } 6128 6129 declare <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 6130 6131 define <8 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 6132 ; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_128: 6133 ; X86: # %bb.0: 6134 ; X86-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9] 6135 ; X86-NEXT: vpermt2w %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x7d,0xda] 6136 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6137 ; X86-NEXT: kmovd %eax, %k1 # encoding: 
[0xc5,0xfb,0x92,0xc8] 6138 ; X86-NEXT: vpermt2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7d,0xca] 6139 ; X86-NEXT: vpaddw %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc3] 6140 ; X86-NEXT: retl # encoding: [0xc3] 6141 ; 6142 ; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_128: 6143 ; X64: # %bb.0: 6144 ; X64-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9] 6145 ; X64-NEXT: vpermt2w %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x7d,0xda] 6146 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6147 ; X64-NEXT: vpermt2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7d,0xca] 6148 ; X64-NEXT: vpaddw %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc3] 6149 ; X64-NEXT: retq # encoding: [0xc3] 6150 %res = call <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 6151 %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 6152 %res2 = add <8 x i16> %res, %res1 6153 ret <8 x i16> %res2 6154 } 6155 6156 declare <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 6157 6158 define <8 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 6159 ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_128: 6160 ; X86: # %bb.0: 6161 ; X86-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9] 6162 ; X86-NEXT: vpermt2w %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x7d,0xda] 6163 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6164 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 6165 ; X86-NEXT: vpermt2w %xmm2, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x7d,0xca] 6166 ; X86-NEXT: vpaddw %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc3] 6167 ; X86-NEXT: retl # encoding: [0xc3] 6168 ; 6169 ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_128: 6170 ; X64: # %bb.0: 6171 ; X64-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9] 6172 ; X64-NEXT: vpermt2w %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x7d,0xda] 6173 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6174 ; X64-NEXT: vpermt2w %xmm2, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x7d,0xca] 6175 ; X64-NEXT: vpaddw %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc3] 6176 ; X64-NEXT: retq # encoding: [0xc3] 6177 %res = call <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 6178 %res1 = call <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 6179 %res2 = add <8 x i16> %res, %res1 6180 ret <8 x i16> %res2 6181 } 6182 6183 declare <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 6184 6185 define <16 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 6186 ; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_256: 6187 ; X86: # %bb.0: 6188 ; X86-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9] 6189 ; X86-NEXT: vpermt2w %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x7d,0xda] 6190 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 6191 
; X86-NEXT: vpermt2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7d,0xca] 6192 ; X86-NEXT: vpaddw %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc3] 6193 ; X86-NEXT: retl # encoding: [0xc3] 6194 ; 6195 ; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_256: 6196 ; X64: # %bb.0: 6197 ; X64-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9] 6198 ; X64-NEXT: vpermt2w %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x7d,0xda] 6199 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6200 ; X64-NEXT: vpermt2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7d,0xca] 6201 ; X64-NEXT: vpaddw %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc3] 6202 ; X64-NEXT: retq # encoding: [0xc3] 6203 %res = call <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 6204 %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 6205 %res2 = add <16 x i16> %res, %res1 6206 ret <16 x i16> %res2 6207 } 6208 6209 declare <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 6210 6211 define <16 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 6212 ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_256: 6213 ; X86: # %bb.0: 6214 ; X86-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9] 6215 ; X86-NEXT: vpermt2w %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x7d,0xda] 6216 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 6217 ; X86-NEXT: vpermt2w %ymm2, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x7d,0xca] 6218 ; X86-NEXT: vpaddw %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc3] 6219 ; X86-NEXT: retl # encoding: [0xc3] 6220 ; 6221 ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_256: 6222 ; X64: # %bb.0: 6223 ; X64-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9] 6224 ; X64-NEXT: vpermt2w %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x7d,0xda] 6225 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6226 ; X64-NEXT: vpermt2w %ymm2, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x7d,0xca] 6227 ; X64-NEXT: vpaddw %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc3] 6228 ; X64-NEXT: retq # encoding: [0xc3] 6229 %res = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 6230 %res1 = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 6231 %res2 = add <16 x i16> %res, %res1 6232 ret <16 x i16> %res2 6233 } 6234 6235 declare <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 6236 6237 define <8 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 6238 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_128: 6239 ; X86: # %bb.0: 6240 ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 6241 ; X86-NEXT: vpermt2w %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0x7d,0xda] 6242 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6243 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 6244 ; 
X86-NEXT: vpermi2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x75,0xca] 6245 ; X86-NEXT: vpaddw %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc3] 6246 ; X86-NEXT: retl # encoding: [0xc3] 6247 ; 6248 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_128: 6249 ; X64: # %bb.0: 6250 ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 6251 ; X64-NEXT: vpermt2w %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0x7d,0xda] 6252 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6253 ; X64-NEXT: vpermi2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x75,0xca] 6254 ; X64-NEXT: vpaddw %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc3] 6255 ; X64-NEXT: retq # encoding: [0xc3] 6256 %res = call <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 6257 %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 6258 %res2 = add <8 x i16> %res, %res1 6259 ret <8 x i16> %res2 6260 } 6261 6262 declare <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 6263 6264 define <16 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 6265 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_256: 6266 ; X86: # %bb.0: 6267 ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 6268 ; X86-NEXT: vpermt2w %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x7d,0xda] 6269 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 6270 ; X86-NEXT: vpermi2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x75,0xca] 6271 ; X86-NEXT: vpaddw %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc3] 6272 ; X86-NEXT: retl # encoding: [0xc3] 6273 ; 6274 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_256: 6275 ; X64: # %bb.0: 6276 ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 6277 ; X64-NEXT: vpermt2w %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x7d,0xda] 6278 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6279 ; X64-NEXT: vpermi2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x75,0xca] 6280 ; X64-NEXT: vpaddw %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc3] 6281 ; X64-NEXT: retq # encoding: [0xc3] 6282 %res = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 6283 %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 6284 %res2 = add <16 x i16> %res, %res1 6285 ret <16 x i16> %res2 6286 } 6287 6288 declare <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8>, <16 x i8>, i32, <8 x i16>, i8) 6289 6290 define <8 x i16>@test_int_x86_avx512_mask_dbpsadbw_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x3, i8 %x4) { 6291 ; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_128: 6292 ; X86: # %bb.0: 6293 ; X86-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xd9,0x02] 6294 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6295 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 6296 ; X86-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xd1,0x02] 6297 ; X86-NEXT: 
vdbpsadbw $2, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xc1,0x02] 6298 ; X86-NEXT: vpaddw %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3] 6299 ; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] 6300 ; X86-NEXT: retl # encoding: [0xc3] 6301 ; 6302 ; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_128: 6303 ; X64: # %bb.0: 6304 ; X64-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xd9,0x02] 6305 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6306 ; X64-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xd1,0x02] 6307 ; X64-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xc1,0x02] 6308 ; X64-NEXT: vpaddw %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3] 6309 ; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] 6310 ; X64-NEXT: retq # encoding: [0xc3] 6311 %res = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <8 x i16> %x3, i8 %x4) 6312 %res1 = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <8 x i16> zeroinitializer, i8 %x4) 6313 %res2 = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <8 x i16> %x3, i8 -1) 6314 %res3 = add <8 x i16> %res, %res1 6315 %res4 = add <8 x i16> %res2, %res3 6316 ret <8 x i16> %res4 6317 } 6318 6319 declare <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8>, <32 x i8>, i32, <16 x i16>, i16) 6320 6321 define <16 x i16>@test_int_x86_avx512_mask_dbpsadbw_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x3, i16 %x4) { 6322 ; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_256: 6323 ; X86: # %bb.0: 6324 ; X86-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xd9,0x02] 6325 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 6326 ; X86-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xd1,0x02] 6327 ; X86-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xc1,0x02] 6328 ; X86-NEXT: vpaddw %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3] 6329 ; X86-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] 6330 ; X86-NEXT: retl # encoding: [0xc3] 6331 ; 6332 ; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_256: 6333 ; X64: # %bb.0: 6334 ; X64-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xd9,0x02] 6335 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6336 ; X64-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xd1,0x02] 6337 ; X64-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xc1,0x02] 6338 ; X64-NEXT: vpaddw %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3] 6339 ; X64-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] 6340 ; X64-NEXT: retq # encoding: [0xc3] 6341 %res = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <16 x i16> %x3, i16 %x4) 6342 %res1 = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <16 x i16> zeroinitializer, i16 %x4) 6343 %res2 = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, 
<32 x i8> %x1, i32 2, <16 x i16> %x3, i16 -1) 6344 %res3 = add <16 x i16> %res, %res1 6345 %res4 = add <16 x i16> %res3, %res2 6346 ret <16 x i16> %res4 6347 } 6348