; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx512f --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone

define i16 @unpckbw_test(i16 %a0, i16 %a1) {
; X86-LABEL: unpckbw_test:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k0 ## encoding: [0xc5,0xf8,0x92,0xc0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    kunpckbw %k1, %k0, %k0 ## encoding: [0xc5,0xfd,0x4b,0xc1]
; X86-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: unpckbw_test:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k0 ## encoding: [0xc5,0xf8,0x92,0xc7]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    kunpckbw %k1, %k0, %k0 ## encoding: [0xc5,0xfd,0x4b,0xc1]
; X64-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
  ret i16 %res
}

define <16 x i32>@test_int_x86_avx512_mask_pbroadcastd_gpr_512(i32 %x0, <16 x i32> %x1, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcastd_gpr_512:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpbroadcastd %eax, %zmm1 ## encoding: [0x62,0xf2,0x7d,0x48,0x7c,0xc8]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpbroadcastd %eax, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x7c,0xc0]
; X86-NEXT:    vpbroadcastd %eax, %zmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x7c,0xd0]
; X86-NEXT:    vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcastd_gpr_512:
; X64:       ## %bb.0:
; X64-NEXT:    vpbroadcastd %edi, %zmm1 ## encoding: [0x62,0xf2,0x7d,0x48,0x7c,0xcf]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpbroadcastd %edi, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x7c,0xc7]
; X64-NEXT:    vpbroadcastd %edi, %zmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x7c,0xd7]
; X64-NEXT:    vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32 %x0, <16 x i32> %x1, i16 -1)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32 %x0, <16 x i32> %x1, i16 %mask)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32 %x0, <16 x i32> zeroinitializer, i16 %mask)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res2, %res3
  ret <16 x i32> %res4
}
declare <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32, <16 x i32>, i16)

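; Each masked-broadcast test calls the intrinsic three ways: with an all-ones
; mask (plain broadcast), with a merge mask into the pass-through operand, and
; with a zeroing mask, then sums the results so none of the three lowerings is
; dead. For the i64 GPR broadcast below, the 32-bit target cannot broadcast
; from a 64-bit GPR (that vpbroadcastq form needs a 64-bit register operand),
; so the scalar is reloaded from the stack with vmovq and broadcast from
; %xmm1; the 64-bit target broadcasts %rdi directly.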
define <8 x i64>@test_int_x86_avx512_mask_pbroadcastq_gpr_512(i64 %x0, <8 x i64> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcastq_gpr_512:
; X86:       ## %bb.0:
; X86-NEXT:    vmovq {{[0-9]+}}(%esp), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x4c,0x24,0x04]
; X86-NEXT:    ## xmm1 = mem[0],zero
; X86-NEXT:    vpbroadcastq %xmm1, %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd1]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x0c]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpbroadcastq %xmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x59,0xc1]
; X86-NEXT:    vpbroadcastq %xmm1, %zmm1 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x59,0xc9]
; X86-NEXT:    vpaddq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc1]
; X86-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcastq_gpr_512:
; X64:       ## %bb.0:
; X64-NEXT:    vpbroadcastq %rdi, %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x7c,0xcf]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpbroadcastq %rdi, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x7c,0xc7]
; X64-NEXT:    vpbroadcastq %rdi, %zmm2 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x7c,0xd7]
; X64-NEXT:    vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64 %x0, <8 x i64> %x1,i8 -1)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64 %x0, <8 x i64> %x1,i8 %mask)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64 %x0, <8 x i64> zeroinitializer,i8 %mask)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res2, %res3
  ret <8 x i64> %res4
}
declare <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64, <8 x i64>, i8)

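; The legacy vbroadcastss/vbroadcastsd intrinsics splat the low element of an
; xmm source across all 16 (resp. 8) lanes; the zeroing variant reuses the
; same broadcast with {%k1} {z} rather than a separate blend.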
declare <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float>, <16 x float>, i16) nounwind readonly

define <16 x float> @test_x86_vbroadcast_ss_ps_512(<4 x float> %a0, <16 x float> %a1, i16 %mask ) {
; X86-LABEL: test_x86_vbroadcast_ss_ps_512:
; X86:       ## %bb.0:
; X86-NEXT:    vbroadcastss %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x18,0xd0]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vbroadcastss %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x18,0xc8]
; X86-NEXT:    vaddps %zmm1, %zmm2, %zmm1 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc9]
; X86-NEXT:    vbroadcastss %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x18,0xc0]
; X86-NEXT:    vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_vbroadcast_ss_ps_512:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastss %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x18,0xd0]
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vbroadcastss %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x18,0xc8]
; X64-NEXT:    vaddps %zmm1, %zmm2, %zmm1 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc9]
; X64-NEXT:    vbroadcastss %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x18,0xc0]
; X64-NEXT:    vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]

  %res = call <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float> %a0, <16 x float> zeroinitializer, i16 -1)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float> %a0, <16 x float> %a1, i16 %mask)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float> %a0, <16 x float> zeroinitializer, i16 %mask)
  %res3 = fadd <16 x float> %res, %res1
  %res4 = fadd <16 x float> %res2, %res3
  ret <16 x float> %res4
}

declare <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double>, <8 x double>, i8) nounwind readonly

define <8 x double> @test_x86_vbroadcast_sd_pd_512(<2 x double> %a0, <8 x double> %a1, i8 %mask ) {
; X86-LABEL: test_x86_vbroadcast_sd_pd_512:
; X86:       ## %bb.0:
; X86-NEXT:    vbroadcastsd %xmm0, %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x19,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vbroadcastsd %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x19,0xc8]
; X86-NEXT:    vaddpd %zmm1, %zmm2, %zmm1 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc9]
; X86-NEXT:    vbroadcastsd %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x19,0xc0]
; X86-NEXT:    vaddpd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_vbroadcast_sd_pd_512:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastsd %xmm0, %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x19,0xd0]
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vbroadcastsd %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x19,0xc8]
; X64-NEXT:    vaddpd %zmm1, %zmm2, %zmm1 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc9]
; X64-NEXT:    vbroadcastsd %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x19,0xc0]
; X64-NEXT:    vaddpd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]

  %res = call <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double> %a0, <8 x double> zeroinitializer, i8 -1)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double> %a0, <8 x double> %a1, i8 %mask)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double> %a0, <8 x double> zeroinitializer, i8 %mask)
  %res3 = fadd <8 x double> %res, %res1
  %res4 = fadd <8 x double> %res2, %res3
  ret <8 x double> %res4
}

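; vpbroadcastd/vpbroadcastq from an xmm source, again in unmasked,
; merge-masked {%k1}, and zero-masked {%k1} {z} forms.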
declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_pbroadcastd_512(<4 x i32> %x0, <16 x i32> %x1, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_pbroadcastd_512:
; X86:       ## %bb.0:
; X86-NEXT:    vpbroadcastd %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x58,0xd0]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpbroadcastd %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x58,0xc8]
; X86-NEXT:    vpbroadcastd %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x58,0xc0]
; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_pbroadcastd_512:
; X64:       ## %bb.0:
; X64-NEXT:    vpbroadcastd %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x58,0xd0]
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpbroadcastd %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x58,0xc8]
; X64-NEXT:    vpbroadcastd %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x58,0xc0]
; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> %x1, i16 -1)
  %res1 = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> %x1, i16 %mask)
  %res2 = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %mask)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res2, %res3
  ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_pbroadcastq_512(<2 x i64> %x0, <8 x i64> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_pbroadcastq_512:
; X86:       ## %bb.0:
; X86-NEXT:    vpbroadcastq %xmm0, %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpbroadcastq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x59,0xc8]
; X86-NEXT:    vpbroadcastq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x59,0xc0]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_pbroadcastq_512:
; X64:       ## %bb.0:
; X64-NEXT:    vpbroadcastq %xmm0, %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd0]
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpbroadcastq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x59,0xc8]
; X64-NEXT:    vpbroadcastq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x59,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> %x1,i8 -1)
  %res1 = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> %x1,i8 %mask)
  %res2 = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> zeroinitializer,i8 %mask)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res2, %res3
  ret <8 x i64> %res4
}

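; vmovsldup/vmovshdup duplicate the even/odd float lanes and vmovddup
; duplicates the even double lanes; the expected element patterns are spelled
; out in the autogenerated ## shuffle comments.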
declare <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float>, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_mask_movsldup_512(<16 x float> %x0, <16 x float> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movsldup_512:
; X86:       ## %bb.0:
; X86-NEXT:    vmovsldup %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x7e,0x48,0x12,0xd0]
; X86-NEXT:    ## zmm2 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vmovsldup %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x12,0xc8]
; X86-NEXT:    ## zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X86-NEXT:    vaddps %zmm2, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xca]
; X86-NEXT:    vmovsldup %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xc9,0x12,0xc0]
; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X86-NEXT:    vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movsldup_512:
; X64:       ## %bb.0:
; X64-NEXT:    vmovsldup %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x7e,0x48,0x12,0xd0]
; X64-NEXT:    ## zmm2 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovsldup %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x12,0xc8]
; X64-NEXT:    ## zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X64-NEXT:    vaddps %zmm2, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xca]
; X64-NEXT:    vmovsldup %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xc9,0x12,0xc0]
; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X64-NEXT:    vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> %x1, i16 %x2)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> %x1, i16 -1)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> zeroinitializer, i16 %x2)
  %res3 = fadd <16 x float> %res, %res1
  %res4 = fadd <16 x float> %res2, %res3
  ret <16 x float> %res4
}

declare <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float>, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_mask_movshdup_512(<16 x float> %x0, <16 x float> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movshdup_512:
; X86:       ## %bb.0:
; X86-NEXT:    vmovshdup %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x7e,0x48,0x16,0xd0]
; X86-NEXT:    ## zmm2 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vmovshdup %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x16,0xc8]
; X86-NEXT:    ## zmm1 {%k1} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X86-NEXT:    vaddps %zmm2, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xca]
; X86-NEXT:    vmovshdup %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xc9,0x16,0xc0]
; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X86-NEXT:    vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movshdup_512:
; X64:       ## %bb.0:
; X64-NEXT:    vmovshdup %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x7e,0x48,0x16,0xd0]
; X64-NEXT:    ## zmm2 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovshdup %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x16,0xc8]
; X64-NEXT:    ## zmm1 {%k1} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X64-NEXT:    vaddps %zmm2, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xca]
; X64-NEXT:    vmovshdup %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xc9,0x16,0xc0]
; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X64-NEXT:    vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> %x1, i16 %x2)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> %x1, i16 -1)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> zeroinitializer, i16 %x2)
  %res3 = fadd <16 x float> %res, %res1
  %res4 = fadd <16 x float> %res2, %res3
  ret <16 x float> %res4
}

declare <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double>, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_mask_movddup_512(<8 x double> %x0, <8 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movddup_512:
; X86:       ## %bb.0:
; X86-NEXT:    vmovddup %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xff,0x48,0x12,0xd0]
; X86-NEXT:    ## zmm2 = zmm0[0,0,2,2,4,4,6,6]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovddup %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x49,0x12,0xc8]
; X86-NEXT:    ## zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6]
; X86-NEXT:    vaddpd %zmm2, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xca]
; X86-NEXT:    vmovddup %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0xc9,0x12,0xc0]
; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
; X86-NEXT:    vaddpd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movddup_512:
; X64:       ## %bb.0:
; X64-NEXT:    vmovddup %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xff,0x48,0x12,0xd0]
; X64-NEXT:    ## zmm2 = zmm0[0,0,2,2,4,4,6,6]
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovddup %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x49,0x12,0xc8]
; X64-NEXT:    ## zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6]
; X64-NEXT:    vaddpd %zmm2, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xca]
; X64-NEXT:    vmovddup %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0xc9,0x12,0xc0]
; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
; X64-NEXT:    vaddpd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> %x1, i8 %x2)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> %x1, i8 -1)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> zeroinitializer, i8 %x2)
  %res3 = fadd <8 x double> %res, %res1
  %res4 = fadd <8 x double> %res2, %res3
  ret <8 x double> %res4
}

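; vpermpd/vpermq with an immediate permute the 64-bit elements within each
; 256-bit half using the same 2-bit selectors for both halves, hence the
; repeating [3,0,0,0,7,4,4,4] pattern for imm 3.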
declare <8 x double> @llvm.x86.avx512.mask.perm.df.512(<8 x double>, i32, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_mask_perm_df_512(<8 x double> %x0, i32 %x1, <8 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_perm_df_512:
; X86:       ## %bb.0:
; X86-NEXT:    vpermpd $3, %zmm0, %zmm2 ## encoding: [0x62,0xf3,0xfd,0x48,0x01,0xd0,0x03]
; X86-NEXT:    ## zmm2 = zmm0[3,0,0,0,7,4,4,4]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermpd $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x01,0xc8,0x03]
; X86-NEXT:    ## zmm1 {%k1} = zmm0[3,0,0,0,7,4,4,4]
; X86-NEXT:    vpermpd $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x01,0xc0,0x03]
; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[3,0,0,0,7,4,4,4]
; X86-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X86-NEXT:    vaddpd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_perm_df_512:
; X64:       ## %bb.0:
; X64-NEXT:    vpermpd $3, %zmm0, %zmm2 ## encoding: [0x62,0xf3,0xfd,0x48,0x01,0xd0,0x03]
; X64-NEXT:    ## zmm2 = zmm0[3,0,0,0,7,4,4,4]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpermpd $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x01,0xc8,0x03]
; X64-NEXT:    ## zmm1 {%k1} = zmm0[3,0,0,0,7,4,4,4]
; X64-NEXT:    vpermpd $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x01,0xc0,0x03]
; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[3,0,0,0,7,4,4,4]
; X64-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X64-NEXT:    vaddpd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.perm.df.512(<8 x double> %x0, i32 3, <8 x double> %x2, i8 %x3)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.perm.df.512(<8 x double> %x0, i32 3, <8 x double> zeroinitializer, i8 %x3)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.perm.df.512(<8 x double> %x0, i32 3, <8 x double> %x2, i8 -1)
  %res3 = fadd <8 x double> %res, %res1
  %res4 = fadd <8 x double> %res3, %res2
  ret <8 x double> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.perm.di.512(<8 x i64>, i32, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_perm_di_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_perm_di_512:
; X86:       ## %bb.0:
; X86-NEXT:    vpermq $3, %zmm0, %zmm2 ## encoding: [0x62,0xf3,0xfd,0x48,0x00,0xd0,0x03]
; X86-NEXT:    ## zmm2 = zmm0[3,0,0,0,7,4,4,4]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x00,0xc8,0x03]
; X86-NEXT:    ## zmm1 {%k1} = zmm0[3,0,0,0,7,4,4,4]
; X86-NEXT:    vpermq $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x00,0xc0,0x03]
; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[3,0,0,0,7,4,4,4]
; X86-NEXT:    vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_perm_di_512:
; X64:       ## %bb.0:
; X64-NEXT:    vpermq $3, %zmm0, %zmm2 ## encoding: [0x62,0xf3,0xfd,0x48,0x00,0xd0,0x03]
; X64-NEXT:    ## zmm2 = zmm0[3,0,0,0,7,4,4,4]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpermq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x00,0xc8,0x03]
; X64-NEXT:    ## zmm1 {%k1} = zmm0[3,0,0,0,7,4,4,4]
; X64-NEXT:    vpermq $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x00,0xc0,0x03]
; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[3,0,0,0,7,4,4,4]
; X64-NEXT:    vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.perm.di.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.perm.di.512(<8 x i64> %x0, i32 3, <8 x i64> zeroinitializer, i8 %x3)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.perm.di.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 -1)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res3, %res2
  ret <8 x i64> %res4
}

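; Masked-store tests: the %mask call lowers to a store with a {%k1}
; write-mask, while the all-ones mask folds to a plain unmasked
; vmovups/vmovupd; vzeroupper is emitted before each return because the
; function used the vector registers' upper state.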
define void @test_store1(<16 x float> %data, i8* %ptr, i8* %ptr2, i16 %mask) {
; X86-LABEL: test_store1:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT:    vmovups %zmm0, (%ecx) {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x11,0x01]
; X86-NEXT:    vmovups %zmm0, (%eax) ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_store1:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovups %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x11,0x07]
; X64-NEXT:    vmovups %zmm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x06]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
  call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr2, <16 x float> %data, i16 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.ps.512(i8*, <16 x float>, i16 )

define void @test_store2(<8 x double> %data, i8* %ptr, i8* %ptr2, i8 %mask) {
; X86-LABEL: test_store2:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx ## encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovupd %zmm0, (%ecx) {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x11,0x01]
; X86-NEXT:    vmovupd %zmm0, (%eax) ## encoding: [0x62,0xf1,0xfd,0x48,0x11,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_store2:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovupd %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x11,0x07]
; X64-NEXT:    vmovupd %zmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x48,0x11,0x06]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
  call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr2, <8 x double> %data, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8)

define void @test_mask_store_aligned_ps(<16 x float> %data, i8* %ptr, i8* %ptr2, i16 %mask) {
; X86-LABEL: test_mask_store_aligned_ps:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT:    vmovaps %zmm0, (%ecx) {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x29,0x01]
; X86-NEXT:    vmovaps %zmm0, (%eax) ## encoding: [0x62,0xf1,0x7c,0x48,0x29,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_store_aligned_ps:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovaps %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x29,0x07]
; X64-NEXT:    vmovaps %zmm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x48,0x29,0x06]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
  call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr2, <16 x float> %data, i16 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.ps.512(i8*, <16 x float>, i16 )

define void @test_mask_store_aligned_pd(<8 x double> %data, i8* %ptr, i8* %ptr2, i8 %mask) {
; X86-LABEL: test_mask_store_aligned_pd:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx ## encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovapd %zmm0, (%ecx) {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x29,0x01]
; X86-NEXT:    vmovapd %zmm0, (%eax) ## encoding: [0x62,0xf1,0xfd,0x48,0x29,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_store_aligned_pd:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovapd %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x29,0x07]
; X64-NEXT:    vmovapd %zmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x48,0x29,0x06]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
  call void @llvm.x86.avx512.mask.store.pd.512(i8* %ptr2, <8 x double> %data, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.pd.512(i8*, <8 x double>, i8)

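; Integer stores use vmovdqu32/vmovdqu64 for the unaligned intrinsics and
; vmovdqa32/vmovdqa64 for the aligned ones. The element width only matters
; when a write-mask is applied, so the unmasked (-1) store is emitted as the
; 64-bit-element form even for the .d intrinsics.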
define void@test_int_x86_avx512_mask_storeu_q_512(i8* %ptr1, i8* %ptr2, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx ## encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovdqu64 %zmm0, (%ecx) {%k1} ## encoding: [0x62,0xf1,0xfe,0x49,0x7f,0x01]
; X86-NEXT:    vmovdqu64 %zmm0, (%eax) ## encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqu64 %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfe,0x49,0x7f,0x07]
; X64-NEXT:    vmovdqu64 %zmm0, (%rsi) ## encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x06]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.q.512(i8* %ptr1, <8 x i64> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.q.512(i8* %ptr2, <8 x i64> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.q.512(i8*, <8 x i64>, i8)

define void@test_int_x86_avx512_mask_storeu_d_512(i8* %ptr1, i8* %ptr2, <16 x i32> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_d_512:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT:    vmovdqu32 %zmm0, (%ecx) {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x7f,0x01]
; X86-NEXT:    vmovdqu64 %zmm0, (%eax) ## encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_d_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqu32 %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x7f,0x07]
; X64-NEXT:    vmovdqu64 %zmm0, (%rsi) ## encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x06]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.d.512(i8* %ptr1, <16 x i32> %x1, i16 %x2)
  call void @llvm.x86.avx512.mask.storeu.d.512(i8* %ptr2, <16 x i32> %x1, i16 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.d.512(i8*, <16 x i32>, i16)

define void@test_int_x86_avx512_mask_store_q_512(i8* %ptr1, i8* %ptr2, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_store_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx ## encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovdqa64 %zmm0, (%ecx) {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x7f,0x01]
; X86-NEXT:    vmovdqa64 %zmm0, (%eax) ## encoding: [0x62,0xf1,0xfd,0x48,0x7f,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_store_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqa64 %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x7f,0x07]
; X64-NEXT:    vmovdqa64 %zmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x48,0x7f,0x06]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.q.512(i8* %ptr1, <8 x i64> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.q.512(i8* %ptr2, <8 x i64> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.q.512(i8*, <8 x i64>, i8)

define void@test_int_x86_avx512_mask_store_d_512(i8* %ptr1, i8* %ptr2, <16 x i32> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_store_d_512:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT:    vmovdqa32 %zmm0, (%ecx) {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x7f,0x01]
; X86-NEXT:    vmovdqa64 %zmm0, (%eax) ## encoding: [0x62,0xf1,0xfd,0x48,0x7f,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_store_d_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqa32 %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x7f,0x07]
; X64-NEXT:    vmovdqa64 %zmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x48,0x7f,0x06]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.d.512(i8* %ptr1, <16 x i32> %x1, i16 %x2)
  call void @llvm.x86.avx512.mask.store.d.512(i8* %ptr2, <16 x i32> %x1, i16 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.d.512(i8*, <16 x i32>, i16)

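; Masked-load tests mirror the stores: an unmasked load produces the
; pass-through value for the merge-masked reload, and a zero-masked {z}
; reload covers the third form; the two masked results are added so both
; stay live.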
define <16 x float> @test_mask_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
; X86-LABEL: test_mask_load_aligned_ps:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovaps (%eax), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0x00]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovaps (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x28,0x00]
; X86-NEXT:    vmovaps (%eax), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x28,0x08]
; X86-NEXT:    vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_load_aligned_ps:
; X64:       ## %bb.0:
; X64-NEXT:    vmovaps (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0x07]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovaps (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x28,0x07]
; X64-NEXT:    vmovaps (%rdi), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x28,0x0f]
; X64-NEXT:    vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 -1)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> %res, i16 %mask)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 %mask)
  %res4 = fadd <16 x float> %res2, %res1
  ret <16 x float> %res4
}

declare <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8*, <16 x float>, i16)

define <16 x float> @test_mask_load_unaligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
; X86-LABEL: test_mask_load_unaligned_ps:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovups (%eax), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x00]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovups (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x10,0x00]
; X86-NEXT:    vmovups (%eax), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x10,0x08]
; X86-NEXT:    vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_load_unaligned_ps:
; X64:       ## %bb.0:
; X64-NEXT:    vmovups (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x07]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovups (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x10,0x07]
; X64-NEXT:    vmovups (%rdi), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x10,0x0f]
; X64-NEXT:    vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.loadu.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 -1)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.loadu.ps.512(i8* %ptr, <16 x float> %res, i16 %mask)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.loadu.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 %mask)
  %res4 = fadd <16 x float> %res2, %res1
  ret <16 x float> %res4
}

declare <16 x float> @llvm.x86.avx512.mask.loadu.ps.512(i8*, <16 x float>, i16)

define <8 x double> @test_mask_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_aligned_pd:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovapd (%eax), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0x00]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vmovapd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x28,0x00]
; X86-NEXT:    vmovapd (%eax), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x28,0x08]
; X86-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_load_aligned_pd:
; X64:       ## %bb.0:
; X64-NEXT:    vmovapd (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0x07]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovapd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x28,0x07]
; X64-NEXT:    vmovapd (%rdi), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x28,0x0f]
; X64-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 -1)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> %res, i8 %mask)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 %mask)
  %res4 = fadd <8 x double> %res2, %res1
  ret <8 x double> %res4
}

declare <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8*, <8 x double>, i8)

define <8 x double> @test_mask_load_unaligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_unaligned_pd:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovupd (%eax), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x10,0x00]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vmovupd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x10,0x00]
; X86-NEXT:    vmovupd (%eax), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x10,0x08]
; X86-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_load_unaligned_pd:
; X64:       ## %bb.0:
; X64-NEXT:    vmovupd (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x10,0x07]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovupd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x10,0x07]
; X64-NEXT:    vmovupd (%rdi), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x10,0x0f]
; X64-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.loadu.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 -1)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.loadu.pd.512(i8* %ptr, <8 x double> %res, i8 %mask)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.loadu.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 %mask)
  %res4 = fadd <8 x double> %res2, %res1
  ret <8 x double> %res4
}

declare <8 x double> @llvm.x86.avx512.mask.loadu.pd.512(i8*, <8 x double>, i8)

declare <16 x i32> @llvm.x86.avx512.mask.loadu.d.512(i8*, <16 x i32>, i16)

define <16 x i32> @test_mask_load_unaligned_d(i8* %ptr, i8* %ptr2, <16 x i32> %data, i16 %mask) {
; X86-LABEL: test_mask_load_unaligned_d:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    vmovdqu64 (%ecx), %zmm0 ## encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x01]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT:    vmovdqu32 (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x6f,0x00]
; X86-NEXT:    vmovdqu32 (%ecx), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xc9,0x6f,0x09]
; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_load_unaligned_d:
; X64:       ## %bb.0:
; X64-NEXT:    vmovdqu64 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x07]
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqu32 (%rsi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x6f,0x06]
; X64-NEXT:    vmovdqu32 (%rdi), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xc9,0x6f,0x0f]
; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.loadu.d.512(i8* %ptr, <16 x i32> zeroinitializer, i16 -1)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.loadu.d.512(i8* %ptr2, <16 x i32> %res, i16 %mask)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.loadu.d.512(i8* %ptr, <16 x i32> zeroinitializer, i16 %mask)
  %res4 = add <16 x i32> %res2, %res1
  ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.loadu.q.512(i8*, <8 x i64>, i8)

define <8 x i64> @test_mask_load_unaligned_q(i8* %ptr, i8* %ptr2, <8 x i64> %data, i8 %mask) {
; X86-LABEL: test_mask_load_unaligned_q:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    vmovdqu64 (%ecx), %zmm0 ## encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x01]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx ## encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovdqu64 (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x49,0x6f,0x00]
; X86-NEXT:    vmovdqu64 (%ecx), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0xc9,0x6f,0x09]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_load_unaligned_q:
; X64:       ## %bb.0:
; X64-NEXT:    vmovdqu64 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x07]
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqu64 (%rsi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x49,0x6f,0x06]
; X64-NEXT:    vmovdqu64 (%rdi), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0xc9,0x6f,0x0f]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.loadu.q.512(i8* %ptr, <8 x i64> zeroinitializer, i8 -1)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.loadu.q.512(i8* %ptr2, <8 x i64> %res, i8 %mask)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.loadu.q.512(i8* %ptr, <8 x i64> zeroinitializer, i8 %mask)
  %res4 = add <8 x i64> %res2, %res1
  ret <8 x i64> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.load.d.512(i8*, <16 x i32>, i16)

define <16 x i32> @test_mask_load_aligned_d(<16 x i32> %data, i8* %ptr, i16 %mask) {
; X86-LABEL: test_mask_load_aligned_d:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovdqa64 (%eax), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x00]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovdqa32 (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6f,0x00]
; X86-NEXT:    vmovdqa32 (%eax), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0x08]
; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_load_aligned_d:
; X64:       ## %bb.0:
; X64-NEXT:    vmovdqa64 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x07]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovdqa32 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6f,0x07]
; X64-NEXT:    vmovdqa32 (%rdi), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0x0f]
; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.load.d.512(i8* %ptr, <16 x i32> zeroinitializer, i16 -1)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.load.d.512(i8* %ptr, <16 x i32> %res, i16 %mask)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.load.d.512(i8* %ptr, <16 x i32> zeroinitializer, i16 %mask)
  %res4 = add <16 x i32> %res2, %res1
  ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.load.q.512(i8*, <8 x i64>, i8)

define <8 x i64> @test_mask_load_aligned_q(<8 x i64> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_aligned_q:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovdqa64 (%eax), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x00]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vmovdqa64 (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x6f,0x00]
; X86-NEXT:    vmovdqa64 (%eax), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6f,0x08]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_load_aligned_q:
; X64:       ## %bb.0:
; X64-NEXT:    vmovdqa64 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x07]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovdqa64 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x6f,0x07]
; X64-NEXT:    vmovdqa64 (%rdi), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6f,0x0f]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.load.q.512(i8* %ptr, <8 x i64> zeroinitializer, i8 -1)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.load.q.512(i8* %ptr, <8 x i64> %res, i8 %mask)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.load.q.512(i8* %ptr, <8 x i64> zeroinitializer, i8 %mask)
  %res4 = add <8 x i64> %res2, %res1
  ret <8 x i64> %res4
}

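; vpermilpd/vpermilps permute elements within each 128-bit lane according to
; the immediate, and vpshufd is the integer counterpart; the autogenerated
; ## comments show the per-lane patterns produced by imm 22 and imm 3.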
declare <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double>, i32, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_mask_vpermil_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermil_pd_512:
; X86:       ## %bb.0:
; X86-NEXT:    vpermilpd $22, %zmm0, %zmm2 ## encoding: [0x62,0xf3,0xfd,0x48,0x05,0xd0,0x16]
; X86-NEXT:    ## zmm2 = zmm0[0,1,3,2,5,4,6,6]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermilpd $22, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x05,0xc8,0x16]
; X86-NEXT:    ## zmm1 {%k1} = zmm0[0,1,3,2,5,4,6,6]
; X86-NEXT:    vpermilpd $22, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x05,0xc0,0x16]
; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[0,1,3,2,5,4,6,6]
; X86-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X86-NEXT:    vaddpd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermil_pd_512:
; X64:       ## %bb.0:
; X64-NEXT:    vpermilpd $22, %zmm0, %zmm2 ## encoding: [0x62,0xf3,0xfd,0x48,0x05,0xd0,0x16]
; X64-NEXT:    ## zmm2 = zmm0[0,1,3,2,5,4,6,6]
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermilpd $22, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x05,0xc8,0x16]
; X64-NEXT:    ## zmm1 {%k1} = zmm0[0,1,3,2,5,4,6,6]
; X64-NEXT:    vpermilpd $22, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x05,0xc0,0x16]
; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[0,1,3,2,5,4,6,6]
; X64-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X64-NEXT:    vaddpd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 %x3)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> zeroinitializer, i8 %x3)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 -1)
  %res3 = fadd <8 x double> %res, %res1
  %res4 = fadd <8 x double> %res3, %res2
  ret <8 x double> %res4
}

declare <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float>, i32, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_mask_vpermil_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermil_ps_512:
; X86:       ## %bb.0:
; X86-NEXT:    vpermilps $22, %zmm0, %zmm2 ## encoding: [0x62,0xf3,0x7d,0x48,0x04,0xd0,0x16]
; X86-NEXT:    ## zmm2 = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermilps $22, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x04,0xc8,0x16]
; X86-NEXT:    ## zmm1 {%k1} = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
; X86-NEXT:    vpermilps $22, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x04,0xc0,0x16]
; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
; X86-NEXT:    vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
; X86-NEXT:    vaddps %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermil_ps_512:
; X64:       ## %bb.0:
; X64-NEXT:    vpermilps $22, %zmm0, %zmm2 ## encoding: [0x62,0xf3,0x7d,0x48,0x04,0xd0,0x16]
; X64-NEXT:    ## zmm2 = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermilps $22, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x04,0xc8,0x16]
; X64-NEXT:    ## zmm1 {%k1} = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
; X64-NEXT:    vpermilps $22, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x04,0xc0,0x16]
; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
; X64-NEXT:    vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
; X64-NEXT:    vaddps %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 %x3)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> zeroinitializer, i16 %x3)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 -1)
  %res3 = fadd <16 x float> %res, %res1
  %res4 = fadd <16 x float> %res3, %res2
  ret <16 x float> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32>, i32, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_pshuf_d_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pshuf_d_512:
; X86:       ## %bb.0:
; X86-NEXT:    vpshufd $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x7d,0x48,0x70,0xd0,0x03]
; X86-NEXT:    ## zmm2 = zmm0[3,0,0,0,7,4,4,4,11,8,8,8,15,12,12,12]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpshufd $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x70,0xc8,0x03]
; X86-NEXT:    ## zmm1 {%k1} = zmm0[3,0,0,0,7,4,4,4,11,8,8,8,15,12,12,12]
; X86-NEXT:    vpshufd $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x70,0xc0,0x03]
; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[3,0,0,0,7,4,4,4,11,8,8,8,15,12,12,12]
; X86-NEXT:    vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pshuf_d_512:
; X64:       ## %bb.0:
; X64-NEXT:    vpshufd $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x7d,0x48,0x70,0xd0,0x03]
; X64-NEXT:    ## zmm2 = zmm0[3,0,0,0,7,4,4,4,11,8,8,8,15,12,12,12]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpshufd $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x70,0xc8,0x03]
; X64-NEXT:    ## zmm1 {%k1} = zmm0[3,0,0,0,7,4,4,4,11,8,8,8,15,12,12,12]
; X64-NEXT:    vpshufd $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x70,0xc0,0x03]
; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[3,0,0,0,7,4,4,4,11,8,8,8,15,12,12,12]
; X64-NEXT:    vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 %x3)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32> %x0, i32 3, <16 x i32> zeroinitializer, i16 %x3)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}

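; The integer compares produce their result directly in a mask register.
; Note the masked forms apply the caller's mask with a GPR and
; (andw/andl/andb) after kmovw, rather than predicating the compare itself
; with {%k1}.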
vpcmpeqq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc1] 985 ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 986 ; CHECK-NEXT: ## kill: def $al killed $al killed $eax 987 ; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 988 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 989 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1) 990 ret i8 %res 991 } 992 993 define i8 @test_mask_pcmpeq_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) { 994 ; X86-LABEL: test_mask_pcmpeq_q: 995 ; X86: ## %bb.0: 996 ; X86-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc1] 997 ; X86-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 998 ; X86-NEXT: andb {{[0-9]+}}(%esp), %al ## encoding: [0x22,0x44,0x24,0x04] 999 ; X86-NEXT: ## kill: def $al killed $al killed $eax 1000 ; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 1001 ; X86-NEXT: retl ## encoding: [0xc3] 1002 ; 1003 ; X64-LABEL: test_mask_pcmpeq_q: 1004 ; X64: ## %bb.0: 1005 ; X64-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc1] 1006 ; X64-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 1007 ; X64-NEXT: andb %dil, %al ## encoding: [0x40,0x20,0xf8] 1008 ; X64-NEXT: ## kill: def $al killed $al killed $eax 1009 ; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 1010 ; X64-NEXT: retq ## encoding: [0xc3] 1011 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask) 1012 ret i8 %res 1013 } 1014 1015 declare i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64>, <8 x i64>, i8) 1016 1017 define i16 @test_pcmpgt_d(<16 x i32> %a, <16 x i32> %b) { 1018 ; CHECK-LABEL: test_pcmpgt_d: 1019 ; CHECK: ## %bb.0: 1020 ; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0xc1] 1021 ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 1022 ; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax 1023 ; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 1024 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 1025 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1) 1026 ret i16 %res 1027 } 1028 1029 define i16 @test_mask_pcmpgt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) { 1030 ; X86-LABEL: test_mask_pcmpgt_d: 1031 ; X86: ## %bb.0: 1032 ; X86-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0xc1] 1033 ; X86-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 1034 ; X86-NEXT: andw {{[0-9]+}}(%esp), %ax ## encoding: [0x66,0x23,0x44,0x24,0x04] 1035 ; X86-NEXT: ## kill: def $ax killed $ax killed $eax 1036 ; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 1037 ; X86-NEXT: retl ## encoding: [0xc3] 1038 ; 1039 ; X64-LABEL: test_mask_pcmpgt_d: 1040 ; X64: ## %bb.0: 1041 ; X64-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0xc1] 1042 ; X64-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 1043 ; X64-NEXT: andl %edi, %eax ## encoding: [0x21,0xf8] 1044 ; X64-NEXT: ## kill: def $ax killed $ax killed $eax 1045 ; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 1046 ; X64-NEXT: retq ## encoding: [0xc3] 1047 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask) 1048 ret i16 %res 1049 } 1050 1051 declare i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32>, <16 x i32>, i16) 1052 1053 define i8 @test_pcmpgt_q(<8 x i64> %a, <8 x i64> %b) { 1054 ; CHECK-LABEL: test_pcmpgt_q: 1055 ; CHECK: ## %bb.0: 1056 ; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ## encoding: 
[0x62,0xf2,0xfd,0x48,0x37,0xc1] 1057 ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 1058 ; CHECK-NEXT: ## kill: def $al killed $al killed $eax 1059 ; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 1060 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 1061 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1) 1062 ret i8 %res 1063 } 1064 1065 define i8 @test_mask_pcmpgt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) { 1066 ; X86-LABEL: test_mask_pcmpgt_q: 1067 ; X86: ## %bb.0: 1068 ; X86-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x37,0xc1] 1069 ; X86-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 1070 ; X86-NEXT: andb {{[0-9]+}}(%esp), %al ## encoding: [0x22,0x44,0x24,0x04] 1071 ; X86-NEXT: ## kill: def $al killed $al killed $eax 1072 ; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 1073 ; X86-NEXT: retl ## encoding: [0xc3] 1074 ; 1075 ; X64-LABEL: test_mask_pcmpgt_q: 1076 ; X64: ## %bb.0: 1077 ; X64-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x37,0xc1] 1078 ; X64-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 1079 ; X64-NEXT: andb %dil, %al ## encoding: [0x40,0x20,0xf8] 1080 ; X64-NEXT: ## kill: def $al killed $al killed $eax 1081 ; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 1082 ; X64-NEXT: retq ## encoding: [0xc3] 1083 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask) 1084 ret i8 %res 1085 } 1086 1087 declare i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64>, <8 x i64>, i8) 1088 1089 declare <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double>, <8 x double>, <8 x double>, i8) 1090 1091 define <8 x double>@test_int_x86_avx512_mask_unpckh_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) { 1092 ; X86-LABEL: test_int_x86_avx512_mask_unpckh_pd_512: 1093 ; X86: ## %bb.0: 1094 ; X86-NEXT: vunpckhpd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x15,0xd9] 1095 ; X86-NEXT: ## zmm3 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] 1096 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] 1097 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 1098 ; X86-NEXT: vunpckhpd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x15,0xd1] 1099 ; X86-NEXT: ## zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] 1100 ; X86-NEXT: vaddpd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc3] 1101 ; X86-NEXT: retl ## encoding: [0xc3] 1102 ; 1103 ; X64-LABEL: test_int_x86_avx512_mask_unpckh_pd_512: 1104 ; X64: ## %bb.0: 1105 ; X64-NEXT: vunpckhpd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x15,0xd9] 1106 ; X64-NEXT: ## zmm3 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] 1107 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1108 ; X64-NEXT: vunpckhpd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x15,0xd1] 1109 ; X64-NEXT: ## zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] 1110 ; X64-NEXT: vaddpd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc3] 1111 ; X64-NEXT: retq ## encoding: [0xc3] 1112 %res = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) 1113 %res1 = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1) 1114 %res2 = fadd <8 x double> %res, %res1 1115 ret <8 x double> %res2 1116 } 1117 
1118 declare <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float>, <16 x float>, <16 x float>, i16) 1119 1120 define <16 x float>@test_int_x86_avx512_mask_unpckh_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) { 1121 ; X86-LABEL: test_int_x86_avx512_mask_unpckh_ps_512: 1122 ; X86: ## %bb.0: 1123 ; X86-NEXT: vunpckhps %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x15,0xd9] 1124 ; X86-NEXT: ## zmm3 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] 1125 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1126 ; X86-NEXT: vunpckhps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x15,0xd1] 1127 ; X86-NEXT: ## zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] 1128 ; X86-NEXT: vaddps %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc3] 1129 ; X86-NEXT: retl ## encoding: [0xc3] 1130 ; 1131 ; X64-LABEL: test_int_x86_avx512_mask_unpckh_ps_512: 1132 ; X64: ## %bb.0: 1133 ; X64-NEXT: vunpckhps %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x15,0xd9] 1134 ; X64-NEXT: ## zmm3 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] 1135 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1136 ; X64-NEXT: vunpckhps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x15,0xd1] 1137 ; X64-NEXT: ## zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] 1138 ; X64-NEXT: vaddps %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc3] 1139 ; X64-NEXT: retq ## encoding: [0xc3] 1140 %res = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) 1141 %res1 = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1) 1142 %res2 = fadd <16 x float> %res, %res1 1143 ret <16 x float> %res2 1144 } 1145 1146 declare <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double>, <8 x double>, <8 x double>, i8) 1147 1148 define <8 x double>@test_int_x86_avx512_mask_unpckl_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) { 1149 ; X86-LABEL: test_int_x86_avx512_mask_unpckl_pd_512: 1150 ; X86: ## %bb.0: 1151 ; X86-NEXT: vunpcklpd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x14,0xd9] 1152 ; X86-NEXT: ## zmm3 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] 1153 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] 1154 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 1155 ; X86-NEXT: vunpcklpd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x14,0xd1] 1156 ; X86-NEXT: ## zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] 1157 ; X86-NEXT: vaddpd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc3] 1158 ; X86-NEXT: retl ## encoding: [0xc3] 1159 ; 1160 ; X64-LABEL: test_int_x86_avx512_mask_unpckl_pd_512: 1161 ; X64: ## %bb.0: 1162 ; X64-NEXT: vunpcklpd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x14,0xd9] 1163 ; X64-NEXT: ## zmm3 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] 1164 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1165 ; X64-NEXT: 
vunpcklpd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x14,0xd1] 1166 ; X64-NEXT: ## zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] 1167 ; X64-NEXT: vaddpd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc3] 1168 ; X64-NEXT: retq ## encoding: [0xc3] 1169 %res = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) 1170 %res1 = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1) 1171 %res2 = fadd <8 x double> %res, %res1 1172 ret <8 x double> %res2 1173 } 1174 1175 declare <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float>, <16 x float>, <16 x float>, i16) 1176 1177 define <16 x float>@test_int_x86_avx512_mask_unpckl_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) { 1178 ; X86-LABEL: test_int_x86_avx512_mask_unpckl_ps_512: 1179 ; X86: ## %bb.0: 1180 ; X86-NEXT: vunpcklps %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x14,0xd9] 1181 ; X86-NEXT: ## zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] 1182 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1183 ; X86-NEXT: vunpcklps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x14,0xd1] 1184 ; X86-NEXT: ## zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] 1185 ; X86-NEXT: vaddps %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc3] 1186 ; X86-NEXT: retl ## encoding: [0xc3] 1187 ; 1188 ; X64-LABEL: test_int_x86_avx512_mask_unpckl_ps_512: 1189 ; X64: ## %bb.0: 1190 ; X64-NEXT: vunpcklps %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x14,0xd9] 1191 ; X64-NEXT: ## zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] 1192 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1193 ; X64-NEXT: vunpcklps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x14,0xd1] 1194 ; X64-NEXT: ## zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] 1195 ; X64-NEXT: vaddps %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc3] 1196 ; X64-NEXT: retq ## encoding: [0xc3] 1197 %res = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) 1198 %res1 = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1) 1199 %res2 = fadd <16 x float> %res, %res1 1200 ret <16 x float> %res2 1201 } 1202 1203 declare <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 1204 1205 define <8 x i64>@test_int_x86_avx512_mask_punpcklqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { 1206 ; X86-LABEL: test_int_x86_avx512_mask_punpcklqd_q_512: 1207 ; X86: ## %bb.0: 1208 ; X86-NEXT: vpunpcklqdq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6c,0xd9] 1209 ; X86-NEXT: ## zmm3 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] 1210 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] 1211 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 1212 ; X86-NEXT: vpunpcklqdq %zmm1, %zmm0, %zmm2 {%k1} ## 
encoding: [0x62,0xf1,0xfd,0x49,0x6c,0xd1] 1213 ; X86-NEXT: ## zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] 1214 ; X86-NEXT: vpunpcklqdq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6c,0xc1] 1215 ; X86-NEXT: ## zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] 1216 ; X86-NEXT: vpaddq %zmm0, %zmm3, %zmm0 ## encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0] 1217 ; X86-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0] 1218 ; X86-NEXT: retl ## encoding: [0xc3] 1219 ; 1220 ; X64-LABEL: test_int_x86_avx512_mask_punpcklqd_q_512: 1221 ; X64: ## %bb.0: 1222 ; X64-NEXT: vpunpcklqdq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6c,0xd9] 1223 ; X64-NEXT: ## zmm3 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] 1224 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1225 ; X64-NEXT: vpunpcklqdq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x6c,0xd1] 1226 ; X64-NEXT: ## zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] 1227 ; X64-NEXT: vpunpcklqdq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6c,0xc1] 1228 ; X64-NEXT: ## zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] 1229 ; X64-NEXT: vpaddq %zmm0, %zmm3, %zmm0 ## encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0] 1230 ; X64-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0] 1231 ; X64-NEXT: retq ## encoding: [0xc3] 1232 %res = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) 1233 %res1 = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) 1234 %res2 = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer,i8 %x3) 1235 %res3 = add <8 x i64> %res, %res1 1236 %res4 = add <8 x i64> %res2, %res3 1237 ret <8 x i64> %res4 1238 } 1239 1240 declare <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 1241 1242 define <8 x i64>@test_int_x86_avx512_mask_punpckhqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { 1243 ; X86-LABEL: test_int_x86_avx512_mask_punpckhqd_q_512: 1244 ; X86: ## %bb.0: 1245 ; X86-NEXT: vpunpckhqdq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6d,0xd9] 1246 ; X86-NEXT: ## zmm3 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] 1247 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] 1248 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 1249 ; X86-NEXT: vpunpckhqdq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x6d,0xd1] 1250 ; X86-NEXT: ## zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] 1251 ; X86-NEXT: vpaddq %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3] 1252 ; X86-NEXT: retl ## encoding: [0xc3] 1253 ; 1254 ; X64-LABEL: test_int_x86_avx512_mask_punpckhqd_q_512: 1255 ; X64: ## %bb.0: 1256 ; X64-NEXT: vpunpckhqdq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6d,0xd9] 1257 ; X64-NEXT: ## zmm3 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] 1258 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1259 ; X64-NEXT: vpunpckhqdq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x6d,0xd1] 1260 ; X64-NEXT: ## zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] 1261 ; X64-NEXT: vpaddq %zmm3, 
%zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3] 1262 ; X64-NEXT: retq ## encoding: [0xc3] 1263 %res = call <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) 1264 %res1 = call <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) 1265 %res2 = add <8 x i64> %res, %res1 1266 ret <8 x i64> %res2 1267 } 1268 1269 declare <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 1270 1271 define <16 x i32>@test_int_x86_avx512_mask_punpckhd_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) { 1272 ; X86-LABEL: test_int_x86_avx512_mask_punpckhd_q_512: 1273 ; X86: ## %bb.0: 1274 ; X86-NEXT: vpunpckhdq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7d,0x48,0x6a,0xd9] 1275 ; X86-NEXT: ## zmm3 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] 1276 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1277 ; X86-NEXT: vpunpckhdq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6a,0xd1] 1278 ; X86-NEXT: ## zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] 1279 ; X86-NEXT: vpaddd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3] 1280 ; X86-NEXT: retl ## encoding: [0xc3] 1281 ; 1282 ; X64-LABEL: test_int_x86_avx512_mask_punpckhd_q_512: 1283 ; X64: ## %bb.0: 1284 ; X64-NEXT: vpunpckhdq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7d,0x48,0x6a,0xd9] 1285 ; X64-NEXT: ## zmm3 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] 1286 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1287 ; X64-NEXT: vpunpckhdq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6a,0xd1] 1288 ; X64-NEXT: ## zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] 1289 ; X64-NEXT: vpaddd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3] 1290 ; X64-NEXT: retq ## encoding: [0xc3] 1291 %res = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) 1292 %res1 = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1) 1293 %res2 = add <16 x i32> %res, %res1 1294 ret <16 x i32> %res2 1295 } 1296 1297 declare <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 1298 1299 define <16 x i32>@test_int_x86_avx512_mask_punpckld_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) { 1300 ; X86-LABEL: test_int_x86_avx512_mask_punpckld_q_512: 1301 ; X86: ## %bb.0: 1302 ; X86-NEXT: vpunpckldq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7d,0x48,0x62,0xd9] 1303 ; X86-NEXT: ## zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] 1304 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1305 ; X86-NEXT: vpunpckldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x62,0xd1] 1306 ; X86-NEXT: ## zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] 1307 ; X86-NEXT: vpaddd %zmm3, %zmm2, 
%zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3] 1308 ; X86-NEXT: retl ## encoding: [0xc3] 1309 ; 1310 ; X64-LABEL: test_int_x86_avx512_mask_punpckld_q_512: 1311 ; X64: ## %bb.0: 1312 ; X64-NEXT: vpunpckldq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7d,0x48,0x62,0xd9] 1313 ; X64-NEXT: ## zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] 1314 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1315 ; X64-NEXT: vpunpckldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x62,0xd1] 1316 ; X64-NEXT: ## zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] 1317 ; X64-NEXT: vpaddd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3] 1318 ; X64-NEXT: retq ## encoding: [0xc3] 1319 %res = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) 1320 %res1 = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1) 1321 %res2 = add <16 x i32> %res, %res1 1322 ret <16 x i32> %res2 1323 } 1324 1325 define <16 x i32> @test_x86_avx512_pslli_d(<16 x i32> %a0) { 1326 ; CHECK-LABEL: test_x86_avx512_pslli_d: 1327 ; CHECK: ## %bb.0: 1328 ; CHECK-NEXT: vpslld $7, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xf0,0x07] 1329 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 1330 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1) 1331 ret <16 x i32> %res 1332 } 1333 1334 define <16 x i32> @test_x86_avx512_mask_pslli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) { 1335 ; X86-LABEL: test_x86_avx512_mask_pslli_d: 1336 ; X86: ## %bb.0: 1337 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1338 ; X86-NEXT: vpslld $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xf0,0x07] 1339 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1340 ; X86-NEXT: retl ## encoding: [0xc3] 1341 ; 1342 ; X64-LABEL: test_x86_avx512_mask_pslli_d: 1343 ; X64: ## %bb.0: 1344 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1345 ; X64-NEXT: vpslld $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xf0,0x07] 1346 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1347 ; X64-NEXT: retq ## encoding: [0xc3] 1348 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask) 1349 ret <16 x i32> %res 1350 } 1351 1352 define <16 x i32> @test_x86_avx512_maskz_pslli_d(<16 x i32> %a0, i16 %mask) { 1353 ; X86-LABEL: test_x86_avx512_maskz_pslli_d: 1354 ; X86: ## %bb.0: 1355 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1356 ; X86-NEXT: vpslld $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xf0,0x07] 1357 ; X86-NEXT: retl ## encoding: [0xc3] 1358 ; 1359 ; X64-LABEL: test_x86_avx512_maskz_pslli_d: 1360 ; X64: ## %bb.0: 1361 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1362 ; X64-NEXT: vpslld $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xf0,0x07] 1363 ; X64-NEXT: retq ## encoding: [0xc3] 1364 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask) 1365 ret <16 x i32> %res 1366 } 1367 1368 declare <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32>, i32, 
<16 x i32>, i16) nounwind readnone 1369 1370 define <8 x i64> @test_x86_avx512_pslli_q(<8 x i64> %a0) { 1371 ; CHECK-LABEL: test_x86_avx512_pslli_q: 1372 ; CHECK: ## %bb.0: 1373 ; CHECK-NEXT: vpsllq $7, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x73,0xf0,0x07] 1374 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 1375 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1) 1376 ret <8 x i64> %res 1377 } 1378 1379 define <8 x i64> @test_x86_avx512_mask_pslli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) { 1380 ; X86-LABEL: test_x86_avx512_mask_pslli_q: 1381 ; X86: ## %bb.0: 1382 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] 1383 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 1384 ; X86-NEXT: vpsllq $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xf0,0x07] 1385 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1386 ; X86-NEXT: retl ## encoding: [0xc3] 1387 ; 1388 ; X64-LABEL: test_x86_avx512_mask_pslli_q: 1389 ; X64: ## %bb.0: 1390 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1391 ; X64-NEXT: vpsllq $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xf0,0x07] 1392 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1393 ; X64-NEXT: retq ## encoding: [0xc3] 1394 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask) 1395 ret <8 x i64> %res 1396 } 1397 1398 define <8 x i64> @test_x86_avx512_maskz_pslli_q(<8 x i64> %a0, i8 %mask) { 1399 ; X86-LABEL: test_x86_avx512_maskz_pslli_q: 1400 ; X86: ## %bb.0: 1401 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] 1402 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 1403 ; X86-NEXT: vpsllq $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x73,0xf0,0x07] 1404 ; X86-NEXT: retl ## encoding: [0xc3] 1405 ; 1406 ; X64-LABEL: test_x86_avx512_maskz_pslli_q: 1407 ; X64: ## %bb.0: 1408 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1409 ; X64-NEXT: vpsllq $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x73,0xf0,0x07] 1410 ; X64-NEXT: retq ## encoding: [0xc3] 1411 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask) 1412 ret <8 x i64> %res 1413 } 1414 1415 declare <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone 1416 1417 define <16 x i32> @test_x86_avx512_psrli_d(<16 x i32> %a0) { 1418 ; CHECK-LABEL: test_x86_avx512_psrli_d: 1419 ; CHECK: ## %bb.0: 1420 ; CHECK-NEXT: vpsrld $7, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xd0,0x07] 1421 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 1422 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1) 1423 ret <16 x i32> %res 1424 } 1425 1426 define <16 x i32> @test_x86_avx512_mask_psrli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) { 1427 ; X86-LABEL: test_x86_avx512_mask_psrli_d: 1428 ; X86: ## %bb.0: 1429 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1430 ; X86-NEXT: vpsrld $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xd0,0x07] 1431 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1432 ; X86-NEXT: retl ## encoding: [0xc3] 1433 ; 1434 ; X64-LABEL: test_x86_avx512_mask_psrli_d: 1435 ; X64: ## %bb.0: 1436 ; X64-NEXT: kmovw %edi, %k1 ## encoding: 
[0xc5,0xf8,0x92,0xcf] 1437 ; X64-NEXT: vpsrld $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xd0,0x07] 1438 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1439 ; X64-NEXT: retq ## encoding: [0xc3] 1440 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask) 1441 ret <16 x i32> %res 1442 } 1443 1444 define <16 x i32> @test_x86_avx512_maskz_psrli_d(<16 x i32> %a0, i16 %mask) { 1445 ; X86-LABEL: test_x86_avx512_maskz_psrli_d: 1446 ; X86: ## %bb.0: 1447 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1448 ; X86-NEXT: vpsrld $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xd0,0x07] 1449 ; X86-NEXT: retl ## encoding: [0xc3] 1450 ; 1451 ; X64-LABEL: test_x86_avx512_maskz_psrli_d: 1452 ; X64: ## %bb.0: 1453 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1454 ; X64-NEXT: vpsrld $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xd0,0x07] 1455 ; X64-NEXT: retq ## encoding: [0xc3] 1456 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask) 1457 ret <16 x i32> %res 1458 } 1459 1460 declare <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone 1461 1462 define <8 x i64> @test_x86_avx512_psrli_q(<8 x i64> %a0) { 1463 ; CHECK-LABEL: test_x86_avx512_psrli_q: 1464 ; CHECK: ## %bb.0: 1465 ; CHECK-NEXT: vpsrlq $7, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x73,0xd0,0x07] 1466 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 1467 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1) 1468 ret <8 x i64> %res 1469 } 1470 1471 define <8 x i64> @test_x86_avx512_mask_psrli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) { 1472 ; X86-LABEL: test_x86_avx512_mask_psrli_q: 1473 ; X86: ## %bb.0: 1474 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] 1475 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 1476 ; X86-NEXT: vpsrlq $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xd0,0x07] 1477 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1478 ; X86-NEXT: retl ## encoding: [0xc3] 1479 ; 1480 ; X64-LABEL: test_x86_avx512_mask_psrli_q: 1481 ; X64: ## %bb.0: 1482 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1483 ; X64-NEXT: vpsrlq $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xd0,0x07] 1484 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1485 ; X64-NEXT: retq ## encoding: [0xc3] 1486 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask) 1487 ret <8 x i64> %res 1488 } 1489 1490 define <8 x i64> @test_x86_avx512_maskz_psrli_q(<8 x i64> %a0, i8 %mask) { 1491 ; X86-LABEL: test_x86_avx512_maskz_psrli_q: 1492 ; X86: ## %bb.0: 1493 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] 1494 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 1495 ; X86-NEXT: vpsrlq $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x73,0xd0,0x07] 1496 ; X86-NEXT: retl ## encoding: [0xc3] 1497 ; 1498 ; X64-LABEL: test_x86_avx512_maskz_psrli_q: 1499 ; X64: ## %bb.0: 1500 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1501 ; X64-NEXT: vpsrlq $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x73,0xd0,0x07] 1502 ; X64-NEXT: retq ## encoding: [0xc3] 
1503 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask) 1504 ret <8 x i64> %res 1505 } 1506 1507 declare <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone 1508 1509 define <16 x i32> @test_x86_avx512_psrai_d(<16 x i32> %a0) { 1510 ; CHECK-LABEL: test_x86_avx512_psrai_d: 1511 ; CHECK: ## %bb.0: 1512 ; CHECK-NEXT: vpsrad $7, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xe0,0x07] 1513 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 1514 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1) 1515 ret <16 x i32> %res 1516 } 1517 1518 define <16 x i32> @test_x86_avx512_mask_psrai_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) { 1519 ; X86-LABEL: test_x86_avx512_mask_psrai_d: 1520 ; X86: ## %bb.0: 1521 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1522 ; X86-NEXT: vpsrad $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xe0,0x07] 1523 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1524 ; X86-NEXT: retl ## encoding: [0xc3] 1525 ; 1526 ; X64-LABEL: test_x86_avx512_mask_psrai_d: 1527 ; X64: ## %bb.0: 1528 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1529 ; X64-NEXT: vpsrad $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xe0,0x07] 1530 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1531 ; X64-NEXT: retq ## encoding: [0xc3] 1532 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask) 1533 ret <16 x i32> %res 1534 } 1535 1536 define <16 x i32> @test_x86_avx512_maskz_psrai_d(<16 x i32> %a0, i16 %mask) { 1537 ; X86-LABEL: test_x86_avx512_maskz_psrai_d: 1538 ; X86: ## %bb.0: 1539 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1540 ; X86-NEXT: vpsrad $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xe0,0x07] 1541 ; X86-NEXT: retl ## encoding: [0xc3] 1542 ; 1543 ; X64-LABEL: test_x86_avx512_maskz_psrai_d: 1544 ; X64: ## %bb.0: 1545 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1546 ; X64-NEXT: vpsrad $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xe0,0x07] 1547 ; X64-NEXT: retq ## encoding: [0xc3] 1548 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask) 1549 ret <16 x i32> %res 1550 } 1551 1552 declare <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone 1553 1554 define <8 x i64> @test_x86_avx512_psrai_q(<8 x i64> %a0) { 1555 ; CHECK-LABEL: test_x86_avx512_psrai_q: 1556 ; CHECK: ## %bb.0: 1557 ; CHECK-NEXT: vpsraq $7, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x72,0xe0,0x07] 1558 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 1559 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1) 1560 ret <8 x i64> %res 1561 } 1562 1563 define <8 x i64> @test_x86_avx512_mask_psrai_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) { 1564 ; X86-LABEL: test_x86_avx512_mask_psrai_q: 1565 ; X86: ## %bb.0: 1566 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] 1567 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 1568 ; X86-NEXT: vpsraq $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xe0,0x07] 1569 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: 
[0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1570 ; X86-NEXT: retl ## encoding: [0xc3] 1571 ; 1572 ; X64-LABEL: test_x86_avx512_mask_psrai_q: 1573 ; X64: ## %bb.0: 1574 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1575 ; X64-NEXT: vpsraq $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xe0,0x07] 1576 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1577 ; X64-NEXT: retq ## encoding: [0xc3] 1578 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask) 1579 ret <8 x i64> %res 1580 } 1581 1582 define <8 x i64> @test_x86_avx512_maskz_psrai_q(<8 x i64> %a0, i8 %mask) { 1583 ; X86-LABEL: test_x86_avx512_maskz_psrai_q: 1584 ; X86: ## %bb.0: 1585 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] 1586 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 1587 ; X86-NEXT: vpsraq $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x72,0xe0,0x07] 1588 ; X86-NEXT: retl ## encoding: [0xc3] 1589 ; 1590 ; X64-LABEL: test_x86_avx512_maskz_psrai_q: 1591 ; X64: ## %bb.0: 1592 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1593 ; X64-NEXT: vpsraq $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x72,0xe0,0x07] 1594 ; X64-NEXT: retq ## encoding: [0xc3] 1595 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask) 1596 ret <8 x i64> %res 1597 } 1598 1599 declare <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone 1600 1601 declare void @llvm.x86.avx512.storent.q.512(i8*, <8 x i64>) 1602 1603 define void@test_storent_q_512(<8 x i64> %data, i8* %ptr) { 1604 ; X86-LABEL: test_storent_q_512: 1605 ; X86: ## %bb.0: 1606 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] 1607 ; X86-NEXT: vmovntps %zmm0, (%eax) ## encoding: [0x62,0xf1,0x7c,0x48,0x2b,0x00] 1608 ; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 1609 ; X86-NEXT: retl ## encoding: [0xc3] 1610 ; 1611 ; X64-LABEL: test_storent_q_512: 1612 ; X64: ## %bb.0: 1613 ; X64-NEXT: vmovntps %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x2b,0x07] 1614 ; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 1615 ; X64-NEXT: retq ## encoding: [0xc3] 1616 call void @llvm.x86.avx512.storent.q.512(i8* %ptr, <8 x i64> %data) 1617 ret void 1618 } 1619 1620 declare void @llvm.x86.avx512.storent.pd.512(i8*, <8 x double>) 1621 1622 define void @test_storent_pd_512(<8 x double> %data, i8* %ptr) { 1623 ; X86-LABEL: test_storent_pd_512: 1624 ; X86: ## %bb.0: 1625 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] 1626 ; X86-NEXT: vmovntps %zmm0, (%eax) ## encoding: [0x62,0xf1,0x7c,0x48,0x2b,0x00] 1627 ; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 1628 ; X86-NEXT: retl ## encoding: [0xc3] 1629 ; 1630 ; X64-LABEL: test_storent_pd_512: 1631 ; X64: ## %bb.0: 1632 ; X64-NEXT: vmovntps %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x2b,0x07] 1633 ; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 1634 ; X64-NEXT: retq ## encoding: [0xc3] 1635 call void @llvm.x86.avx512.storent.pd.512(i8* %ptr, <8 x double> %data) 1636 ret void 1637 } 1638 1639 declare void @llvm.x86.avx512.storent.ps.512(i8*, <16 x float>) 1640 1641 define void @test_storent_ps_512(<16 x float> %data, i8* %ptr) { 1642 ; X86-LABEL: test_storent_ps_512: 1643 ; X86: ## %bb.0: 1644 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] 1645 ; X86-NEXT: vmovntps %zmm0, (%eax) ## encoding: 
[0x62,0xf1,0x7c,0x48,0x2b,0x00] 1646 ; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 1647 ; X86-NEXT: retl ## encoding: [0xc3] 1648 ; 1649 ; X64-LABEL: test_storent_ps_512: 1650 ; X64: ## %bb.0: 1651 ; X64-NEXT: vmovntps %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x2b,0x07] 1652 ; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 1653 ; X64-NEXT: retq ## encoding: [0xc3] 1654 call void @llvm.x86.avx512.storent.ps.512(i8* %ptr, <16 x float> %data) 1655 ret void 1656 } 1657 1658 define <16 x i32> @test_xor_epi32(<16 x i32> %a, <16 x i32> %b) { 1659 ; CHECK-LABEL: test_xor_epi32: 1660 ; CHECK: ## %bb.0: 1661 ; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xef,0xc1] 1662 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 1663 %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1) 1664 ret < 16 x i32> %res 1665 } 1666 1667 define <16 x i32> @test_mask_xor_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) { 1668 ; X86-LABEL: test_mask_xor_epi32: 1669 ; X86: ## %bb.0: 1670 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1671 ; X86-NEXT: vpxord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xef,0xd1] 1672 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1673 ; X86-NEXT: retl ## encoding: [0xc3] 1674 ; 1675 ; X64-LABEL: test_mask_xor_epi32: 1676 ; X64: ## %bb.0: 1677 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1678 ; X64-NEXT: vpxord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xef,0xd1] 1679 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1680 ; X64-NEXT: retq ## encoding: [0xc3] 1681 %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) 1682 ret < 16 x i32> %res 1683 } 1684 1685 declare <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 1686 1687 define <16 x i32> @test_or_epi32(<16 x i32> %a, <16 x i32> %b) { 1688 ; CHECK-LABEL: test_or_epi32: 1689 ; CHECK: ## %bb.0: 1690 ; CHECK-NEXT: vporq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xeb,0xc1] 1691 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 1692 %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1) 1693 ret < 16 x i32> %res 1694 } 1695 1696 define <16 x i32> @test_mask_or_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) { 1697 ; X86-LABEL: test_mask_or_epi32: 1698 ; X86: ## %bb.0: 1699 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1700 ; X86-NEXT: vpord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xeb,0xd1] 1701 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1702 ; X86-NEXT: retl ## encoding: [0xc3] 1703 ; 1704 ; X64-LABEL: test_mask_or_epi32: 1705 ; X64: ## %bb.0: 1706 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1707 ; X64-NEXT: vpord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xeb,0xd1] 1708 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1709 ; X64-NEXT: retq ## encoding: [0xc3] 1710 %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) 1711 ret < 16 x i32> %res 1712 } 1713 1714 declare <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 
1715 1716 define <16 x i32> @test_and_epi32(<16 x i32> %a, <16 x i32> %b) { 1717 ; CHECK-LABEL: test_and_epi32: 1718 ; CHECK: ## %bb.0: 1719 ; CHECK-NEXT: vpandq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xdb,0xc1] 1720 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 1721 %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1) 1722 ret < 16 x i32> %res 1723 } 1724 1725 define <16 x i32> @test_mask_and_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) { 1726 ; X86-LABEL: test_mask_and_epi32: 1727 ; X86: ## %bb.0: 1728 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1729 ; X86-NEXT: vpandd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xdb,0xd1] 1730 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1731 ; X86-NEXT: retl ## encoding: [0xc3] 1732 ; 1733 ; X64-LABEL: test_mask_and_epi32: 1734 ; X64: ## %bb.0: 1735 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1736 ; X64-NEXT: vpandd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xdb,0xd1] 1737 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1738 ; X64-NEXT: retq ## encoding: [0xc3] 1739 %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) 1740 ret < 16 x i32> %res 1741 } 1742 1743 declare <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 1744 1745 define <8 x i64> @test_xor_epi64(<8 x i64> %a, <8 x i64> %b) { 1746 ; CHECK-LABEL: test_xor_epi64: 1747 ; CHECK: ## %bb.0: 1748 ; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xef,0xc1] 1749 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 1750 %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1) 1751 ret < 8 x i64> %res 1752 } 1753 1754 define <8 x i64> @test_mask_xor_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) { 1755 ; X86-LABEL: test_mask_xor_epi64: 1756 ; X86: ## %bb.0: 1757 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] 1758 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 1759 ; X86-NEXT: vpxorq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xef,0xd1] 1760 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1761 ; X86-NEXT: retl ## encoding: [0xc3] 1762 ; 1763 ; X64-LABEL: test_mask_xor_epi64: 1764 ; X64: ## %bb.0: 1765 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1766 ; X64-NEXT: vpxorq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xef,0xd1] 1767 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1768 ; X64-NEXT: retq ## encoding: [0xc3] 1769 %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) 1770 ret < 8 x i64> %res 1771 } 1772 1773 declare <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 1774 1775 define <8 x i64> @test_or_epi64(<8 x i64> %a, <8 x i64> %b) { 1776 ; CHECK-LABEL: test_or_epi64: 1777 ; CHECK: ## %bb.0: 1778 ; CHECK-NEXT: vporq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xeb,0xc1] 1779 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 1780 %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1) 1781 ret < 8 x i64> %res 1782 } 1783 
1784 define <8 x i64> @test_mask_or_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) { 1785 ; X86-LABEL: test_mask_or_epi64: 1786 ; X86: ## %bb.0: 1787 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] 1788 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 1789 ; X86-NEXT: vporq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xeb,0xd1] 1790 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1791 ; X86-NEXT: retl ## encoding: [0xc3] 1792 ; 1793 ; X64-LABEL: test_mask_or_epi64: 1794 ; X64: ## %bb.0: 1795 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1796 ; X64-NEXT: vporq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xeb,0xd1] 1797 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1798 ; X64-NEXT: retq ## encoding: [0xc3] 1799 %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) 1800 ret < 8 x i64> %res 1801 } 1802 1803 declare <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 1804 1805 define <8 x i64> @test_and_epi64(<8 x i64> %a, <8 x i64> %b) { 1806 ; CHECK-LABEL: test_and_epi64: 1807 ; CHECK: ## %bb.0: 1808 ; CHECK-NEXT: vpandq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xdb,0xc1] 1809 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 1810 %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1) 1811 ret < 8 x i64> %res 1812 } 1813 1814 define <8 x i64> @test_mask_and_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) { 1815 ; X86-LABEL: test_mask_and_epi64: 1816 ; X86: ## %bb.0: 1817 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] 1818 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 1819 ; X86-NEXT: vpandq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xdb,0xd1] 1820 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1821 ; X86-NEXT: retl ## encoding: [0xc3] 1822 ; 1823 ; X64-LABEL: test_mask_and_epi64: 1824 ; X64: ## %bb.0: 1825 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1826 ; X64-NEXT: vpandq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xdb,0xd1] 1827 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1828 ; X64-NEXT: retq ## encoding: [0xc3] 1829 %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) 1830 ret < 8 x i64> %res 1831 } 1832 1833 declare <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 1834 1835 define <16 x i32> @test_mask_add_epi32_rr(<16 x i32> %a, <16 x i32> %b) { 1836 ; CHECK-LABEL: test_mask_add_epi32_rr: 1837 ; CHECK: ## %bb.0: 1838 ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc1] 1839 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 1840 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1) 1841 ret < 16 x i32> %res 1842 } 1843 1844 define <16 x i32> @test_mask_add_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) { 1845 ; X86-LABEL: test_mask_add_epi32_rrk: 1846 ; X86: ## %bb.0: 1847 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1848 ; X86-NEXT: vpaddd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0xd1] 1849 ; 
X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1850 ; X86-NEXT: retl ## encoding: [0xc3] 1851 ; 1852 ; X64-LABEL: test_mask_add_epi32_rrk: 1853 ; X64: ## %bb.0: 1854 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1855 ; X64-NEXT: vpaddd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0xd1] 1856 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1857 ; X64-NEXT: retq ## encoding: [0xc3] 1858 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) 1859 ret < 16 x i32> %res 1860 } 1861 1862 define <16 x i32> @test_mask_add_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) { 1863 ; X86-LABEL: test_mask_add_epi32_rrkz: 1864 ; X86: ## %bb.0: 1865 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1866 ; X86-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0xc1] 1867 ; X86-NEXT: retl ## encoding: [0xc3] 1868 ; 1869 ; X64-LABEL: test_mask_add_epi32_rrkz: 1870 ; X64: ## %bb.0: 1871 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1872 ; X64-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0xc1] 1873 ; X64-NEXT: retq ## encoding: [0xc3] 1874 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask) 1875 ret < 16 x i32> %res 1876 } 1877 1878 define <16 x i32> @test_mask_add_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) { 1879 ; X86-LABEL: test_mask_add_epi32_rm: 1880 ; X86: ## %bb.0: 1881 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] 1882 ; X86-NEXT: vpaddd (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0x00] 1883 ; X86-NEXT: retl ## encoding: [0xc3] 1884 ; 1885 ; X64-LABEL: test_mask_add_epi32_rm: 1886 ; X64: ## %bb.0: 1887 ; X64-NEXT: vpaddd (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0x07] 1888 ; X64-NEXT: retq ## encoding: [0xc3] 1889 %b = load <16 x i32>, <16 x i32>* %ptr_b 1890 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1) 1891 ret < 16 x i32> %res 1892 } 1893 1894 define <16 x i32> @test_mask_add_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) { 1895 ; X86-LABEL: test_mask_add_epi32_rmk: 1896 ; X86: ## %bb.0: 1897 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] 1898 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1899 ; X86-NEXT: vpaddd (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0x08] 1900 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1901 ; X86-NEXT: retl ## encoding: [0xc3] 1902 ; 1903 ; X64-LABEL: test_mask_add_epi32_rmk: 1904 ; X64: ## %bb.0: 1905 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1906 ; X64-NEXT: vpaddd (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0x0f] 1907 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1908 ; X64-NEXT: retq ## encoding: [0xc3] 1909 %b = load <16 x i32>, <16 x i32>* %ptr_b 1910 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) 1911 ret < 16 x i32> %res 1912 } 1913 1914 define <16 x i32> @test_mask_add_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) { 1915 ; X86-LABEL: test_mask_add_epi32_rmkz: 1916 ; 
X86: ## %bb.0: 1917 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] 1918 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1919 ; X86-NEXT: vpaddd (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0x00] 1920 ; X86-NEXT: retl ## encoding: [0xc3] 1921 ; 1922 ; X64-LABEL: test_mask_add_epi32_rmkz: 1923 ; X64: ## %bb.0: 1924 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1925 ; X64-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0x07] 1926 ; X64-NEXT: retq ## encoding: [0xc3] 1927 %b = load <16 x i32>, <16 x i32>* %ptr_b 1928 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask) 1929 ret < 16 x i32> %res 1930 } 1931 1932 define <16 x i32> @test_mask_add_epi32_rmb(<16 x i32> %a, i32* %ptr_b) { 1933 ; X86-LABEL: test_mask_add_epi32_rmb: 1934 ; X86: ## %bb.0: 1935 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] 1936 ; X86-NEXT: vpaddd (%eax){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfe,0x00] 1937 ; X86-NEXT: retl ## encoding: [0xc3] 1938 ; 1939 ; X64-LABEL: test_mask_add_epi32_rmb: 1940 ; X64: ## %bb.0: 1941 ; X64-NEXT: vpaddd (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfe,0x07] 1942 ; X64-NEXT: retq ## encoding: [0xc3] 1943 %q = load i32, i32* %ptr_b 1944 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 1945 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 1946 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1) 1947 ret < 16 x i32> %res 1948 } 1949 1950 define <16 x i32> @test_mask_add_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) { 1951 ; X86-LABEL: test_mask_add_epi32_rmbk: 1952 ; X86: ## %bb.0: 1953 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] 1954 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1955 ; X86-NEXT: vpaddd (%eax){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfe,0x08] 1956 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1957 ; X86-NEXT: retl ## encoding: [0xc3] 1958 ; 1959 ; X64-LABEL: test_mask_add_epi32_rmbk: 1960 ; X64: ## %bb.0: 1961 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1962 ; X64-NEXT: vpaddd (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfe,0x0f] 1963 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1964 ; X64-NEXT: retq ## encoding: [0xc3] 1965 %q = load i32, i32* %ptr_b 1966 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 1967 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 1968 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) 1969 ret < 16 x i32> %res 1970 } 1971 1972 define <16 x i32> @test_mask_add_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) { 1973 ; X86-LABEL: test_mask_add_epi32_rmbkz: 1974 ; X86: ## %bb.0: 1975 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] 1976 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1977 ; X86-NEXT: vpaddd (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfe,0x00] 1978 ; X86-NEXT: retl ## encoding: [0xc3] 1979 ; 1980 ; X64-LABEL: 
test_mask_add_epi32_rmbkz: 1981 ; X64: ## %bb.0: 1982 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1983 ; X64-NEXT: vpaddd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfe,0x07] 1984 ; X64-NEXT: retq ## encoding: [0xc3] 1985 %q = load i32, i32* %ptr_b 1986 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 1987 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 1988 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask) 1989 ret < 16 x i32> %res 1990 } 1991 1992 declare <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 1993 1994 define <16 x i32> @test_mask_sub_epi32_rr(<16 x i32> %a, <16 x i32> %b) { 1995 ; CHECK-LABEL: test_mask_sub_epi32_rr: 1996 ; CHECK: ## %bb.0: 1997 ; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0xc1] 1998 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 1999 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1) 2000 ret < 16 x i32> %res 2001 } 2002 2003 define <16 x i32> @test_mask_sub_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) { 2004 ; X86-LABEL: test_mask_sub_epi32_rrk: 2005 ; X86: ## %bb.0: 2006 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2007 ; X86-NEXT: vpsubd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0xd1] 2008 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 2009 ; X86-NEXT: retl ## encoding: [0xc3] 2010 ; 2011 ; X64-LABEL: test_mask_sub_epi32_rrk: 2012 ; X64: ## %bb.0: 2013 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2014 ; X64-NEXT: vpsubd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0xd1] 2015 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 2016 ; X64-NEXT: retq ## encoding: [0xc3] 2017 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) 2018 ret < 16 x i32> %res 2019 } 2020 2021 define <16 x i32> @test_mask_sub_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) { 2022 ; X86-LABEL: test_mask_sub_epi32_rrkz: 2023 ; X86: ## %bb.0: 2024 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2025 ; X86-NEXT: vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0xc1] 2026 ; X86-NEXT: retl ## encoding: [0xc3] 2027 ; 2028 ; X64-LABEL: test_mask_sub_epi32_rrkz: 2029 ; X64: ## %bb.0: 2030 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2031 ; X64-NEXT: vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0xc1] 2032 ; X64-NEXT: retq ## encoding: [0xc3] 2033 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask) 2034 ret < 16 x i32> %res 2035 } 2036 2037 define <16 x i32> @test_mask_sub_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) { 2038 ; X86-LABEL: test_mask_sub_epi32_rm: 2039 ; X86: ## %bb.0: 2040 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] 2041 ; X86-NEXT: vpsubd (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0x00] 2042 ; X86-NEXT: retl ## encoding: [0xc3] 2043 ; 2044 ; X64-LABEL: test_mask_sub_epi32_rm: 2045 ; X64: ## %bb.0: 2046 ; X64-NEXT: vpsubd (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0x07] 

define <16 x i32> @test_mask_sub_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_mask_sub_epi32_rr:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_mask_sub_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
; X86-LABEL: test_mask_sub_epi32_rrk:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsubd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi32_rrk:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsubd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_mask_sub_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
; X86-LABEL: test_mask_sub_epi32_rrkz:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi32_rrkz:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_mask_sub_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
; X86-LABEL: test_mask_sub_epi32_rm:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpsubd (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi32_rm:
; X64: ## %bb.0:
; X64-NEXT: vpsubd (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_mask_sub_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
; X86-LABEL: test_mask_sub_epi32_rmk:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpsubd (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0x08]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi32_rmk:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpsubd (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0x0f]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_mask_sub_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_sub_epi32_rmkz:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpsubd (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi32_rmkz:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpsubd (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_mask_sub_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_sub_epi32_rmb:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpsubd (%eax){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfa,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi32_rmb:
; X64: ## %bb.0:
; X64-NEXT: vpsubd (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfa,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_mask_sub_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
; X86-LABEL: test_mask_sub_epi32_rmbk:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpsubd (%eax){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfa,0x08]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi32_rmbk:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpsubd (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfa,0x0f]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_mask_sub_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_sub_epi32_rmbkz:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpsubd (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfa,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi32_rmbkz:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpsubd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfa,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
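
; NOTE: A minimal sketch of the masking semantics these legacy intrinsics
; exercise (an illustration, not part of the generated checks): the operation
; applies to all lanes and the mask then selects between the result and the
; pass-through argument. For @llvm.x86.avx512.mask.psub.d.512 this is roughly:
;   %sub = sub <16 x i32> %a, %b
;   %m   = bitcast i16 %mask to <16 x i1>
;   %r   = select <16 x i1> %m, <16 x i32> %sub, <16 x i32> %passThru
; with zero masking using a zeroinitializer pass-through instead.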

define <8 x i64> @test_mask_add_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_mask_add_epi64_rr:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_add_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_add_epi64_rrk:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpaddq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi64_rrk:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpaddq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_add_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
; X86-LABEL: test_mask_add_epi64_rrkz:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpaddq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi64_rrkz:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpaddq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_add_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
; X86-LABEL: test_mask_add_epi64_rm:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpaddq (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi64_rm:
; X64: ## %bb.0:
; X64-NEXT: vpaddq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
  %b = load <8 x i64>, <8 x i64>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_add_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_add_epi64_rmk:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpaddq (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0x08]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi64_rmk:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpaddq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0x0f]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %b = load <8 x i64>, <8 x i64>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_add_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_add_epi64_rmkz:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpaddq (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi64_rmkz:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpaddq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
  %b = load <8 x i64>, <8 x i64>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_add_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
; X86-LABEL: test_mask_add_epi64_rmb:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovq (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
; X86-NEXT: ## xmm1 = mem[0],zero
; X86-NEXT: vpbroadcastq %xmm1, %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xc9]
; X86-NEXT: vpaddq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi64_rmb:
; X64: ## %bb.0:
; X64-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xd4,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_add_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_add_epi64_rmbk:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovq (%eax), %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
; X86-NEXT: ## xmm2 = mem[0],zero
; X86-NEXT: vpbroadcastq %xmm2, %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd2]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpaddq %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0xca]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi64_rmbk:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xd4,0x0f]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_add_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_add_epi64_rmbkz:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovq (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
; X86-NEXT: ## xmm1 = mem[0],zero
; X86-NEXT: vpbroadcastq %xmm1, %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xc9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpaddq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi64_rmbkz:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xd4,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
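
; For the <8 x i64> tests the mask is only 8 bits wide, so the 32-bit target
; loads it with movzbl before kmovw. Note also that the X86 rmb variants
; materialize the i64 scalar broadcast with vmovq + vpbroadcastq rather than
; an embedded {1to8} memory operand, presumably because the i64 load is
; legalized first in 32-bit mode (assumption; the checks only show the
; resulting instruction sequence).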

define <8 x i64> @test_mask_sub_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_mask_sub_epi64_rr:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_sub_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_sub_epi64_rrk:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsubq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi64_rrk:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsubq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_sub_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
; X86-LABEL: test_mask_sub_epi64_rrkz:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi64_rrkz:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_sub_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
; X86-LABEL: test_mask_sub_epi64_rm:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpsubq (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi64_rm:
; X64: ## %bb.0:
; X64-NEXT: vpsubq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
  %b = load <8 x i64>, <8 x i64>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_sub_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_sub_epi64_rmk:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpsubq (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0x08]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi64_rmk:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpsubq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0x0f]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %b = load <8 x i64>, <8 x i64>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_sub_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_sub_epi64_rmkz:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpsubq (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi64_rmkz:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpsubq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
  %b = load <8 x i64>, <8 x i64>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_sub_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
; X86-LABEL: test_mask_sub_epi64_rmb:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovq (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
; X86-NEXT: ## xmm1 = mem[0],zero
; X86-NEXT: vpbroadcastq %xmm1, %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xc9]
; X86-NEXT: vpsubq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi64_rmb:
; X64: ## %bb.0:
; X64-NEXT: vpsubq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xfb,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_sub_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_sub_epi64_rmbk:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovq (%eax), %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
; X86-NEXT: ## xmm2 = mem[0],zero
; X86-NEXT: vpbroadcastq %xmm2, %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd2]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsubq %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0xca]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi64_rmbk:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpsubq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xfb,0x0f]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_sub_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_sub_epi64_rmbkz:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovq (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
; X86-NEXT: ## xmm1 = mem[0],zero
; X86-NEXT: vpbroadcastq %xmm1, %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xc9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi64_rmbkz:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpsubq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xfb,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
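
; vpmulld (mullo) goes through the same rr/rm/rmb masking matrix as the
; add/sub tests above; the differences are the opcode and that its EVEX
; encoding uses the 0F38 map (the 0x62,0xf2 prefix bytes below, versus
; 0x62,0xf1 for vpaddd/vpsubd).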

define <16 x i32> @test_mask_mullo_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_mask_mullo_epi32_rr_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
; X86-LABEL: test_mask_mullo_epi32_rrk_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmulld %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi32_rrk_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmulld %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
; X86-LABEL: test_mask_mullo_epi32_rrkz_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmulld %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi32_rrkz_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmulld %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
; X86-LABEL: test_mask_mullo_epi32_rm_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpmulld (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi32_rm_512:
; X64: ## %bb.0:
; X64-NEXT: vpmulld (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
; X86-LABEL: test_mask_mullo_epi32_rmk_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpmulld (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0x08]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi32_rmk_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpmulld (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0x0f]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_mullo_epi32_rmkz_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpmulld (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi32_rmkz_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpmulld (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_mullo_epi32_rmb_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpmulld (%eax){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x58,0x40,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi32_rmb_512:
; X64: ## %bb.0:
; X64-NEXT: vpmulld (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x58,0x40,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
; X86-LABEL: test_mask_mullo_epi32_rmbk_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpmulld (%eax){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x59,0x40,0x08]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi32_rmbk_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpmulld (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x59,0x40,0x0f]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_mullo_epi32_rmbkz_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpmulld (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xd9,0x40,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi32_rmbkz_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpmulld (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xd9,0x40,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
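
; The shuffle tests below all pass the immediate 22 (0b00010110). For
; vshuff32x4/vshuff64x2/vshufi32x4/vshufi64x2 this selects 128-bit lanes
; [2,1] of the first source and lanes [1,0] of the second, while vshufpd and
; vshufps interpret the same bits as per-element selectors; the expected
; element order is spelled out in the ## comments next to each check line.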

declare <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float>, <16 x float>, i32, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_mask_shuf_f32x4(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_shuf_f32x4:
; X86: ## %bb.0:
; X86-NEXT: vshuff32x4 $22, %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0x7d,0x48,0x23,0xd9,0x16]
; X86-NEXT: ## zmm3 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vshuff32x4 $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x23,0xd1,0x16]
; X86-NEXT: ## zmm2 {%k1} = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
; X86-NEXT: vaddps %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc3]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_shuf_f32x4:
; X64: ## %bb.0:
; X64-NEXT: vshuff32x4 $22, %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0x7d,0x48,0x23,0xd9,0x16]
; X64-NEXT: ## zmm3 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vshuff32x4 $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x23,0xd1,0x16]
; X64-NEXT: ## zmm2 {%k1} = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
; X64-NEXT: vaddps %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc3]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 %x4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double>, <8 x double>, i32, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_mask_shuf_f64x2(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_shuf_f64x2:
; X86: ## %bb.0:
; X86-NEXT: vshuff64x2 $22, %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0xfd,0x48,0x23,0xd9,0x16]
; X86-NEXT: ## zmm3 = zmm0[4,5,2,3],zmm1[2,3,0,1]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vshuff64x2 $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x23,0xd1,0x16]
; X86-NEXT: ## zmm2 {%k1} = zmm0[4,5,2,3],zmm1[2,3,0,1]
; X86-NEXT: vaddpd %zmm3, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xd3]
; X86-NEXT: vshuff64x2 $22, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x23,0xc1,0x16]
; X86-NEXT: ## zmm0 {%k1} {z} = zmm0[4,5,2,3],zmm1[2,3,0,1]
; X86-NEXT: vaddpd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_shuf_f64x2:
; X64: ## %bb.0:
; X64-NEXT: vshuff64x2 $22, %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0xfd,0x48,0x23,0xd9,0x16]
; X64-NEXT: ## zmm3 = zmm0[4,5,2,3],zmm1[2,3,0,1]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vshuff64x2 $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x23,0xd1,0x16]
; X64-NEXT: ## zmm2 {%k1} = zmm0[4,5,2,3],zmm1[2,3,0,1]
; X64-NEXT: vaddpd %zmm3, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xd3]
; X64-NEXT: vshuff64x2 $22, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x23,0xc1,0x16]
; X64-NEXT: ## zmm0 {%k1} {z} = zmm0[4,5,2,3],zmm1[2,3,0,1]
; X64-NEXT: vaddpd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 %x4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 -1)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> zeroinitializer, i8 %x4)

  %res3 = fadd <8 x double> %res, %res1
  %res4 = fadd <8 x double> %res3, %res2
  ret <8 x double> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_shuf_i32x4(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_shuf_i32x4:
; X86: ## %bb.0:
; X86-NEXT: vshufi32x4 $22, %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0x7d,0x48,0x43,0xd9,0x16]
; X86-NEXT: ## zmm3 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vshufi32x4 $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x43,0xd1,0x16]
; X86-NEXT: ## zmm2 {%k1} = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
; X86-NEXT: vpaddd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_shuf_i32x4:
; X64: ## %bb.0:
; X64-NEXT: vshufi32x4 $22, %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0x7d,0x48,0x43,0xd9,0x16]
; X64-NEXT: ## zmm3 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vshufi32x4 $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x43,0xd1,0x16]
; X64-NEXT: ## zmm2 {%k1} = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
; X64-NEXT: vpaddd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 %x4)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 -1)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_shuf_i64x2(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_shuf_i64x2:
; X86: ## %bb.0:
; X86-NEXT: vshufi64x2 $22, %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0xfd,0x48,0x43,0xd9,0x16]
; X86-NEXT: ## zmm3 = zmm0[4,5,2,3],zmm1[2,3,0,1]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vshufi64x2 $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x43,0xd1,0x16]
; X86-NEXT: ## zmm2 {%k1} = zmm0[4,5,2,3],zmm1[2,3,0,1]
; X86-NEXT: vpaddq %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_shuf_i64x2:
; X64: ## %bb.0:
; X64-NEXT: vshufi64x2 $22, %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0xfd,0x48,0x43,0xd9,0x16]
; X64-NEXT: ## zmm3 = zmm0[4,5,2,3],zmm1[2,3,0,1]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vshufi64x2 $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x43,0xd1,0x16]
; X64-NEXT: ## zmm2 {%k1} = zmm0[4,5,2,3],zmm1[2,3,0,1]
; X64-NEXT: vpaddq %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 %x4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 -1)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double>, <8 x double>, i32, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_mask_shuf_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_shuf_pd_512:
; X86: ## %bb.0:
; X86-NEXT: vshufpd $22, %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0xc6,0xd9,0x16]
; X86-NEXT: ## zmm3 = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vshufpd $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xc6,0xd1,0x16]
; X86-NEXT: ## zmm2 {%k1} = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
; X86-NEXT: vaddpd %zmm3, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xd3]
; X86-NEXT: vshufpd $22, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xc6,0xc1,0x16]
; X86-NEXT: ## zmm0 {%k1} {z} = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
; X86-NEXT: vaddpd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_shuf_pd_512:
; X64: ## %bb.0:
; X64-NEXT: vshufpd $22, %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0xc6,0xd9,0x16]
; X64-NEXT: ## zmm3 = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vshufpd $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xc6,0xd1,0x16]
; X64-NEXT: ## zmm2 {%k1} = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
; X64-NEXT: vaddpd %zmm3, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xd3]
; X64-NEXT: vshufpd $22, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xc6,0xc1,0x16]
; X64-NEXT: ## zmm0 {%k1} {z} = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
; X64-NEXT: vaddpd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 %x4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 -1)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> zeroinitializer, i8 %x4)

  %res3 = fadd <8 x double> %res, %res1
  %res4 = fadd <8 x double> %res3, %res2
  ret <8 x double> %res4
}

declare <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float>, <16 x float>, i32, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_mask_shuf_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_shuf_ps_512:
; X86: ## %bb.0:
; X86-NEXT: vshufps $22, %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0xc6,0xd9,0x16]
; X86-NEXT: ## zmm3 = zmm0[2,1],zmm1[1,0],zmm0[6,5],zmm1[5,4],zmm0[10,9],zmm1[9,8],zmm0[14,13],zmm1[13,12]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vshufps $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0xc6,0xd1,0x16]
; X86-NEXT: ## zmm2 {%k1} = zmm0[2,1],zmm1[1,0],zmm0[6,5],zmm1[5,4],zmm0[10,9],zmm1[9,8],zmm0[14,13],zmm1[13,12]
; X86-NEXT: vaddps %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc3]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_shuf_ps_512:
; X64: ## %bb.0:
; X64-NEXT: vshufps $22, %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0xc6,0xd9,0x16]
; X64-NEXT: ## zmm3 = zmm0[2,1],zmm1[1,0],zmm0[6,5],zmm1[5,4],zmm0[10,9],zmm1[9,8],zmm0[14,13],zmm1[13,12]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vshufps $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0xc6,0xd1,0x16]
; X64-NEXT: ## zmm2 {%k1} = zmm0[2,1],zmm1[1,0],zmm0[6,5],zmm1[5,4],zmm0[10,9],zmm1[9,8],zmm0[14,13],zmm1[13,12]
; X64-NEXT: vaddps %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc3]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 %x4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}
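
; Each of the integer max/min tests below computes the unmasked result into
; %zmm3 and the merge-masked result into %zmm2, then adds the two so that a
; single returned value verifies both invocations of the intrinsic.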

declare <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_pmaxs_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmaxs_d_512:
; X86: ## %bb.0:
; X86-NEXT: vpmaxsd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x3d,0xd9]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmaxsd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x3d,0xd1]
; X86-NEXT: vpaddd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaxs_d_512:
; X64: ## %bb.0:
; X64-NEXT: vpmaxsd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x3d,0xd9]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmaxsd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x3d,0xd1]
; X64-NEXT: vpaddd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_pmaxs_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmaxs_q_512:
; X86: ## %bb.0:
; X86-NEXT: vpmaxsq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x3d,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmaxsq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x3d,0xd1]
; X86-NEXT: vpaddq %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaxs_q_512:
; X64: ## %bb.0:
; X64-NEXT: vpmaxsq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x3d,0xd9]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmaxsq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x3d,0xd1]
; X64-NEXT: vpaddq %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_pmaxu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmaxu_d_512:
; X86: ## %bb.0:
; X86-NEXT: vpmaxud %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x3f,0xd9]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmaxud %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x3f,0xd1]
; X86-NEXT: vpaddd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaxu_d_512:
; X64: ## %bb.0:
; X64-NEXT: vpmaxud %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x3f,0xd9]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmaxud %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x3f,0xd1]
; X64-NEXT: vpaddd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_pmaxu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmaxu_q_512:
; X86: ## %bb.0:
; X86-NEXT: vpmaxuq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x3f,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmaxuq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x3f,0xd1]
; X86-NEXT: vpaddq %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaxu_q_512:
; X64: ## %bb.0:
; X64-NEXT: vpmaxuq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x3f,0xd9]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmaxuq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x3f,0xd1]
; X64-NEXT: vpaddq %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_pmins_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmins_d_512:
; X86: ## %bb.0:
; X86-NEXT: vpminsd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x39,0xd9]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpminsd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x39,0xd1]
; X86-NEXT: vpaddd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmins_d_512:
; X64: ## %bb.0:
; X64-NEXT: vpminsd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x39,0xd9]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpminsd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x39,0xd1]
; X64-NEXT: vpaddd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_pmins_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmins_q_512:
; X86: ## %bb.0:
; X86-NEXT: vpminsq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x39,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpminsq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x39,0xd1]
; X86-NEXT: vpaddq %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmins_q_512:
; X64: ## %bb.0:
; X64-NEXT: vpminsq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x39,0xd9]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpminsq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x39,0xd1]
; X64-NEXT: vpaddq %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_pminu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pminu_d_512:
; X86: ## %bb.0:
; X86-NEXT: vpminud %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x3b,0xd9]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpminud %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x3b,0xd1]
; X86-NEXT: vpaddd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pminu_d_512:
; X64: ## %bb.0:
; X64-NEXT: vpminud %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x3b,0xd9]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpminud %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x3b,0xd1]
; X64-NEXT: vpaddd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_pminu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pminu_q_512:
; X86: ## %bb.0:
; X86-NEXT: vpminuq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x3b,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpminuq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x3b,0xd1]
; X86-NEXT: vpaddq %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pminu_q_512:
; X64: ## %bb.0:
; X64-NEXT: vpminuq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x3b,0xd9]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpminuq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x3b,0xd1]
; X64-NEXT: vpaddq %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}
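
; The masked scalar moves below merge only the low element, so just bit 0 of
; the i8 mask %__U is consulted; the upper lanes are taken unconditionally
; from %__A.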
define <4 x float> @test_mm_mask_move_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_move_ss:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x76,0x09,0x10,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask_move_ss:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x76,0x09,0x10,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> %__W, i8 %__U)
  ret <4 x float> %res
}


define <4 x float> @test_mm_maskz_move_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_maskz_move_ss:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x10,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_maskz_move_ss:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x10,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> zeroinitializer, i8 %__U)
  ret <4 x float> %res
}

define <2 x double> @test_mm_mask_move_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_move_sd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovsd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf7,0x09,0x10,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask_move_sd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovsd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf7,0x09,0x10,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U)
  ret <2 x double> %res
}

define <2 x double> @test_mm_maskz_move_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_maskz_move_sd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x10,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_maskz_move_sd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x10,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> zeroinitializer, i8 %__U)
  ret <2 x double> %res
}

declare <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float>, <4 x float>, <4 x float>, i8)
declare <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double>, <2 x double>, <2 x double>, i8)

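; Masked zero-extension tests (vpmovzxb/w/d). The lane-by-lane shuffle decode comments below are part of the autogenerated assertions.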
declare <16 x i32> @llvm.x86.avx512.mask.pmovzxb.d.512(<16 x i8>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_pmovzxb_d_512(<16 x i8> %x0, <16 x i32> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_d_512:
; X86: ## %bb.0:
; X86-NEXT: vpmovzxbd %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x31,0xd0]
; X86-NEXT: ## zmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmovzxbd %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x31,0xc8]
; X86-NEXT: ## zmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; X86-NEXT: vpmovzxbd %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x31,0xc0]
; X86-NEXT: ## zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; X86-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X86-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_d_512:
; X64: ## %bb.0:
; X64-NEXT: vpmovzxbd %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x31,0xd0]
; X64-NEXT: ## zmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovzxbd %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x31,0xc8]
; X64-NEXT: ## zmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; X64-NEXT: vpmovzxbd %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x31,0xc0]
; X64-NEXT: ## zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; X64-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X64-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmovzxb.d.512(<16 x i8> %x0, <16 x i32> %x1, i16 %x2)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmovzxb.d.512(<16 x i8> %x0, <16 x i32> zeroinitializer, i16 %x2)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.pmovzxb.d.512(<16 x i8> %x0, <16 x i32> %x1, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.pmovzxb.q.512(<16 x i8>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_pmovzxb_q_512(<16 x i8> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_q_512:
; X86: ## %bb.0:
; X86-NEXT: vpmovzxbq %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x32,0xd0]
; X86-NEXT: ## zmm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovzxbq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x32,0xc8]
; X86-NEXT: ## zmm1 {%k1} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
; X86-NEXT: vpmovzxbq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x32,0xc0]
; X86-NEXT: ## zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
; X86-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_q_512:
; X64: ## %bb.0:
; X64-NEXT: vpmovzxbq %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x32,0xd0]
; X64-NEXT: ## zmm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovzxbq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x32,0xc8]
; X64-NEXT: ## zmm1 {%k1} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
; X64-NEXT: vpmovzxbq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x32,0xc0]
; X64-NEXT: ## zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
; X64-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxb.q.512(<16 x i8> %x0, <8 x i64> %x1, i8 %x2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmovzxb.q.512(<16 x i8> %x0, <8 x i64> zeroinitializer, i8 %x2)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.pmovzxb.q.512(<16 x i8> %x0, <8 x i64> %x1, i8 -1)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res3, %res2
  ret <8 x i64> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.pmovzxd.q.512(<8 x i32>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_pmovzxd_q_512(<8 x i32> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxd_q_512:
; X86: ## %bb.0:
; X86-NEXT: vpmovzxdq %ymm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x35,0xd0]
; X86-NEXT: ## zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovzxdq %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x35,0xc8]
; X86-NEXT: ## zmm1 {%k1} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
; X86-NEXT: vpmovzxdq %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x35,0xc0]
; X86-NEXT: ## zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
; X86-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxd_q_512:
; X64: ## %bb.0:
; X64-NEXT: vpmovzxdq %ymm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x35,0xd0]
; X64-NEXT: ## zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovzxdq %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x35,0xc8]
; X64-NEXT: ## zmm1 {%k1} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
; X64-NEXT: vpmovzxdq %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x35,0xc0]
; X64-NEXT: ## zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
; X64-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxd.q.512(<8 x i32> %x0, <8 x i64> %x1, i8 %x2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmovzxd.q.512(<8 x i32> %x0, <8 x i64> zeroinitializer, i8 %x2)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.pmovzxd.q.512(<8 x i32> %x0, <8 x i64> %x1, i8 -1)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res3, %res2
  ret <8 x i64> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.pmovzxw.d.512(<16 x i16>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_pmovzxw_d_512(<16 x i16> %x0, <16 x i32> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxw_d_512:
; X86: ## %bb.0:
; X86-NEXT: vpmovzxwd %ymm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x33,0xd0]
; X86-NEXT: ## zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmovzxwd %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x33,0xc8]
; X86-NEXT: ## zmm1 {%k1} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; X86-NEXT: vpmovzxwd %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x33,0xc0]
; X86-NEXT: ## zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; X86-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X86-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxw_d_512:
; X64: ## %bb.0:
; X64-NEXT: vpmovzxwd %ymm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x33,0xd0]
; X64-NEXT: ## zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovzxwd %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x33,0xc8]
; X64-NEXT: ## zmm1 {%k1} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; X64-NEXT: vpmovzxwd %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x33,0xc0]
; X64-NEXT: ## zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; X64-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X64-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmovzxw.d.512(<16 x i16> %x0, <16 x i32> %x1, i16 %x2)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmovzxw.d.512(<16 x i16> %x0, <16 x i32> zeroinitializer, i16 %x2)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.pmovzxw.d.512(<16 x i16> %x0, <16 x i32> %x1, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.pmovzxw.q.512(<8 x i16>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_pmovzxw_q_512(<8 x i16> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxw_q_512:
; X86: ## %bb.0:
; X86-NEXT: vpmovzxwq %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x34,0xd0]
; X86-NEXT: ## zmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovzxwq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x34,0xc8]
; X86-NEXT: ## zmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; X86-NEXT: vpmovzxwq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x34,0xc0]
; X86-NEXT: ## zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; X86-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxw_q_512:
; X64: ## %bb.0:
; X64-NEXT: vpmovzxwq %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x34,0xd0]
; X64-NEXT: ## zmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovzxwq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x34,0xc8]
; X64-NEXT: ## zmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; X64-NEXT: vpmovzxwq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x34,0xc0]
; X64-NEXT: ## zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; X64-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxw.q.512(<8 x i16> %x0, <8 x i64> %x1, i8 %x2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmovzxw.q.512(<8 x i16> %x0, <8 x i64> zeroinitializer, i8 %x2)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.pmovzxw.q.512(<8 x i16> %x0, <8 x i64> %x1, i8 -1)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res3, %res2
  ret <8 x i64> %res4
}

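; Masked sign-extension tests (vpmovsxb/w/d), the signed counterparts of the vpmovzx tests above.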
declare <16 x i32> @llvm.x86.avx512.mask.pmovsxb.d.512(<16 x i8>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_pmovsxb_d_512(<16 x i8> %x0, <16 x i32> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_d_512:
; X86: ## %bb.0:
; X86-NEXT: vpmovsxbd %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x21,0xd0]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmovsxbd %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x21,0xc8]
; X86-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x21,0xc0]
; X86-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X86-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_d_512:
; X64: ## %bb.0:
; X64-NEXT: vpmovsxbd %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x21,0xd0]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovsxbd %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x21,0xc8]
; X64-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x21,0xc0]
; X64-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X64-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmovsxb.d.512(<16 x i8> %x0, <16 x i32> %x1, i16 %x2)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmovsxb.d.512(<16 x i8> %x0, <16 x i32> zeroinitializer, i16 %x2)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.pmovsxb.d.512(<16 x i8> %x0, <16 x i32> %x1, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.pmovsxb.q.512(<16 x i8>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_pmovsxb_q_512(<16 x i8> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_q_512:
; X86: ## %bb.0:
; X86-NEXT: vpmovsxbq %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x22,0xd0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovsxbq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x22,0xc8]
; X86-NEXT: vpmovsxbq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x22,0xc0]
; X86-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_q_512:
; X64: ## %bb.0:
; X64-NEXT: vpmovsxbq %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x22,0xd0]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovsxbq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x22,0xc8]
; X64-NEXT: vpmovsxbq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x22,0xc0]
; X64-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmovsxb.q.512(<16 x i8> %x0, <8 x i64> %x1, i8 %x2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmovsxb.q.512(<16 x i8> %x0, <8 x i64> zeroinitializer, i8 %x2)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.pmovsxb.q.512(<16 x i8> %x0, <8 x i64> %x1, i8 -1)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res3, %res2
  ret <8 x i64> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.pmovsxd.q.512(<8 x i32>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_pmovsxd_q_512(<8 x i32> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxd_q_512:
; X86: ## %bb.0:
; X86-NEXT: vpmovsxdq %ymm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x25,0xd0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovsxdq %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x25,0xc8]
; X86-NEXT: vpmovsxdq %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x25,0xc0]
; X86-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxd_q_512:
; X64: ## %bb.0:
; X64-NEXT: vpmovsxdq %ymm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x25,0xd0]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovsxdq %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x25,0xc8]
; X64-NEXT: vpmovsxdq %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x25,0xc0]
; X64-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmovsxd.q.512(<8 x i32> %x0, <8 x i64> %x1, i8 %x2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmovsxd.q.512(<8 x i32> %x0, <8 x i64> zeroinitializer, i8 %x2)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.pmovsxd.q.512(<8 x i32> %x0, <8 x i64> %x1, i8 -1)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res3, %res2
  ret <8 x i64> %res4
}


declare <16 x i32> @llvm.x86.avx512.mask.pmovsxw.d.512(<16 x i16>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_pmovsxw_d_512(<16 x i16> %x0, <16 x i32> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxw_d_512:
; X86: ## %bb.0:
; X86-NEXT: vpmovsxwd %ymm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x23,0xd0]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmovsxwd %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x23,0xc8]
; X86-NEXT: vpmovsxwd %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x23,0xc0]
; X86-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X86-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxw_d_512:
; X64: ## %bb.0:
; X64-NEXT: vpmovsxwd %ymm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x23,0xd0]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovsxwd %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x23,0xc8]
; X64-NEXT: vpmovsxwd %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x23,0xc0]
; X64-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X64-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmovsxw.d.512(<16 x i16> %x0, <16 x i32> %x1, i16 %x2)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmovsxw.d.512(<16 x i16> %x0, <16 x i32> zeroinitializer, i16 %x2)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.pmovsxw.d.512(<16 x i16> %x0, <16 x i32> %x1, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}


declare <8 x i64> @llvm.x86.avx512.mask.pmovsxw.q.512(<8 x i16>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_pmovsxw_q_512(<8 x i16> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxw_q_512:
; X86: ## %bb.0:
; X86-NEXT: vpmovsxwq %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x24,0xd0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovsxwq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x24,0xc8]
; X86-NEXT: vpmovsxwq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x24,0xc0]
; X86-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxw_q_512:
; X64: ## %bb.0:
; X64-NEXT: vpmovsxwq %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x24,0xd0]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovsxwq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x24,0xc8]
; X64-NEXT: vpmovsxwq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x24,0xc0]
; X64-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmovsxw.q.512(<8 x i16> %x0, <8 x i64> %x1, i8 %x2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmovsxw.q.512(<8 x i16> %x0, <8 x i64> zeroinitializer, i8 %x2)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.pmovsxw.q.512(<8 x i16> %x0, <8 x i64> %x1, i8 -1)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res3, %res2
  ret <8 x i64> %res4
}

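; Shift-by-immediate tests: vpsrl/vpsra/vpsll with an immediate count, in merge-masked, zero-masked, and unmasked forms.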
declare <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64>, i32, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_psrl_qi_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrl_qi_512:
; X86: ## %bb.0:
; X86-NEXT: vpsrlq $4, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x73,0xd0,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsrlq $4, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xd0,0x04]
; X86-NEXT: vpsrlq $4, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x73,0xd0,0x04]
; X86-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrl_qi_512:
; X64: ## %bb.0:
; X64-NEXT: vpsrlq $4, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x73,0xd0,0x04]
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpsrlq $4, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xd0,0x04]
; X64-NEXT: vpsrlq $4, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x73,0xd0,0x04]
; X64-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64> %x0, i32 4, <8 x i64> %x2, i8 %x3)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64> %x0, i32 4, <8 x i64> %x2, i8 -1)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64> %x0, i32 4, <8 x i64> zeroinitializer, i8 %x3)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res3, %res2
  ret <8 x i64> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32>, i32, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_psrl_di_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrl_di_512:
; X86: ## %bb.0:
; X86-NEXT: vpsrld $4, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xd0,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpsrld $4, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xd0,0x04]
; X86-NEXT: vpsrld $4, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xd0,0x04]
; X86-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X86-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrl_di_512:
; X64: ## %bb.0:
; X64-NEXT: vpsrld $4, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xd0,0x04]
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpsrld $4, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xd0,0x04]
; X64-NEXT: vpsrld $4, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xd0,0x04]
; X64-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X64-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32> %x0, i32 4, <16 x i32> %x2, i16 %x3)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32> %x0, i32 4, <16 x i32> %x2, i16 -1)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32> %x0, i32 4, <16 x i32> zeroinitializer, i16 %x3)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.psra.di.512(<16 x i32>, i32, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_psra_di_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psra_di_512:
; X86: ## %bb.0:
; X86-NEXT: vpsrad $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xe0,0x03]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpsrad $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xe0,0x03]
; X86-NEXT: vpsrad $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xe0,0x03]
; X86-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X86-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psra_di_512:
; X64: ## %bb.0:
; X64-NEXT: vpsrad $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xe0,0x03]
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpsrad $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xe0,0x03]
; X64-NEXT: vpsrad $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xe0,0x03]
; X64-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X64-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.psra.di.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 %x3)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.psra.di.512(<16 x i32> %x0, i32 3, <16 x i32> zeroinitializer, i16 %x3)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.psra.di.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.psra.qi.512(<8 x i64>, i32, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_psra_qi_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psra_qi_512:
; X86: ## %bb.0:
; X86-NEXT: vpsraq $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x72,0xe0,0x03]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsraq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xe0,0x03]
; X86-NEXT: vpsraq $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x72,0xe0,0x03]
; X86-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psra_qi_512:
; X64: ## %bb.0:
; X64-NEXT: vpsraq $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x72,0xe0,0x03]
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpsraq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xe0,0x03]
; X64-NEXT: vpsraq $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x72,0xe0,0x03]
; X64-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.psra.qi.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.psra.qi.512(<8 x i64> %x0, i32 3, <8 x i64> zeroinitializer, i8 %x3)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.psra.qi.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 -1)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res3, %res2
  ret <8 x i64> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.psll.di.512(<16 x i32>, i32, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_psll_di_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psll_di_512:
; X86: ## %bb.0:
; X86-NEXT: vpslld $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xf0,0x03]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpslld $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xf0,0x03]
; X86-NEXT: vpslld $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xf0,0x03]
; X86-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X86-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psll_di_512:
; X64: ## %bb.0:
; X64-NEXT: vpslld $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xf0,0x03]
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpslld $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xf0,0x03]
; X64-NEXT: vpslld $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xf0,0x03]
; X64-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X64-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.psll.di.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 %x3)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.psll.di.512(<16 x i32> %x0, i32 3, <16 x i32> zeroinitializer, i16 %x3)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.psll.di.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.psll.qi.512(<8 x i64>, i32, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_psll_qi_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psll_qi_512:
; X86: ## %bb.0:
; X86-NEXT: vpsllq $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x73,0xf0,0x03]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsllq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xf0,0x03]
; X86-NEXT: vpsllq $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x73,0xf0,0x03]
; X86-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psll_qi_512:
; X64: ## %bb.0:
; X64-NEXT: vpsllq $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x73,0xf0,0x03]
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpsllq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xf0,0x03]
; X64-NEXT: vpsllq $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x73,0xf0,0x03]
; X64-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.psll.qi.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.psll.qi.512(<8 x i64> %x0, i32 3, <8 x i64> zeroinitializer, i8 %x3)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.psll.qi.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 -1)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res3, %res2
  ret <8 x i64> %res4
}

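; Shifts with the count taken from an xmm register, starting with vpslld/vpsllq.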
define <16 x i32> @test_x86_avx512_psll_d(<16 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_avx512_psll_d:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpslld %xmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xf2,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psll_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
; X86-LABEL: test_x86_avx512_mask_psll_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpslld %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xf2,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psll_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpslld %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xf2,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psll_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psll_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpslld %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xf2,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psll_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpslld %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xf2,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psll_q(<8 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_avx512_psll_q:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsllq %xmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf3,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psll_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
; X86-LABEL: test_x86_avx512_mask_psll_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsllq %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf3,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psll_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsllq %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf3,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psll_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psll_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsllq %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf3,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psll_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsllq %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf3,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone

define <16 x i32> @test_x86_avx512_psrl_d(<16 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_avx512_psrl_d:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsrld %xmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xd2,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psrl_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrl_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsrld %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xd2,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrl_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsrld %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xd2,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psrl_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrl_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsrld %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xd2,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrl_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsrld %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xd2,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psrl_q(<8 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_avx512_psrl_q:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsrlq %xmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd3,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psrl_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrl_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsrlq %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd3,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrl_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsrlq %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd3,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psrl_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrl_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsrlq %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd3,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrl_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsrlq %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd3,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone

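; Arithmetic right shifts (vpsrad/vpsraq) with the count in an xmm register.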
define <16 x i32> @test_x86_avx512_psra_d(<16 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_avx512_psra_d:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsrad %xmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xe2,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psra_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
; X86-LABEL: test_x86_avx512_mask_psra_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsrad %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xe2,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psra_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsrad %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xe2,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psra_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psra_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsrad %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xe2,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psra_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsrad %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xe2,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psra_q(<8 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_avx512_psra_q:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsraq %xmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xe2,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psra_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
; X86-LABEL: test_x86_avx512_mask_psra_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsraq %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xe2,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psra_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsraq %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xe2,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psra_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psra_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsraq %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xe2,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psra_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsraq %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xe2,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone

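; Variable per-element shift tests: vpsllvd/vpsllvq.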
define <16 x i32> @test_x86_avx512_psllv_d(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: test_x86_avx512_psllv_d:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x47,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psllv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
; X86-LABEL: test_x86_avx512_mask_psllv_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsllvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x47,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psllv_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsllvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x47,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psllv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psllv_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x47,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psllv_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x47,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psllv_q(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: test_x86_avx512_psllv_q:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsllvq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x47,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psllv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
; X86-LABEL: test_x86_avx512_mask_psllv_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsllvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x47,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psllv_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsllvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x47,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psllv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psllv_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsllvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x47,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psllv_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsllvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x47,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone


define <16 x i32> @test_x86_avx512_psrav_d(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: test_x86_avx512_psrav_d:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsravd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x46,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psrav_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrav_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsravd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x46,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrav_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsravd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x46,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psrav_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrav_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsravd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x46,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrav_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsravd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x46,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psrav_q(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: test_x86_avx512_psrav_q:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsravq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x46,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psrav_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrav_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsravq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x46,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrav_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsravq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x46,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psrav_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrav_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsravq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x46,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrav_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsravq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x46,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone

define <16 x i32> @test_x86_avx512_psrlv_d(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: test_x86_avx512_psrlv_d:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x45,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrlv_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsrlvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x45,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrlv_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsrlvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x45,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrlv_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x45,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrlv_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x45,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psrlv_q(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: test_x86_avx512_psrlv_q:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsrlvq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x45,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrlv_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsrlvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x45,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrlv_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsrlvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x45,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrlv_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsrlvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x45,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrlv_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsrlvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x45,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone

define <8 x i64> @test_x86_avx512_psrlv_q_memop(<8 x i64> %a0, <8 x i64>* %ptr) {
; X86-LABEL: test_x86_avx512_psrlv_q_memop:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpsrlvq (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x45,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_psrlv_q_memop:
; X64: ## %bb.0:
; X64-NEXT: vpsrlvq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x45,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%b = load <8 x i64>, <8 x i64>* %ptr
%res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}
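
; Masked dword-to-double conversion tests (signed and unsigned). Each test
; calls the intrinsic with and without a mask and adds the results, so both
; forms stay live in the checked output.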
declare <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32>, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_mask_cvt_dq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_512:
; X86: ## %bb.0:
; X86-NEXT: vcvtdq2pd %ymm0, %zmm2 ## encoding: [0x62,0xf1,0x7e,0x48,0xe6,0xd0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vcvtdq2pd %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0xe6,0xc8]
; X86-NEXT: vaddpd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_512:
; X64: ## %bb.0:
; X64-NEXT: vcvtdq2pd %ymm0, %zmm2 ## encoding: [0x62,0xf1,0x7e,0x48,0xe6,0xd0]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vcvtdq2pd %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0xe6,0xc8]
; X64-NEXT: vaddpd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2)
%res1 = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1)
%res2 = fadd <8 x double> %res, %res1
ret <8 x double> %res2
}

declare <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32>, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_mask_cvt_udq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_512:
; X86: ## %bb.0:
; X86-NEXT: vcvtudq2pd %ymm0, %zmm2 ## encoding: [0x62,0xf1,0x7e,0x48,0x7a,0xd0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vcvtudq2pd %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x7a,0xc8]
; X86-NEXT: vaddpd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_512:
; X64: ## %bb.0:
; X64-NEXT: vcvtudq2pd %ymm0, %zmm2 ## encoding: [0x62,0xf1,0x7e,0x48,0x7a,0xd0]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vcvtudq2pd %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x7a,0xc8]
; X64-NEXT: vaddpd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2)
%res1 = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1)
%res2 = fadd <8 x double> %res, %res1
ret <8 x double> %res2
}
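
; valignq/valignd concatenate the two sources and shift right by the
; immediate element count; the autogenerated shuffle comments
; (## zmm0 = zmm1[...],zmm0[...]) decode the resulting element order.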
define <8 x i64> @test_valign_q(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_valign_q:
; CHECK: ## %bb.0:
; CHECK-NEXT: valignq $2, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x03,0xc1,0x02]
; CHECK-NEXT: ## zmm0 = zmm1[2,3,4,5,6,7],zmm0[0,1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_valign_q(<8 x i64> %a, <8 x i64> %b, <8 x i64> %src, i8 %mask) {
; X86-LABEL: test_mask_valign_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: valignq $2, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x03,0xd1,0x02]
; X86-NEXT: ## zmm2 {%k1} = zmm1[2,3,4,5,6,7],zmm0[0,1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_valign_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: valignq $2, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x03,0xd1,0x02]
; X64-NEXT: ## zmm2 {%k1} = zmm1[2,3,4,5,6,7],zmm0[0,1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> %src, i8 %mask)
ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)

define <16 x i32> @test_maskz_valign_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
; X86-LABEL: test_maskz_valign_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: valignd $5, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x03,0xc1,0x05]
; X86-NEXT: ## zmm0 {%k1} {z} = zmm1[5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1,2,3,4]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_valign_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: valignd $5, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x03,0xc1,0x05]
; X64-NEXT: ## zmm0 {%k1} {z} = zmm1[5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1,2,3,4]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32> %a, <16 x i32> %b, i32 5, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)
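
; vpermilvar tests: variable in-lane permutes of pd/ps elements, checked in
; merge-masked, zero-masked, and unmasked forms, with the three results
; summed together.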
declare <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_mask_vpermilvar_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_512:
; X86: ## %bb.0:
; X86-NEXT: vpermilpd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x0d,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermilpd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x0d,0xd1]
; X86-NEXT: vpermilpd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x0d,0xc1]
; X86-NEXT: vaddpd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc0]
; X86-NEXT: vaddpd %zmm0, %zmm3, %zmm0 ## encoding: [0x62,0xf1,0xe5,0x48,0x58,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_512:
; X64: ## %bb.0:
; X64-NEXT: vpermilpd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x0d,0xd9]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermilpd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x0d,0xd1]
; X64-NEXT: vpermilpd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x0d,0xc1]
; X64-NEXT: vaddpd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc0]
; X64-NEXT: vaddpd %zmm0, %zmm3, %zmm0 ## encoding: [0x62,0xf1,0xe5,0x48,0x58,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
%res1 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> zeroinitializer, i8 %x3)
%res2 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
%res3 = fadd <8 x double> %res, %res1
%res4 = fadd <8 x double> %res2, %res3
ret <8 x double> %res4
}

declare <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512:
; X86: ## %bb.0:
; X86-NEXT: vpermilps %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x0c,0xd9]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpermilps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x0c,0xd1]
; X86-NEXT: vpermilps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x0c,0xc1]
; X86-NEXT: vaddps %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc0]
; X86-NEXT: vaddps %zmm0, %zmm3, %zmm0 ## encoding: [0x62,0xf1,0x64,0x48,0x58,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512:
; X64: ## %bb.0:
; X64-NEXT: vpermilps %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x0c,0xd9]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermilps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x0c,0xd1]
; X64-NEXT: vpermilps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x0c,0xc1]
; X64-NEXT: vaddps %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc0]
; X64-NEXT: vaddps %zmm0, %zmm3, %zmm0 ## encoding: [0x62,0xf1,0x64,0x48,0x58,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
%res1 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> zeroinitializer, i16 %x3)
%res2 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
%res3 = fadd <16 x float> %res, %res1
%res4 = fadd <16 x float> %res2, %res3
ret <16 x float> %res4
}

; Test case to make sure we can print shuffle decode comments for constant pool loads.
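; Here the control vector is constant, so it is materialized from the
; constant pool (the LCPI203_* fixups below) and the printed comment decodes
; the shuffle mask instead of naming the memory operand.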
define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512_constant_pool(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512_constant_pool:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpermilps {{.*#+}} zmm2 {%k1} = zmm0[2,3,0,1,7,6,5,4,9,8,11,10,12,13,14,15]
; X86-NEXT: ## encoding: [0x62,0xf2,0x7d,0x49,0x0c,0x15,A,A,A,A]
; X86-NEXT: ## fixup A - offset: 6, value: LCPI203_0, kind: FK_Data_4
; X86-NEXT: vpermilps {{.*#+}} zmm1 {%k1} {z} = zmm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15]
; X86-NEXT: ## encoding: [0x62,0xf2,0x7d,0xc9,0x0c,0x0d,A,A,A,A]
; X86-NEXT: ## fixup A - offset: 6, value: LCPI203_1, kind: FK_Data_4
; X86-NEXT: vaddps %zmm1, %zmm2, %zmm1 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc9]
; X86-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[1,0,3,2,4,5,6,7,10,11,8,9,14,15,13,12]
; X86-NEXT: ## encoding: [0x62,0xf2,0x7d,0x48,0x0c,0x05,A,A,A,A]
; X86-NEXT: ## fixup A - offset: 6, value: LCPI203_2, kind: FK_Data_4
; X86-NEXT: vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512_constant_pool:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermilps {{.*#+}} zmm2 {%k1} = zmm0[2,3,0,1,7,6,5,4,9,8,11,10,12,13,14,15]
; X64-NEXT: ## encoding: [0x62,0xf2,0x7d,0x49,0x0c,0x15,A,A,A,A]
; X64-NEXT: ## fixup A - offset: 6, value: LCPI203_0-4, kind: reloc_riprel_4byte
; X64-NEXT: vpermilps {{.*#+}} zmm1 {%k1} {z} = zmm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15]
; X64-NEXT: ## encoding: [0x62,0xf2,0x7d,0xc9,0x0c,0x0d,A,A,A,A]
; X64-NEXT: ## fixup A - offset: 6, value: LCPI203_1-4, kind: reloc_riprel_4byte
; X64-NEXT: vaddps %zmm1, %zmm2, %zmm1 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc9]
; X64-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[1,0,3,2,4,5,6,7,10,11,8,9,14,15,13,12]
; X64-NEXT: ## encoding: [0x62,0xf2,0x7d,0x48,0x0c,0x05,A,A,A,A]
; X64-NEXT: ## fixup A - offset: 6, value: LCPI203_2-4, kind: reloc_riprel_4byte
; X64-NEXT: vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 3, i32 2, i32 1, i32 0, i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3>, <16 x float> %x2, i16 %x3)
%res1 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3>, <16 x float> zeroinitializer, i16 %x3)
%res2 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0>, <16 x float> %x2, i16 -1)
%res3 = fadd <16 x float> %res, %res1
%res4 = fadd <16 x float> %res2, %res3
ret <16 x float> %res4
}
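
; The test_mask_mul_epi32_* tests cover vpmuldq in every operand form the
; intrinsic supports: rr (register), rrk (merge-masked), rrkz (zero-masked),
; rm/rmk/rmkz (memory operand), and rmb/rmbk/rmbkz (scalar broadcast, folded
; to a {1to8} memory operand on X64).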
define <8 x i64> @test_mask_mul_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_mask_mul_epi32_rr:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpmuldq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mul_epi32_rrk:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmuldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rrk:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmuldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_mul_epi32_rrkz:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rrkz:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
; X86-LABEL: test_mask_mul_epi32_rm:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpmuldq (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rm:
; X64: ## %bb.0:
; X64-NEXT: vpmuldq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mul_epi32_rmk:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpmuldq (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0x08]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rmk:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpmuldq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0x0f]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_mul_epi32_rmkz:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpmuldq (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rmkz:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpmuldq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rmb(<16 x i32> %a, i64* %ptr_b) {
; X86-LABEL: test_mask_mul_epi32_rmb:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovq (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
; X86-NEXT: ## xmm1 = mem[0],zero
; X86-NEXT: vpbroadcastq %xmm1, %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xc9]
; X86-NEXT: vpmuldq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rmb:
; X64: ## %bb.0:
; X64-NEXT: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x58,0x28,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
%b = bitcast <8 x i64> %b64 to <16 x i32>
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mul_epi32_rmbk:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovq (%eax), %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
; X86-NEXT: ## xmm2 = mem[0],zero
; X86-NEXT: vpbroadcastq %xmm2, %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd2]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmuldq %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0xca]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rmbk:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpmuldq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0x28,0x0f]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
%b = bitcast <8 x i64> %b64 to <16 x i32>
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_mul_epi32_rmbkz:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovq (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
; X86-NEXT: ## xmm1 = mem[0],zero
; X86-NEXT: vpbroadcastq %xmm1, %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xc9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rmbkz:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xd9,0x28,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
%b = bitcast <8 x i64> %b64 to <16 x i32>
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
ret < 8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
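
; The test_mask_mul_epu32_* tests repeat the same operand-form matrix for
; the unsigned multiply, vpmuludq.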
define <8 x i64> @test_mask_mul_epu32_rr(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_mask_mul_epu32_rr:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mul_epu32_rrk:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmuludq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rrk:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmuludq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_mul_epu32_rrkz:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rrkz:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
; X86-LABEL: test_mask_mul_epu32_rm:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpmuludq (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rm:
; X64: ## %bb.0:
; X64-NEXT: vpmuludq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mul_epu32_rmk:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpmuludq (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0x08]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rmk:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpmuludq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0x0f]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_mul_epu32_rmkz:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpmuludq (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rmkz:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpmuludq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rmb(<16 x i32> %a, i64* %ptr_b) {
; X86-LABEL: test_mask_mul_epu32_rmb:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovq (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
; X86-NEXT: ## xmm1 = mem[0],zero
; X86-NEXT: vpbroadcastq %xmm1, %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xc9]
; X86-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rmb:
; X64: ## %bb.0:
; X64-NEXT: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xf4,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
%b = bitcast <8 x i64> %b64 to <16 x i32>
%res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mul_epu32_rmbk:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovq (%eax), %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
; X86-NEXT: ## xmm2 = mem[0],zero
; X86-NEXT: vpbroadcastq %xmm2, %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd2]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmuludq %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0xca]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rmbk:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpmuludq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xf4,0x0f]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
%b = bitcast <8 x i64> %b64 to <16 x i32>
%res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_mul_epu32_rmbkz:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovq (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
; X86-NEXT: ## xmm1 = mem[0],zero
; X86-NEXT: vpbroadcastq %xmm1, %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xc9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rmbkz:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xf4,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
%b = bitcast <8 x i64> %b64 to <16 x i32>
%res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
ret < 8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
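
; Masked subvector extract tests. Extracts that produce an xmm result end
; with vzeroupper; the 256-bit vextracti64x4 result stays in a ymm register,
; so no vzeroupper is needed there.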
define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) {
; X86-LABEL: test_mask_vextractf32x4:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vextractf32x4 $2, %zmm1, %xmm0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x19,0xc8,0x02]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_vextractf32x4:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vextractf32x4 $2, %zmm1, %xmm0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x19,0xc8,0x02]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float> %a, i32 2, <4 x float> %b, i8 %mask)
ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float>, i32, <4 x float>, i8)

define <4 x i64> @test_mask_vextracti64x4(<4 x i64> %b, <8 x i64> %a, i8 %mask) {
; X86-LABEL: test_mask_vextracti64x4:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vextracti64x4 $1, %zmm1, %ymm0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x3b,0xc8,0x01]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_vextracti64x4:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vextracti64x4 $1, %zmm1, %ymm0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x3b,0xc8,0x01]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64> %a, i32 1, <4 x i64> %b, i8 %mask)
ret <4 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i32, <4 x i64>, i8)

define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) {
; X86-LABEL: test_maskz_vextracti32x4:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x39,0xc0,0x02]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_vextracti32x4:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x39,0xc0,0x02]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32> %a, i32 2, <4 x i32> zeroinitializer, i8 %mask)
ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32>, i32, <4 x i32>, i8)

define <4 x double> @test_vextractf64x4(<8 x double> %a) {
; CHECK-LABEL: test_vextractf64x4:
; CHECK: ## %bb.0:
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1b,0xc0,0x01]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double> %a, i32 1, <4 x double> zeroinitializer, i8 -1)
ret <4 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double>, i32, <4 x double>, i8)
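
; Subvector insert tests: each vinsert[fi]32x4/[fi]64x4 intrinsic is called
; merge-masked, unmasked, and zero-masked, and the three results are summed
; so none of the calls can be dropped.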
declare <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float>, <4 x float>, i32, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_mask_insertf32x4_512(<16 x float> %x0, <4 x float> %x1, <16 x float> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_insertf32x4_512:
; X86: ## %bb.0:
; X86-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0x7d,0x48,0x18,0xd9,0x01]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x18,0xd1,0x01]
; X86-NEXT: vaddps %zmm3, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xd3]
; X86-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x18,0xc1,0x01]
; X86-NEXT: vaddps %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_insertf32x4_512:
; X64: ## %bb.0:
; X64-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0x7d,0x48,0x18,0xd9,0x01]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x18,0xd1,0x01]
; X64-NEXT: vaddps %zmm3, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xd3]
; X64-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x18,0xc1,0x01]
; X64-NEXT: vaddps %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> %x3, i16 %x4)
%res1 = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> %x3, i16 -1)
%res2 = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> zeroinitializer, i16 %x4)
%res3 = fadd <16 x float> %res, %res1
%res4 = fadd <16 x float> %res2, %res3
ret <16 x float> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32>, <4 x i32>, i32, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_inserti32x4_512(<16 x i32> %x0, <4 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_inserti32x4_512:
; X86: ## %bb.0:
; X86-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0x7d,0x48,0x38,0xd9,0x01]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x38,0xd1,0x01]
; X86-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x38,0xc1,0x01]
; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm0 ## encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X86-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_inserti32x4_512:
; X64: ## %bb.0:
; X64-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0x7d,0x48,0x38,0xd9,0x01]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x38,0xd1,0x01]
; X64-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x38,0xc1,0x01]
; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm0 ## encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X64-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> %x3, i16 %x4)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> %x3, i16 -1)
%res2 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> zeroinitializer, i16 %x4)
%res3 = add <16 x i32> %res, %res1
%res4 = add <16 x i32> %res2, %res3
ret <16 x i32> %res4
}

declare <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double>, <4 x double>, i32, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_mask_insertf64x4_512(<8 x double> %x0, <4 x double> %x1, <8 x double> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_insertf64x4_512:
; X86: ## %bb.0:
; X86-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xd9,0x01]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1a,0xd1,0x01]
; X86-NEXT: vaddpd %zmm3, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xd3]
; X86-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x1a,0xc1,0x01]
; X86-NEXT: vaddpd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_insertf64x4_512:
; X64: ## %bb.0:
; X64-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xd9,0x01]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1a,0xd1,0x01]
; X64-NEXT: vaddpd %zmm3, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xd3]
; X64-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x1a,0xc1,0x01]
; X64-NEXT: vaddpd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> %x3, i8 %x4)
%res1 = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> %x3, i8 -1)
%res2 = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> zeroinitializer, i8 %x4)
%res3 = fadd <8 x double> %res, %res1
%res4 = fadd <8 x double> %res2, %res3
ret <8 x double> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64>, <4 x i64>, i32, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_inserti64x4_512(<8 x i64> %x0, <4 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_inserti64x4_512:
; X86: ## %bb.0:
; X86-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xd9,0x01]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x3a,0xd1,0x01]
; X86-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x3a,0xc1,0x01]
; X86-NEXT: vpaddq %zmm0, %zmm3, %zmm0 ## encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
; X86-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_inserti64x4_512:
; X64: ## %bb.0:
; X64-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xd9,0x01]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x3a,0xd1,0x01]
; X64-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x3a,0xc1,0x01]
; X64-NEXT: vpaddq %zmm0, %zmm3, %zmm0 ## encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
; X64-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> %x3, i8 %x4)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> %x3, i8 -1)
%res2 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> zeroinitializer, i8 %x4)
%res3 = add <8 x i64> %res, %res1
%res4 = add <8 x i64> %res2, %res3
ret <8 x i64> %res4
}
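
; vmovntdqa is a non-temporal (streaming) load; the intrinsic takes a plain
; i8* and returns the loaded value as <8 x i64>.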
define <8 x i64> @test_x86_avx512_movntdqa(i8* %a0) {
; X86-LABEL: test_x86_avx512_movntdqa:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovntdqa (%eax), %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x2a,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_movntdqa:
; X64: ## %bb.0:
; X64-NEXT: vmovntdqa (%rdi), %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x2a,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.movntdqa(i8* %a0)
ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.movntdqa(i8*) nounwind readonly
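; The cmp/ucmp tests below run the legacy masked compare intrinsics through all
; eight predicate immediates (0=eq through 7=true) and collect the mask results
; into vector lanes. Predicates 3 (false) and 7 (true) are constant-folded, so
; no compare instruction is emitted for those lanes.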
define <8 x i16> @test_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: test_cmp_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc1]
; CHECK-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 ## encoding: [0x62,0xf1,0x75,0x48,0x66,0xc8]
; CHECK-NEXT: vpcmpled %zmm1, %zmm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xd1,0x02]
; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xd9,0x04]
; CHECK-NEXT: vpcmpnltd %zmm1, %zmm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xe1,0x05]
; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k5 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0xe9]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9]
; CHECK-NEXT: vpblendw $128, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x80]
; CHECK-NEXT: ## xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
%vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
%res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
%vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
%res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
%vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
%res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
%vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
%res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
%vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
%res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
%vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
%res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
%vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
%res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
%vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
ret <8 x i16> %vec7
}

define <8 x i16> @test_mask_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; X86-LABEL: test_mask_cmp_d_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x76,0xc1]
; X86-NEXT: vpcmpgtd %zmm0, %zmm1, %k2 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x66,0xd0]
; X86-NEXT: vpcmpled %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xd9,0x02]
; X86-NEXT: vpcmpneqd %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xe1,0x04]
; X86-NEXT: vpcmpnltd %zmm1, %zmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xe9,0x05]
; X86-NEXT: vpcmpgtd %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x66,0xc9]
; X86-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
; X86-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x00]
; X86-NEXT: kmovw %k2, %ecx ## encoding: [0xc5,0xf8,0x93,0xca]
; X86-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
; X86-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb]
; X86-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
; X86-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc]
; X86-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
; X86-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
; X86-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
; X86-NEXT: kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_cmp_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x76,0xc1]
; X64-NEXT: vpcmpgtd %zmm0, %zmm1, %k2 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x66,0xd0]
; X64-NEXT: vpcmpled %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xd9,0x02]
; X64-NEXT: vpcmpneqd %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xe1,0x04]
; X64-NEXT: vpcmpnltd %zmm1, %zmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xe9,0x05]
; X64-NEXT: vpcmpgtd %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x66,0xc9]
; X64-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
; X64-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; X64-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X64-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X64-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X64-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X64-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X64-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
%res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
%vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
%res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
%vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
%res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
%vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
%res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
%vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
%res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
%vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
%res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
%vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
%res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
%vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
%res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
%vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
ret <8 x i16> %vec7
}

declare i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone

define <8 x i16> @test_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: test_ucmp_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc1]
; CHECK-NEXT: vpcmpltud %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0xc9,0x01]
; CHECK-NEXT: vpcmpleud %zmm1, %zmm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0xd1,0x02]
; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xd9,0x04]
; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0xe1,0x05]
; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k5 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0xe9,0x06]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9]
; CHECK-NEXT: vpblendw $128, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x80]
; CHECK-NEXT: ## xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
%vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
%res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
%vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
%res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
%vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
%res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
%vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
%res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
%vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
%res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
%vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
%res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
%vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
%res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
%vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
ret <8 x i16> %vec7
}

define <8 x i16> @test_mask_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; X86-LABEL: test_mask_ucmp_d_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x76,0xc1]
; X86-NEXT: vpcmpltud %zmm1, %zmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xd1,0x01]
; X86-NEXT: vpcmpleud %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xd9,0x02]
; X86-NEXT: vpcmpneqd %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xe1,0x04]
; X86-NEXT: vpcmpnltud %zmm1, %zmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xe9,0x05]
; X86-NEXT: vpcmpnleud %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xc9,0x06]
; X86-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
; X86-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x00]
; X86-NEXT: kmovw %k2, %ecx ## encoding: [0xc5,0xf8,0x93,0xca]
; X86-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
; X86-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb]
; X86-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
; X86-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc]
; X86-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
; X86-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
; X86-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
; X86-NEXT: kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_ucmp_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x76,0xc1]
; X64-NEXT: vpcmpltud %zmm1, %zmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xd1,0x01]
; X64-NEXT: vpcmpleud %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xd9,0x02]
; X64-NEXT: vpcmpneqd %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xe1,0x04]
; X64-NEXT: vpcmpnltud %zmm1, %zmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xe9,0x05]
; X64-NEXT: vpcmpnleud %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xc9,0x06]
; X64-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
; X64-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; X64-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X64-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X64-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X64-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X64-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X64-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
%res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
%vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
%res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
%vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
%res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
%vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
%res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
%vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
%res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
%vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
%res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
%vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
%res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
%vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
%res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
%vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
ret <8 x i16> %vec7
}

declare i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
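; Same predicate sweep for the i64 compares. These return i8 masks, and in the
; unmasked predicate-7 (true) case the all-ones result is materialized as the
; constant 255.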
define <8 x i8> @test_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: test_cmp_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc1]
; CHECK-NEXT: vpcmpgtq %zmm0, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x37,0xc8]
; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xd1,0x02]
; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xd9,0x04]
; CHECK-NEXT: vpcmpnltq %zmm1, %zmm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xe1,0x05]
; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k5 ## encoding: [0x62,0xf2,0xfd,0x48,0x37,0xe9]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; CHECK-NEXT: movl $255, %eax ## encoding: [0xb8,0xff,0x00,0x00,0x00]
; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
%vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
%res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
%vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
%res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
%vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
%res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
%vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
%res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
%vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
%res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
%vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
%res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
%vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
%res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
%vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
ret <8 x i8> %vec7
}

define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; X86-LABEL: test_mask_cmp_q_512:
; X86: ## %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x29,0xc1]
; X86-NEXT: vpcmpgtq %zmm0, %zmm1, %k2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x37,0xd0]
; X86-NEXT: vpcmpleq %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xd9,0x02]
; X86-NEXT: vpcmpneqq %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xe1,0x04]
; X86-NEXT: vpcmpnltq %zmm1, %zmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xe9,0x05]
; X86-NEXT: vpcmpgtq %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x37,0xc9]
; X86-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
; X86-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x00]
; X86-NEXT: kmovw %k2, %ecx ## encoding: [0xc5,0xf8,0x93,0xca]
; X86-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
; X86-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb]
; X86-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
; X86-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc]
; X86-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
; X86-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
; X86-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
; X86-NEXT: kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_cmp_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x29,0xc1]
; X64-NEXT: vpcmpgtq %zmm0, %zmm1, %k2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x37,0xd0]
; X64-NEXT: vpcmpleq %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xd9,0x02]
; X64-NEXT: vpcmpneqq %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xe1,0x04]
; X64-NEXT: vpcmpnltq %zmm1, %zmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xe9,0x05]
; X64-NEXT: vpcmpgtq %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x37,0xc9]
; X64-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
; X64-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; X64-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X64-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X64-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X64-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X64-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X64-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
%res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
%vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
%res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
%vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
%res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
%vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
%res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
%vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
%res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
%vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
%res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
%vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
%res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
%vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
%res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
%vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone

define <8 x i8> @test_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: test_ucmp_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc1]
; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xc9,0x01]
; CHECK-NEXT: vpcmpleuq %zmm1, %zmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xd1,0x02]
; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xd9,0x04]
; CHECK-NEXT: vpcmpnltuq %zmm1, %zmm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xe1,0x05]
; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xe9,0x06]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; CHECK-NEXT: movl $255, %eax ## encoding: [0xb8,0xff,0x00,0x00,0x00]
; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
%vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
%res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
%vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
%res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
%vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
%res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
%vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
%res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
%vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
%res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
%vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
%res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
%vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
%res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
%vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
ret <8 x i8> %vec7
}

define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; X86-LABEL: test_mask_ucmp_q_512:
; X86: ## %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x29,0xc1]
; X86-NEXT: vpcmpltuq %zmm1, %zmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xd1,0x01]
; X86-NEXT: vpcmpleuq %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xd9,0x02]
; X86-NEXT: vpcmpneqq %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xe1,0x04]
; X86-NEXT: vpcmpnltuq %zmm1, %zmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xe9,0x05]
; X86-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xc9,0x06]
; X86-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
; X86-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x00]
; X86-NEXT: kmovw %k2, %ecx ## encoding: [0xc5,0xf8,0x93,0xca]
; X86-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
; X86-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb]
; X86-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
; X86-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc]
; X86-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
; X86-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
; X86-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
; X86-NEXT: kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_ucmp_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x29,0xc1]
; X64-NEXT: vpcmpltuq %zmm1, %zmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xd1,0x01]
; X64-NEXT: vpcmpleuq %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xd9,0x02]
; X64-NEXT: vpcmpneqq %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xe1,0x04]
; X64-NEXT: vpcmpnltuq %zmm1, %zmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xe9,0x05]
; X64-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xc9,0x06]
; X64-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
; X64-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; X64-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X64-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X64-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X64-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X64-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X64-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
%res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
%vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
%res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
%vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
%res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
%vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
%res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
%vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
%res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
%vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
%res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
%vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
%res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
%vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
%res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
%vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
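; The subvector broadcast intrinsics lower to vinsert* shuffles when the source
; is already in a register; the *_load variants check that the masked
; vbroadcast* from memory is selected instead.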
declare <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float>, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_mask_broadcastf32x4_512(<4 x float> %x0, <16 x float> %x2, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcastf32x4_512:
; X86: ## %bb.0:
; X86-NEXT: ## kill: def $xmm0 killed $xmm0 def $ymm0
; X86-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x18,0xc0,0x01]
; X86-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xc0,0x01]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmovaps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x28,0xc8]
; X86-NEXT: vmovaps %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x28,0xd0]
; X86-NEXT: vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
; X86-NEXT: vaddps %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcastf32x4_512:
; X64: ## %bb.0:
; X64-NEXT: ## kill: def $xmm0 killed $xmm0 def $ymm0
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x18,0xc0,0x01]
; X64-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xc0,0x01]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovaps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x28,0xc8]
; X64-NEXT: vmovaps %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x28,0xd0]
; X64-NEXT: vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
; X64-NEXT: vaddps %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]

%res1 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> %x2, i16 -1)
%res2 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> %x2, i16 %mask)
%res3 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> zeroinitializer, i16 %mask)
%res4 = fadd <16 x float> %res1, %res2
%res5 = fadd <16 x float> %res3, %res4
ret <16 x float> %res5
}

define <16 x float>@test_int_x86_avx512_mask_broadcastf32x4_512_load(<4 x float>* %x0ptr, <16 x float> %x2, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcastf32x4_512_load:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vbroadcastf32x4 (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x1a,0x00]
; X86-NEXT: ## zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcastf32x4_512_load:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vbroadcastf32x4 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x1a,0x07]
; X64-NEXT: ## zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X64-NEXT: retq ## encoding: [0xc3]
%x0 = load <4 x float>, <4 x float>* %x0ptr
%res = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> %x2, i16 %mask)
ret <16 x float> %res
}

declare <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double>, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_mask_broadcastf64x4_512(<4 x double> %x0, <8 x double> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcastf64x4_512:
; X86: ## %bb.0:
; X86-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
; X86-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm2 ## encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xd0,0x01]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1a,0xc8,0x01]
; X86-NEXT: vaddpd %zmm1, %zmm2, %zmm1 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc9]
; X86-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x1a,0xc0,0x01]
; X86-NEXT: vaddpd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcastf64x4_512:
; X64: ## %bb.0:
; X64-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
; X64-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm2 ## encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xd0,0x01]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1a,0xc8,0x01]
; X64-NEXT: vaddpd %zmm1, %zmm2, %zmm1 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc9]
; X64-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x1a,0xc0,0x01]
; X64-NEXT: vaddpd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]

%res1 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> %x2, i8 -1)
%res2 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> %x2, i8 %mask)
%res3 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> zeroinitializer, i8 %mask)
%res4 = fadd <8 x double> %res1, %res2
%res5 = fadd <8 x double> %res3, %res4
ret <8 x double> %res5
}

define <8 x double>@test_int_x86_avx512_mask_broadcastf64x4_512_load(<4 x double>* %x0ptr, <8 x double> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcastf64x4_512_load:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vbroadcastf64x4 (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x1b,0x00]
; X86-NEXT: ## zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcastf64x4_512_load:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vbroadcastf64x4 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x1b,0x07]
; X64-NEXT: ## zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; X64-NEXT: retq ## encoding: [0xc3]

%x0 = load <4 x double>, <4 x double>* %x0ptr
%res = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> %x2, i8 %mask)
ret <8 x double> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x4_512(<4 x i32> %x0, <16 x i32> %x2, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcasti32x4_512:
; X86: ## %bb.0:
; X86-NEXT: ## kill: def $xmm0 killed $xmm0 def $ymm0
; X86-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x38,0xc0,0x01]
; X86-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xc0,0x01]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6f,0xc8]
; X86-NEXT: vmovdqa32 %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0xd0]
; X86-NEXT: vpaddd %zmm2, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xca]
; X86-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcasti32x4_512:
; X64: ## %bb.0:
; X64-NEXT: ## kill: def $xmm0 killed $xmm0 def $ymm0
; X64-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x38,0xc0,0x01]
; X64-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xc0,0x01]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6f,0xc8]
; X64-NEXT: vmovdqa32 %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0xd0]
; X64-NEXT: vpaddd %zmm2, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xca]
; X64-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]

%res1 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> %x2, i16 -1)
%res2 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> %x2, i16 %mask)
%res3 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %mask)
%res4 = add <16 x i32> %res1, %res2
%res5 = add <16 x i32> %res3, %res4
ret <16 x i32> %res5
}

define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x4_512_load(<4 x i32>* %x0ptr, <16 x i32> %x2, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcasti32x4_512_load:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vbroadcasti32x4 (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x5a,0x00]
; X86-NEXT: ## zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcasti32x4_512_load:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vbroadcasti32x4 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x5a,0x07]
; X64-NEXT: ## zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X64-NEXT: retq ## encoding: [0xc3]

%x0 = load <4 x i32>, <4 x i32>* %x0ptr
%res = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> %x2, i16 %mask)
ret <16 x i32> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_broadcasti64x4_512(<4 x i64> %x0, <8 x i64> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcasti64x4_512:
; X86: ## %bb.0:
; X86-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
; X86-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm2 ## encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xd0,0x01]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x3a,0xc8,0x01]
; X86-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x3a,0xc0,0x01]
; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcasti64x4_512:
; X64: ## %bb.0:
; X64-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
; X64-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm2 ## encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xd0,0x01]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x3a,0xc8,0x01]
; X64-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x3a,0xc0,0x01]
; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]

%res1 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> %x2, i8 -1)
%res2 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> %x2, i8 %mask)
%res3 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> zeroinitializer, i8 %mask)
%res4 = add <8 x i64> %res1, %res2
%res5 = add <8 x i64> %res3, %res4
ret <8 x i64> %res5
}

define <8 x i64>@test_int_x86_avx512_mask_broadcasti64x4_512_load(<4 x i64>* %x0ptr, <8 x i64> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcasti64x4_512_load:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vbroadcasti64x4 (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x5b,0x00]
; X86-NEXT: ## zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcasti64x4_512_load:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vbroadcasti64x4 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x5b,0x07]
; X64-NEXT: ## zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; X64-NEXT: retq ## encoding: [0xc3]

%x0 = load <4 x i64>, <4 x i64>* %x0ptr
%res = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> %x2, i8 %mask)
ret <8 x i64> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_pabs_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pabs_d_512:
; X86: ## %bb.0:
; X86-NEXT: vpabsd %zmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x1e,0xd0]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpabsd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x1e,0xc8]
; X86-NEXT: vpaddd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pabs_d_512:
; X64: ## %bb.0:
; X64-NEXT: vpabsd %zmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x1e,0xd0]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpabsd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x1e,0xc8]
; X64-NEXT: vpaddd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 -1)
%res2 = add <16 x i32> %res, %res1
ret <16 x i32> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_pabs_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pabs_q_512:
; X86: ## %bb.0:
; X86-NEXT: vpabsq %zmm0, %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x1f,0xd0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpabsq %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x1f,0xc8]
; X86-NEXT: vpaddq %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pabs_q_512:
; X64: ## %bb.0:
; X64-NEXT: vpabsq %zmm0, %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x1f,0xd0]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpabsq %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x1f,0xc8]
; X64-NEXT: vpaddq %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 -1)
%res2 = add <8 x i64> %res, %res1
ret <8 x i64> %res2
}
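; The ptestm/ptestnm tests add the masked and unmasked intrinsic results
; together so a single return value covers both lowerings.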
define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1, i8 %m) {
; X86-LABEL: test_vptestmq:
; X86: ## %bb.0:
; X86-NEXT: vptestmq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc1]
; X86-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: andb %cl, %al ## encoding: [0x20,0xc8]
; X86-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_vptestmq:
; X64: ## %bb.0:
; X64-NEXT: vptestmq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc1]
; X64-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT: andb %al, %dil ## encoding: [0x40,0x20,0xc7]
; X64-NEXT: addb %dil, %al ## encoding: [0x40,0x00,0xf8]
; X64-NEXT: ## kill: def $al killed $al killed $eax
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 -1)
%res1 = call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 %m)
%res2 = add i8 %res1, %res
ret i8 %res2
}
declare i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64>, <8 x i64>, i8)

define i16 @test_vptestmd(<16 x i32> %a0, <16 x i32> %a1, i16 %m) {
; X86-LABEL: test_vptestmd:
; X86: ## %bb.0:
; X86-NEXT: vptestmd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc1]
; X86-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-NEXT: andw %cx, %ax ## encoding: [0x66,0x21,0xc8]
; X86-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8]
; X86-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_vptestmd:
; X64: ## %bb.0:
; X64-NEXT: vptestmd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc1]
; X64-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT: andl %eax, %edi ## encoding: [0x21,0xc7]
; X64-NEXT: addl %edi, %eax ## encoding: [0x01,0xf8]
; X64-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call i16 @llvm.x86.avx512.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 -1)
%res1 = call i16 @llvm.x86.avx512.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 %m)
%res2 = add i16 %res1, %res
ret i16 %res2
}
declare i16 @llvm.x86.avx512.ptestm.d.512(<16 x i32>, <16 x i32>, i16)

declare i16 @llvm.x86.avx512.ptestnm.d.512(<16 x i32>, <16 x i32>, i16 %x2)

define i16@test_int_x86_avx512_ptestnm_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_ptestnm_d_512:
; X86: ## %bb.0:
; X86-NEXT: vptestnmd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x48,0x27,0xc1]
; X86-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-NEXT: andw %cx, %ax ## encoding: [0x66,0x21,0xc8]
; X86-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8]
; X86-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_ptestnm_d_512:
; X64: ## %bb.0:
; X64-NEXT: vptestnmd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x48,0x27,0xc1]
; X64-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT: andl %eax, %edi ## encoding: [0x21,0xc7]
; X64-NEXT: addl %edi, %eax ## encoding: [0x01,0xf8]
; X64-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call i16 @llvm.x86.avx512.ptestnm.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2)
%res1 = call i16 @llvm.x86.avx512.ptestnm.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 -1)
%res2 = add i16 %res, %res1
ret i16 %res2
}

declare i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64>, <8 x i64>, i8 %x2)

define i8@test_int_x86_avx512_ptestnm_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_ptestnm_q_512:
; X86: ## %bb.0:
; X86-NEXT: vptestnmq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x48,0x27,0xc1]
; X86-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: andb %cl, %al ## encoding: [0x20,0xc8]
; X86-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_ptestnm_q_512:
; X64: ## %bb.0:
; X64-NEXT: vptestnmq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x48,0x27,0xc1]
; X64-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT: andb %al, %dil ## encoding: [0x40,0x20,0xc7]
; X64-NEXT: addb %dil, %al ## encoding: [0x40,0x00,0xf8]
; X64-NEXT: ## kill: def $al killed $al killed $eax
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2)
%res1 = call i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 -1)
%res2 = add i8 %res, %res1
ret i8 %res2
}
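; No user of the k* logic intrinsics below needs a mask register, so they are
; lowered to plain scalar logic on GPRs.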
## %bb.0: 5878 ; X64-NEXT: orl $-9, %edi ## encoding: [0x83,0xcf,0xf7] 5879 ; X64-NEXT: andl %esi, %edi ## encoding: [0x21,0xf7] 5880 ; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] 5881 ; X64-NEXT: retq ## encoding: [0xc3] 5882 %t1 = call i16 @llvm.x86.avx512.kandn.w(i16 %a0, i16 8) 5883 %t2 = call i16 @llvm.x86.avx512.kandn.w(i16 %t1, i16 %a1) 5884 ret i16 %t2 5885 } 5886 5887 declare i16 @llvm.x86.avx512.knot.w(i16) nounwind readnone 5888 define i16 @test_knot(i16 %a0) { 5889 ; X86-LABEL: test_knot: 5890 ; X86: ## %bb.0: 5891 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] 5892 ; X86-NEXT: notl %eax ## encoding: [0xf7,0xd0] 5893 ; X86-NEXT: ## kill: def $ax killed $ax killed $eax 5894 ; X86-NEXT: retl ## encoding: [0xc3] 5895 ; 5896 ; X64-LABEL: test_knot: 5897 ; X64: ## %bb.0: 5898 ; X64-NEXT: notl %edi ## encoding: [0xf7,0xd7] 5899 ; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] 5900 ; X64-NEXT: retq ## encoding: [0xc3] 5901 %res = call i16 @llvm.x86.avx512.knot.w(i16 %a0) 5902 ret i16 %res 5903 } 5904 5905 declare i16 @llvm.x86.avx512.kor.w(i16, i16) nounwind readnone 5906 define i16 @test_kor(i16 %a0, i16 %a1) { 5907 ; X86-LABEL: test_kor: 5908 ; X86: ## %bb.0: 5909 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb7,0x44,0x24,0x04] 5910 ; X86-NEXT: orw {{[0-9]+}}(%esp), %ax ## encoding: [0x66,0x0b,0x44,0x24,0x08] 5911 ; X86-NEXT: orl $8, %eax ## encoding: [0x83,0xc8,0x08] 5912 ; X86-NEXT: ## kill: def $ax killed $ax killed $eax 5913 ; X86-NEXT: retl ## encoding: [0xc3] 5914 ; 5915 ; X64-LABEL: test_kor: 5916 ; X64: ## %bb.0: 5917 ; X64-NEXT: orl %esi, %edi ## encoding: [0x09,0xf7] 5918 ; X64-NEXT: orl $8, %edi ## encoding: [0x83,0xcf,0x08] 5919 ; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] 5920 ; X64-NEXT: retq ## encoding: [0xc3] 5921 %t1 = call i16 @llvm.x86.avx512.kor.w(i16 %a0, i16 8) 5922 %t2 = call i16 @llvm.x86.avx512.kor.w(i16 %t1, i16 %a1) 5923 ret i16 %t2 5924 } 5925 5926 declare i16 @llvm.x86.avx512.kxnor.w(i16, i16) nounwind readnone 5927 ; TODO: the two kxnor instructions here are a no-op and should be eliminated, 5928 ; probably by FoldConstantArithmetic in SelectionDAG.
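; Note: kxnor(a, b) == not(a xor b), so the chained calls in test_kxnor below
; compose to not(not(%a0 xor 8) xor %a1) == (%a0 xor 8) xor %a1; the two NOTs
; cancel, which is why the expected code consists of plain XOR instructions
; with no KNOT/KXNOR at all.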
5929 define i16 @test_kxnor(i16 %a0, i16 %a1) { 5930 ; X86-LABEL: test_kxnor: 5931 ; X86: ## %bb.0: 5932 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb7,0x44,0x24,0x04] 5933 ; X86-NEXT: xorw {{[0-9]+}}(%esp), %ax ## encoding: [0x66,0x33,0x44,0x24,0x08] 5934 ; X86-NEXT: xorl $8, %eax ## encoding: [0x83,0xf0,0x08] 5935 ; X86-NEXT: ## kill: def $ax killed $ax killed $eax 5936 ; X86-NEXT: retl ## encoding: [0xc3] 5937 ; 5938 ; X64-LABEL: test_kxnor: 5939 ; X64: ## %bb.0: 5940 ; X64-NEXT: xorl %esi, %edi ## encoding: [0x31,0xf7] 5941 ; X64-NEXT: xorl $8, %edi ## encoding: [0x83,0xf7,0x08] 5942 ; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] 5943 ; X64-NEXT: retq ## encoding: [0xc3] 5944 %t1 = call i16 @llvm.x86.avx512.kxnor.w(i16 %a0, i16 8) 5945 %t2 = call i16 @llvm.x86.avx512.kxnor.w(i16 %t1, i16 %a1) 5946 ret i16 %t2 5947 } 5948 5949 declare i16 @llvm.x86.avx512.kxor.w(i16, i16) nounwind readnone 5950 define i16 @test_kxor(i16 %a0, i16 %a1) { 5951 ; X86-LABEL: test_kxor: 5952 ; X86: ## %bb.0: 5953 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb7,0x44,0x24,0x04] 5954 ; X86-NEXT: xorw {{[0-9]+}}(%esp), %ax ## encoding: [0x66,0x33,0x44,0x24,0x08] 5955 ; X86-NEXT: xorl $8, %eax ## encoding: [0x83,0xf0,0x08] 5956 ; X86-NEXT: ## kill: def $ax killed $ax killed $eax 5957 ; X86-NEXT: retl ## encoding: [0xc3] 5958 ; 5959 ; X64-LABEL: test_kxor: 5960 ; X64: ## %bb.0: 5961 ; X64-NEXT: xorl %esi, %edi ## encoding: [0x31,0xf7] 5962 ; X64-NEXT: xorl $8, %edi ## encoding: [0x83,0xf7,0x08] 5963 ; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] 5964 ; X64-NEXT: retq ## encoding: [0xc3] 5965 %t1 = call i16 @llvm.x86.avx512.kxor.w(i16 %a0, i16 8) 5966 %t2 = call i16 @llvm.x86.avx512.kxor.w(i16 %t1, i16 %a1) 5967 ret i16 %t2 5968 } 5969 5970 declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) nounwind readnone 5971 define i32 @test_kortestz(<8 x i64> %A, <8 x i64> %B, <8 x i64> %C, <8 x i64> %D) { 5972 ; CHECK-LABEL: test_kortestz: 5973 ; CHECK: ## %bb.0: ## %entry 5974 ; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xc1,0x04] 5975 ; CHECK-NEXT: vpcmpneqd %zmm3, %zmm2, %k1 ## encoding: [0x62,0xf3,0x6d,0x48,0x1f,0xcb,0x04] 5976 ; CHECK-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] 5977 ; CHECK-NEXT: kortestw %k1, %k0 ## encoding: [0xc5,0xf8,0x98,0xc1] 5978 ; CHECK-NEXT: sete %al ## encoding: [0x0f,0x94,0xc0] 5979 ; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 5980 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 5981 entry: 5982 %0 = bitcast <8 x i64> %A to <16 x i32> 5983 %1 = bitcast <8 x i64> %B to <16 x i32> 5984 %2 = icmp ne <16 x i32> %0, %1 5985 %3 = bitcast <8 x i64> %C to <16 x i32> 5986 %4 = bitcast <8 x i64> %D to <16 x i32> 5987 %5 = icmp ne <16 x i32> %3, %4 5988 %6 = bitcast <16 x i1> %2 to i16 5989 %7 = bitcast <16 x i1> %5 to i16 5990 %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %6, i16 %7) 5991 ret i32 %res 5992 } 5993 5994 declare i32 @llvm.x86.avx512.kortestc.w(i16, i16) nounwind readnone 5995 define i32 @test_kortestc(<8 x i64> %A, <8 x i64> %B, <8 x i64> %C, <8 x i64> %D) { 5996 ; CHECK-LABEL: test_kortestc: 5997 ; CHECK: ## %bb.0: ## %entry 5998 ; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xc1,0x04] 5999 ; CHECK-NEXT: vpcmpneqd %zmm3, %zmm2, %k1 ## encoding: [0x62,0xf3,0x6d,0x48,0x1f,0xcb,0x04] 6000 ; CHECK-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] 6001 ; CHECK-NEXT: kortestw %k1, %k0 ## encoding: [0xc5,0xf8,0x98,0xc1] 6002 ; CHECK-NEXT: sete %al ## encoding: 
[0x0f,0x94,0xc0] 6003 ; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 6004 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 6005 entry: 6006 %0 = bitcast <8 x i64> %A to <16 x i32> 6007 %1 = bitcast <8 x i64> %B to <16 x i32> 6008 %2 = icmp ne <16 x i32> %0, %1 6009 %3 = bitcast <8 x i64> %C to <16 x i32> 6010 %4 = bitcast <8 x i64> %D to <16 x i32> 6011 %5 = icmp ne <16 x i32> %3, %4 6012 %6 = bitcast <16 x i1> %2 to i16 6013 %7 = bitcast <16 x i1> %5 to i16 6014 %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %6, i16 %7) 6015 ret i32 %res 6016 } 6017 6018 define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) { 6019 ; CHECK-LABEL: test_cmpps: 6020 ; CHECK: ## %bb.0: 6021 ; CHECK-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x18,0xc2,0xc1,0x02] 6022 ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 6023 ; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax 6024 ; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 6025 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 6026 %res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i16 -1, i32 8) 6027 ret i16 %res 6028 } 6029 declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> , <16 x float> , i32, i16, i32) 6030 6031 define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) { 6032 ; CHECK-LABEL: test_cmppd: 6033 ; CHECK: ## %bb.0: 6034 ; CHECK-NEXT: vcmpneqpd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc1,0x04] 6035 ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 6036 ; CHECK-NEXT: ## kill: def $al killed $al killed $eax 6037 ; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] 6038 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 6039 %res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i8 -1, i32 4) 6040 ret i8 %res 6041 } 6042 declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> , <8 x double> , i32, i8, i32) 6043 6044 define <8 x i64> @test_mul_epi32_rr(<16 x i32> %a, <16 x i32> %b) { 6045 ; CHECK-LABEL: test_mul_epi32_rr: 6046 ; CHECK: ## %bb.0: 6047 ; CHECK-NEXT: vpmuldq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0xc1] 6048 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 6049 %res = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b) 6050 ret < 8 x i64> %res 6051 } 6052 6053 define <8 x i64> @test_mul_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) { 6054 ; X86-LABEL: test_mul_epi32_rrk: 6055 ; X86: ## %bb.0: 6056 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] 6057 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 6058 ; X86-NEXT: vpmuldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0xd1] 6059 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 6060 ; X86-NEXT: retl ## encoding: [0xc3] 6061 ; 6062 ; X64-LABEL: test_mul_epi32_rrk: 6063 ; X64: ## %bb.0: 6064 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6065 ; X64-NEXT: vpmuldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0xd1] 6066 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 6067 ; X64-NEXT: retq ## encoding: [0xc3] 6068 %mul = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b) 6069 %mask.cast = bitcast i8 %mask to <8 x i1> 6070 %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> %passThru 6071 ret < 8 x i64> %res 6072 } 6073 6074 define <8 x i64> @test_mul_epi32_rrkz(<16 x i32> 
%a, <16 x i32> %b, i8 %mask) { 6075 ; X86-LABEL: test_mul_epi32_rrkz: 6076 ; X86: ## %bb.0: 6077 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] 6078 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 6079 ; X86-NEXT: vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0xc1] 6080 ; X86-NEXT: retl ## encoding: [0xc3] 6081 ; 6082 ; X64-LABEL: test_mul_epi32_rrkz: 6083 ; X64: ## %bb.0: 6084 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6085 ; X64-NEXT: vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0xc1] 6086 ; X64-NEXT: retq ## encoding: [0xc3] 6087 %mul = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b) 6088 %mask.cast = bitcast i8 %mask to <8 x i1> 6089 %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> zeroinitializer 6090 ret < 8 x i64> %res 6091 } 6092 6093 define <8 x i64> @test_mul_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) { 6094 ; X86-LABEL: test_mul_epi32_rm: 6095 ; X86: ## %bb.0: 6096 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] 6097 ; X86-NEXT: vpmuldq (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0x00] 6098 ; X86-NEXT: retl ## encoding: [0xc3] 6099 ; 6100 ; X64-LABEL: test_mul_epi32_rm: 6101 ; X64: ## %bb.0: 6102 ; X64-NEXT: vpmuldq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0x07] 6103 ; X64-NEXT: retq ## encoding: [0xc3] 6104 %b = load <16 x i32>, <16 x i32>* %ptr_b 6105 %res = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b) 6106 ret < 8 x i64> %res 6107 } 6108 6109 define <8 x i64> @test_mul_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) { 6110 ; X86-LABEL: test_mul_epi32_rmk: 6111 ; X86: ## %bb.0: 6112 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] 6113 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08] 6114 ; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9] 6115 ; X86-NEXT: vpmuldq (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0x08] 6116 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 6117 ; X86-NEXT: retl ## encoding: [0xc3] 6118 ; 6119 ; X64-LABEL: test_mul_epi32_rmk: 6120 ; X64: ## %bb.0: 6121 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 6122 ; X64-NEXT: vpmuldq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0x0f] 6123 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 6124 ; X64-NEXT: retq ## encoding: [0xc3] 6125 %b = load <16 x i32>, <16 x i32>* %ptr_b 6126 %mul = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b) 6127 %mask.cast = bitcast i8 %mask to <8 x i1> 6128 %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> %passThru 6129 ret < 8 x i64> %res 6130 } 6131 6132 define <8 x i64> @test_mul_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) { 6133 ; X86-LABEL: test_mul_epi32_rmkz: 6134 ; X86: ## %bb.0: 6135 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] 6136 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08] 6137 ; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9] 6138 ; X86-NEXT: vpmuldq (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0x00] 6139 ; X86-NEXT: retl ## encoding: [0xc3] 6140 ; 6141 ; X64-LABEL: test_mul_epi32_rmkz: 6142 ; X64: ## %bb.0: 6143 ; X64-NEXT: kmovw %esi, %k1 ## 
encoding: [0xc5,0xf8,0x92,0xce] 6144 ; X64-NEXT: vpmuldq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0x07] 6145 ; X64-NEXT: retq ## encoding: [0xc3] 6146 %b = load <16 x i32>, <16 x i32>* %ptr_b 6147 %mul = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b) 6148 %mask.cast = bitcast i8 %mask to <8 x i1> 6149 %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> zeroinitializer 6150 ret < 8 x i64> %res 6151 } 6152 6153 define <8 x i64> @test_mul_epi32_rmb(<16 x i32> %a, i64* %ptr_b) { 6154 ; X86-LABEL: test_mul_epi32_rmb: 6155 ; X86: ## %bb.0: 6156 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] 6157 ; X86-NEXT: vmovq (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08] 6158 ; X86-NEXT: ## xmm1 = mem[0],zero 6159 ; X86-NEXT: vpbroadcastq %xmm1, %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xc9] 6160 ; X86-NEXT: vpmuldq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0xc1] 6161 ; X86-NEXT: retl ## encoding: [0xc3] 6162 ; 6163 ; X64-LABEL: test_mul_epi32_rmb: 6164 ; X64: ## %bb.0: 6165 ; X64-NEXT: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x58,0x28,0x07] 6166 ; X64-NEXT: retq ## encoding: [0xc3] 6167 %q = load i64, i64* %ptr_b 6168 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 6169 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer 6170 %b = bitcast <8 x i64> %b64 to <16 x i32> 6171 %res = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b) 6172 ret < 8 x i64> %res 6173 } 6174 6175 define <8 x i64> @test_mul_epi32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) { 6176 ; X86-LABEL: test_mul_epi32_rmbk: 6177 ; X86: ## %bb.0: 6178 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] 6179 ; X86-NEXT: vmovq (%eax), %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10] 6180 ; X86-NEXT: ## xmm2 = mem[0],zero 6181 ; X86-NEXT: vpbroadcastq %xmm2, %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd2] 6182 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08] 6183 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 6184 ; X86-NEXT: vpmuldq %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0xca] 6185 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 6186 ; X86-NEXT: retl ## encoding: [0xc3] 6187 ; 6188 ; X64-LABEL: test_mul_epi32_rmbk: 6189 ; X64: ## %bb.0: 6190 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 6191 ; X64-NEXT: vpmuldq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0x28,0x0f] 6192 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 6193 ; X64-NEXT: retq ## encoding: [0xc3] 6194 %q = load i64, i64* %ptr_b 6195 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 6196 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer 6197 %b = bitcast <8 x i64> %b64 to <16 x i32> 6198 %mul = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b) 6199 %mask.cast = bitcast i8 %mask to <8 x i1> 6200 %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> %passThru 6201 ret < 8 x i64> %res 6202 } 6203 6204 define <8 x i64> @test_mul_epi32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) { 6205 ; X86-LABEL: test_mul_epi32_rmbkz: 6206 ; X86: ## %bb.0: 6207 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] 6208 ; X86-NEXT: vmovq (%eax), 
%xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08] 6209 ; X86-NEXT: ## xmm1 = mem[0],zero 6210 ; X86-NEXT: vpbroadcastq %xmm1, %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xc9] 6211 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08] 6212 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 6213 ; X86-NEXT: vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0xc1] 6214 ; X86-NEXT: retl ## encoding: [0xc3] 6215 ; 6216 ; X64-LABEL: test_mul_epi32_rmbkz: 6217 ; X64: ## %bb.0: 6218 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 6219 ; X64-NEXT: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xd9,0x28,0x07] 6220 ; X64-NEXT: retq ## encoding: [0xc3] 6221 %q = load i64, i64* %ptr_b 6222 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 6223 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer 6224 %b = bitcast <8 x i64> %b64 to <16 x i32> 6225 %mul = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b) 6226 %mask.cast = bitcast i8 %mask to <8 x i1> 6227 %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> zeroinitializer 6228 ret < 8 x i64> %res 6229 } 6230 6231 declare <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32>, <16 x i32>) 6232 6233 define <8 x i64> @test_mul_epu32_rr(<16 x i32> %a, <16 x i32> %b) { 6234 ; CHECK-LABEL: test_mul_epu32_rr: 6235 ; CHECK: ## %bb.0: 6236 ; CHECK-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0xc1] 6237 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 6238 %res = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b) 6239 ret < 8 x i64> %res 6240 } 6241 6242 define <8 x i64> @test_mul_epu32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) { 6243 ; X86-LABEL: test_mul_epu32_rrk: 6244 ; X86: ## %bb.0: 6245 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] 6246 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 6247 ; X86-NEXT: vpmuludq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0xd1] 6248 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 6249 ; X86-NEXT: retl ## encoding: [0xc3] 6250 ; 6251 ; X64-LABEL: test_mul_epu32_rrk: 6252 ; X64: ## %bb.0: 6253 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6254 ; X64-NEXT: vpmuludq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0xd1] 6255 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 6256 ; X64-NEXT: retq ## encoding: [0xc3] 6257 %mul = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b) 6258 %mask.cast = bitcast i8 %mask to <8 x i1> 6259 %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> %passThru 6260 ret < 8 x i64> %res 6261 } 6262 6263 define <8 x i64> @test_mul_epu32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) { 6264 ; X86-LABEL: test_mul_epu32_rrkz: 6265 ; X86: ## %bb.0: 6266 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] 6267 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 6268 ; X86-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0xc1] 6269 ; X86-NEXT: retl ## encoding: [0xc3] 6270 ; 6271 ; X64-LABEL: test_mul_epu32_rrkz: 6272 ; X64: ## %bb.0: 6273 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6274 ; X64-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: 
[0x62,0xf1,0xfd,0xc9,0xf4,0xc1] 6275 ; X64-NEXT: retq ## encoding: [0xc3] 6276 %mul = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b) 6277 %mask.cast = bitcast i8 %mask to <8 x i1> 6278 %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> zeroinitializer 6279 ret < 8 x i64> %res 6280 } 6281 6282 define <8 x i64> @test_mul_epu32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) { 6283 ; X86-LABEL: test_mul_epu32_rm: 6284 ; X86: ## %bb.0: 6285 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] 6286 ; X86-NEXT: vpmuludq (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0x00] 6287 ; X86-NEXT: retl ## encoding: [0xc3] 6288 ; 6289 ; X64-LABEL: test_mul_epu32_rm: 6290 ; X64: ## %bb.0: 6291 ; X64-NEXT: vpmuludq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0x07] 6292 ; X64-NEXT: retq ## encoding: [0xc3] 6293 %b = load <16 x i32>, <16 x i32>* %ptr_b 6294 %res = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b) 6295 ret < 8 x i64> %res 6296 } 6297 6298 define <8 x i64> @test_mul_epu32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) { 6299 ; X86-LABEL: test_mul_epu32_rmk: 6300 ; X86: ## %bb.0: 6301 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] 6302 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08] 6303 ; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9] 6304 ; X86-NEXT: vpmuludq (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0x08] 6305 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 6306 ; X86-NEXT: retl ## encoding: [0xc3] 6307 ; 6308 ; X64-LABEL: test_mul_epu32_rmk: 6309 ; X64: ## %bb.0: 6310 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 6311 ; X64-NEXT: vpmuludq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0x0f] 6312 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 6313 ; X64-NEXT: retq ## encoding: [0xc3] 6314 %b = load <16 x i32>, <16 x i32>* %ptr_b 6315 %mul = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b) 6316 %mask.cast = bitcast i8 %mask to <8 x i1> 6317 %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> %passThru 6318 ret < 8 x i64> %res 6319 } 6320 6321 define <8 x i64> @test_mul_epu32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) { 6322 ; X86-LABEL: test_mul_epu32_rmkz: 6323 ; X86: ## %bb.0: 6324 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] 6325 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08] 6326 ; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9] 6327 ; X86-NEXT: vpmuludq (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0x00] 6328 ; X86-NEXT: retl ## encoding: [0xc3] 6329 ; 6330 ; X64-LABEL: test_mul_epu32_rmkz: 6331 ; X64: ## %bb.0: 6332 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 6333 ; X64-NEXT: vpmuludq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0x07] 6334 ; X64-NEXT: retq ## encoding: [0xc3] 6335 %b = load <16 x i32>, <16 x i32>* %ptr_b 6336 %mul = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b) 6337 %mask.cast = bitcast i8 %mask to <8 x i1> 6338 %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> zeroinitializer 6339 ret < 8 x i64> %res 6340 } 6341 6342 define <8 x i64> @test_mul_epu32_rmb(<16 x i32> %a, i64* %ptr_b) { 6343 ; X86-LABEL: test_mul_epu32_rmb: 6344 
; X86: ## %bb.0: 6345 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] 6346 ; X86-NEXT: vmovq (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08] 6347 ; X86-NEXT: ## xmm1 = mem[0],zero 6348 ; X86-NEXT: vpbroadcastq %xmm1, %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xc9] 6349 ; X86-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0xc1] 6350 ; X86-NEXT: retl ## encoding: [0xc3] 6351 ; 6352 ; X64-LABEL: test_mul_epu32_rmb: 6353 ; X64: ## %bb.0: 6354 ; X64-NEXT: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xf4,0x07] 6355 ; X64-NEXT: retq ## encoding: [0xc3] 6356 %q = load i64, i64* %ptr_b 6357 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 6358 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer 6359 %b = bitcast <8 x i64> %b64 to <16 x i32> 6360 %res = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b) 6361 ret < 8 x i64> %res 6362 } 6363 6364 define <8 x i64> @test_mul_epu32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) { 6365 ; X86-LABEL: test_mul_epu32_rmbk: 6366 ; X86: ## %bb.0: 6367 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] 6368 ; X86-NEXT: vmovq (%eax), %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10] 6369 ; X86-NEXT: ## xmm2 = mem[0],zero 6370 ; X86-NEXT: vpbroadcastq %xmm2, %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd2] 6371 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08] 6372 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 6373 ; X86-NEXT: vpmuludq %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0xca] 6374 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 6375 ; X86-NEXT: retl ## encoding: [0xc3] 6376 ; 6377 ; X64-LABEL: test_mul_epu32_rmbk: 6378 ; X64: ## %bb.0: 6379 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 6380 ; X64-NEXT: vpmuludq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xf4,0x0f] 6381 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 6382 ; X64-NEXT: retq ## encoding: [0xc3] 6383 %q = load i64, i64* %ptr_b 6384 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 6385 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer 6386 %b = bitcast <8 x i64> %b64 to <16 x i32> 6387 %mul = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b) 6388 %mask.cast = bitcast i8 %mask to <8 x i1> 6389 %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> %passThru 6390 ret < 8 x i64> %res 6391 } 6392 6393 define <8 x i64> @test_mul_epu32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) { 6394 ; X86-LABEL: test_mul_epu32_rmbkz: 6395 ; X86: ## %bb.0: 6396 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] 6397 ; X86-NEXT: vmovq (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08] 6398 ; X86-NEXT: ## xmm1 = mem[0],zero 6399 ; X86-NEXT: vpbroadcastq %xmm1, %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xc9] 6400 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08] 6401 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 6402 ; X86-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0xc1] 6403 ; X86-NEXT: retl ## encoding: [0xc3] 6404 ; 6405 ; X64-LABEL: test_mul_epu32_rmbkz: 6406 ; X64: ## %bb.0: 6407 ; X64-NEXT: kmovw 
%esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 6408 ; X64-NEXT: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xf4,0x07] 6409 ; X64-NEXT: retq ## encoding: [0xc3] 6410 %q = load i64, i64* %ptr_b 6411 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 6412 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer 6413 %b = bitcast <8 x i64> %b64 to <16 x i32> 6414 %mul = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b) 6415 %mask.cast = bitcast i8 %mask to <8 x i1> 6416 %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> zeroinitializer 6417 ret < 8 x i64> %res 6418 } 6419 6420 declare <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32>, <16 x i32>) 6421 6422 define <2 x double> @test_x86_avx512_mm_cvtu32_sd(<2 x double> %a, i32 %b) 6423 ; X86-LABEL: test_x86_avx512_mm_cvtu32_sd: 6424 ; X86: ## %bb.0: 6425 ; X86-NEXT: vcvtusi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7f,0x08,0x7b,0x44,0x24,0x01] 6426 ; X86-NEXT: retl ## encoding: [0xc3] 6427 ; 6428 ; X64-LABEL: test_x86_avx512_mm_cvtu32_sd: 6429 ; X64: ## %bb.0: 6430 ; X64-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7f,0x08,0x7b,0xc7] 6431 ; X64-NEXT: retq ## encoding: [0xc3] 6432 { 6433 %res = call <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double> %a, i32 %b) ; <<2 x double>> [#uses=1] 6434 ret <2 x double> %res 6435 } 6436 declare <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double>, i32) nounwind readnone 6437 6438 define <16 x float> @test_x86_vbroadcast_ss_512(i8* %a0) { 6439 ; X86-LABEL: test_x86_vbroadcast_ss_512: 6440 ; X86: ## %bb.0: 6441 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] 6442 ; X86-NEXT: vbroadcastss (%eax), %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x18,0x00] 6443 ; X86-NEXT: retl ## encoding: [0xc3] 6444 ; 6445 ; X64-LABEL: test_x86_vbroadcast_ss_512: 6446 ; X64: ## %bb.0: 6447 ; X64-NEXT: vbroadcastss (%rdi), %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x18,0x07] 6448 ; X64-NEXT: retq ## encoding: [0xc3] 6449 %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8* %a0) ; <<16 x float>> [#uses=1] 6450 ret <16 x float> %res 6451 } 6452 declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8*) nounwind readonly 6453 6454 define <8 x double> @test_x86_vbroadcast_sd_512(i8* %a0) { 6455 ; X86-LABEL: test_x86_vbroadcast_sd_512: 6456 ; X86: ## %bb.0: 6457 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] 6458 ; X86-NEXT: vbroadcastsd (%eax), %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x19,0x00] 6459 ; X86-NEXT: retl ## encoding: [0xc3] 6460 ; 6461 ; X64-LABEL: test_x86_vbroadcast_sd_512: 6462 ; X64: ## %bb.0: 6463 ; X64-NEXT: vbroadcastsd (%rdi), %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x19,0x07] 6464 ; X64-NEXT: retq ## encoding: [0xc3] 6465 %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8* %a0) ; <<8 x double>> [#uses=1] 6466 ret <8 x double> %res 6467 } 6468 declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8*) nounwind readonly 6469 6470 declare <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double>, <8 x i64>, <8 x double>, i8) 6471 6472 define <8 x double>@test_int_x86_avx512_mask_permvar_df_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) { 6473 ; X86-LABEL: test_int_x86_avx512_mask_permvar_df_512: 6474 ; X86: ## %bb.0: 6475 ; X86-NEXT: vpermpd %zmm0, %zmm1, %zmm3 ## encoding: [0x62,0xf2,0xf5,0x48,0x16,0xd8] 6476 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding:
[0x0f,0xb6,0x44,0x24,0x04] 6477 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 6478 ; X86-NEXT: vpermpd %zmm0, %zmm1, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x16,0xd0] 6479 ; X86-NEXT: vpermpd %zmm0, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0x16,0xc0] 6480 ; X86-NEXT: vaddpd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc0] 6481 ; X86-NEXT: vaddpd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc3] 6482 ; X86-NEXT: retl ## encoding: [0xc3] 6483 ; 6484 ; X64-LABEL: test_int_x86_avx512_mask_permvar_df_512: 6485 ; X64: ## %bb.0: 6486 ; X64-NEXT: vpermpd %zmm0, %zmm1, %zmm3 ## encoding: [0x62,0xf2,0xf5,0x48,0x16,0xd8] 6487 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6488 ; X64-NEXT: vpermpd %zmm0, %zmm1, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x16,0xd0] 6489 ; X64-NEXT: vpermpd %zmm0, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0x16,0xc0] 6490 ; X64-NEXT: vaddpd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc0] 6491 ; X64-NEXT: vaddpd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc3] 6492 ; X64-NEXT: retq ## encoding: [0xc3] 6493 %res = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) 6494 %res1 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> zeroinitializer, i8 %x3) 6495 %res2 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1) 6496 %res3 = fadd <8 x double> %res, %res1 6497 %res4 = fadd <8 x double> %res3, %res2 6498 ret <8 x double> %res4 6499 } 6500 6501 declare <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 6502 6503 define <8 x i64>@test_int_x86_avx512_mask_permvar_di_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { 6504 ; X86-LABEL: test_int_x86_avx512_mask_permvar_di_512: 6505 ; X86: ## %bb.0: 6506 ; X86-NEXT: vpermq %zmm0, %zmm1, %zmm3 ## encoding: [0x62,0xf2,0xf5,0x48,0x36,0xd8] 6507 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] 6508 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 6509 ; X86-NEXT: vpermq %zmm0, %zmm1, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x36,0xd0] 6510 ; X86-NEXT: vpermq %zmm0, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0x36,0xc0] 6511 ; X86-NEXT: vpaddq %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc3] 6512 ; X86-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0] 6513 ; X86-NEXT: retl ## encoding: [0xc3] 6514 ; 6515 ; X64-LABEL: test_int_x86_avx512_mask_permvar_di_512: 6516 ; X64: ## %bb.0: 6517 ; X64-NEXT: vpermq %zmm0, %zmm1, %zmm3 ## encoding: [0x62,0xf2,0xf5,0x48,0x36,0xd8] 6518 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6519 ; X64-NEXT: vpermq %zmm0, %zmm1, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x36,0xd0] 6520 ; X64-NEXT: vpermq %zmm0, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0x36,0xc0] 6521 ; X64-NEXT: vpaddq %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc3] 6522 ; X64-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0] 6523 ; X64-NEXT: retq ## encoding: [0xc3] 6524 %res = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) 6525 %res1 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer, i8 %x3) 
6526 %res2 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) 6527 %res3 = add <8 x i64> %res, %res1 6528 %res4 = add <8 x i64> %res3, %res2 6529 ret <8 x i64> %res4 6530 } 6531 6532 declare <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float>, <16 x i32>, <16 x float>, i16) 6533 6534 define <16 x float>@test_int_x86_avx512_mask_permvar_sf_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) { 6535 ; X86-LABEL: test_int_x86_avx512_mask_permvar_sf_512: 6536 ; X86: ## %bb.0: 6537 ; X86-NEXT: vpermps %zmm0, %zmm1, %zmm3 ## encoding: [0x62,0xf2,0x75,0x48,0x16,0xd8] 6538 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 6539 ; X86-NEXT: vpermps %zmm0, %zmm1, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x16,0xd0] 6540 ; X86-NEXT: vpermps %zmm0, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0x16,0xc0] 6541 ; X86-NEXT: vaddps %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc0] 6542 ; X86-NEXT: vaddps %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc3] 6543 ; X86-NEXT: retl ## encoding: [0xc3] 6544 ; 6545 ; X64-LABEL: test_int_x86_avx512_mask_permvar_sf_512: 6546 ; X64: ## %bb.0: 6547 ; X64-NEXT: vpermps %zmm0, %zmm1, %zmm3 ## encoding: [0x62,0xf2,0x75,0x48,0x16,0xd8] 6548 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6549 ; X64-NEXT: vpermps %zmm0, %zmm1, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x16,0xd0] 6550 ; X64-NEXT: vpermps %zmm0, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0x16,0xc0] 6551 ; X64-NEXT: vaddps %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc0] 6552 ; X64-NEXT: vaddps %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc3] 6553 ; X64-NEXT: retq ## encoding: [0xc3] 6554 %res = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) 6555 %res1 = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> zeroinitializer, i16 %x3) 6556 %res2 = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1) 6557 %res3 = fadd <16 x float> %res, %res1 6558 %res4 = fadd <16 x float> %res3, %res2 6559 ret <16 x float> %res4 6560 } 6561 6562 declare <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 6563 6564 define <16 x i32>@test_int_x86_avx512_mask_permvar_si_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) { 6565 ; X86-LABEL: test_int_x86_avx512_mask_permvar_si_512: 6566 ; X86: ## %bb.0: 6567 ; X86-NEXT: vpermd %zmm0, %zmm1, %zmm3 ## encoding: [0x62,0xf2,0x75,0x48,0x36,0xd8] 6568 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 6569 ; X86-NEXT: vpermd %zmm0, %zmm1, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x36,0xd0] 6570 ; X86-NEXT: vpermd %zmm0, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0x36,0xc0] 6571 ; X86-NEXT: vpaddd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc3] 6572 ; X86-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0] 6573 ; X86-NEXT: retl ## encoding: [0xc3] 6574 ; 6575 ; X64-LABEL: test_int_x86_avx512_mask_permvar_si_512: 6576 ; X64: ## %bb.0: 6577 ; X64-NEXT: vpermd %zmm0, %zmm1, %zmm3 ## encoding: [0x62,0xf2,0x75,0x48,0x36,0xd8] 6578 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6579 ; X64-NEXT: vpermd %zmm0, %zmm1, %zmm2 {%k1} ## encoding: 
[0x62,0xf2,0x75,0x49,0x36,0xd0] 6580 ; X64-NEXT: vpermd %zmm0, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0x36,0xc0] 6581 ; X64-NEXT: vpaddd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc3] 6582 ; X64-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0] 6583 ; X64-NEXT: retq ## encoding: [0xc3] 6584 %res = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) 6585 %res1 = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> zeroinitializer, i16 %x3) 6586 %res2 = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1) 6587 %res3 = add <16 x i32> %res, %res1 6588 %res4 = add <16 x i32> %res3, %res2 6589 ret <16 x i32> %res4 6590 } 6591 6592 declare <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16) 6593 6594 define <16 x i32>@test_int_x86_avx512_mask_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) { 6595 ; X86-LABEL: test_int_x86_avx512_mask_pternlog_d_512: 6596 ; X86: ## %bb.0: 6597 ; X86-NEXT: vmovdqa64 %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] 6598 ; X86-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 ## encoding: [0x62,0xf3,0x75,0x48,0x25,0xda,0x21] 6599 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 6600 ; X86-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf3,0x75,0x49,0x25,0xc2,0x21] 6601 ; X86-NEXT: vpaddd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc3] 6602 ; X86-NEXT: retl ## encoding: [0xc3] 6603 ; 6604 ; X64-LABEL: test_int_x86_avx512_mask_pternlog_d_512: 6605 ; X64: ## %bb.0: 6606 ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] 6607 ; X64-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 ## encoding: [0x62,0xf3,0x75,0x48,0x25,0xda,0x21] 6608 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6609 ; X64-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf3,0x75,0x49,0x25,0xc2,0x21] 6610 ; X64-NEXT: vpaddd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc3] 6611 ; X64-NEXT: retq ## encoding: [0xc3] 6612 %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4) 6613 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1) 6614 %res2 = add <16 x i32> %res, %res1 6615 ret <16 x i32> %res2 6616 } 6617 6618 declare <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16) 6619 6620 define <16 x i32>@test_int_x86_avx512_maskz_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) { 6621 ; X86-LABEL: test_int_x86_avx512_maskz_pternlog_d_512: 6622 ; X86: ## %bb.0: 6623 ; X86-NEXT: vmovdqa64 %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] 6624 ; X86-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 ## encoding: [0x62,0xf3,0x75,0x48,0x25,0xda,0x21] 6625 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 6626 ; X86-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc2,0x21] 6627 ; X86-NEXT: vpaddd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc3] 6628 ; X86-NEXT: retl ## encoding: [0xc3] 6629 ; 6630 ; X64-LABEL: test_int_x86_avx512_maskz_pternlog_d_512: 6631 ; X64: ## %bb.0: 6632 ; X64-NEXT: 
vmovdqa64 %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] 6633 ; X64-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 ## encoding: [0x62,0xf3,0x75,0x48,0x25,0xda,0x21] 6634 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6635 ; X64-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc2,0x21] 6636 ; X64-NEXT: vpaddd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc3] 6637 ; X64-NEXT: retq ## encoding: [0xc3] 6638 %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4) 6639 %res1 = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1) 6640 %res2 = add <16 x i32> %res, %res1 6641 ret <16 x i32> %res2 6642 } 6643 6644 declare <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8) 6645 6646 define <8 x i64>@test_int_x86_avx512_mask_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) { 6647 ; X86-LABEL: test_int_x86_avx512_mask_pternlog_q_512: 6648 ; X86: ## %bb.0: 6649 ; X86-NEXT: vmovdqa64 %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] 6650 ; X86-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 ## encoding: [0x62,0xf3,0xf5,0x48,0x25,0xda,0x21] 6651 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] 6652 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 6653 ; X86-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf3,0xf5,0x49,0x25,0xc2,0x21] 6654 ; X86-NEXT: vpaddq %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc3] 6655 ; X86-NEXT: retl ## encoding: [0xc3] 6656 ; 6657 ; X64-LABEL: test_int_x86_avx512_mask_pternlog_q_512: 6658 ; X64: ## %bb.0: 6659 ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] 6660 ; X64-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 ## encoding: [0x62,0xf3,0xf5,0x48,0x25,0xda,0x21] 6661 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6662 ; X64-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf3,0xf5,0x49,0x25,0xc2,0x21] 6663 ; X64-NEXT: vpaddq %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc3] 6664 ; X64-NEXT: retq ## encoding: [0xc3] 6665 %res = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4) 6666 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1) 6667 %res2 = add <8 x i64> %res, %res1 6668 ret <8 x i64> %res2 6669 } 6670 6671 declare <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8) 6672 6673 define <8 x i64>@test_int_x86_avx512_maskz_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) { 6674 ; X86-LABEL: test_int_x86_avx512_maskz_pternlog_q_512: 6675 ; X86: ## %bb.0: 6676 ; X86-NEXT: vmovdqa64 %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] 6677 ; X86-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 ## encoding: [0x62,0xf3,0xf5,0x48,0x25,0xda,0x21] 6678 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] 6679 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 6680 ; X86-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xc9,0x25,0xc2,0x21] 6681 ; X86-NEXT: vpaddq %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc3] 6682 ; X86-NEXT: retl ## encoding: [0xc3] 6683 ; 6684 ; X64-LABEL: 
test_int_x86_avx512_maskz_pternlog_q_512: 6685 ; X64: ## %bb.0: 6686 ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] 6687 ; X64-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 ## encoding: [0x62,0xf3,0xf5,0x48,0x25,0xda,0x21] 6688 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6689 ; X64-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xc9,0x25,0xc2,0x21] 6690 ; X64-NEXT: vpaddq %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc3] 6691 ; X64-NEXT: retq ## encoding: [0xc3] 6692 %res = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4) 6693 %res1 = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1) 6694 %res2 = add <8 x i64> %res, %res1 6695 ret <8 x i64> %res2 6696 } 6697 6698 declare <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 6699 6700 define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) { 6701 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_d_512: 6702 ; X86: ## %bb.0: 6703 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] 6704 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 6705 ; X86-NEXT: vmovdqa64 %zmm1, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9] 6706 ; X86-NEXT: vpermi2d (%eax), %zmm0, %zmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x76,0x18] 6707 ; X86-NEXT: vpermt2d %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0x7e,0xc2] 6708 ; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm0 ## encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0] 6709 ; X86-NEXT: retl ## encoding: [0xc3] 6710 ; 6711 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_d_512: 6712 ; X64: ## %bb.0: 6713 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 6714 ; X64-NEXT: vmovdqa64 %zmm1, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9] 6715 ; X64-NEXT: vpermi2d (%rdi), %zmm0, %zmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x76,0x1f] 6716 ; X64-NEXT: vpermt2d %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0x7e,0xc2] 6717 ; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm0 ## encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0] 6718 ; X64-NEXT: retq ## encoding: [0xc3] 6719 %x2 = load <16 x i32>, <16 x i32>* %x2p 6720 %res = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) 6721 %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 -1) 6722 %res2 = add <16 x i32> %res, %res1 6723 ret <16 x i32> %res2 6724 } 6725 6726 declare <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8) 6727 6728 define <8 x double>@test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) { 6729 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_512: 6730 ; X86: ## %bb.0: 6731 ; X86-NEXT: vmovapd %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xd8] 6732 ; X86-NEXT: vpermt2pd %zmm2, %zmm1, %zmm3 ## encoding: [0x62,0xf2,0xf5,0x48,0x7f,0xda] 6733 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] 6734 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 6735 ; X86-NEXT: vpermi2pd %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x77,0xca] 6736 ; X86-NEXT: vaddpd %zmm3, %zmm1, %zmm0 ## encoding: 
[0x62,0xf1,0xf5,0x48,0x58,0xc3] 6737 ; X86-NEXT: retl ## encoding: [0xc3] 6738 ; 6739 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_512: 6740 ; X64: ## %bb.0: 6741 ; X64-NEXT: vmovapd %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xd8] 6742 ; X64-NEXT: vpermt2pd %zmm2, %zmm1, %zmm3 ## encoding: [0x62,0xf2,0xf5,0x48,0x7f,0xda] 6743 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6744 ; X64-NEXT: vpermi2pd %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x77,0xca] 6745 ; X64-NEXT: vaddpd %zmm3, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc3] 6746 ; X64-NEXT: retq ## encoding: [0xc3] 6747 %res = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) 6748 %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1) 6749 %res2 = fadd <8 x double> %res, %res1 6750 ret <8 x double> %res2 6751 } 6752 6753 declare <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16) 6754 6755 define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) { 6756 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_512: 6757 ; X86: ## %bb.0: 6758 ; X86-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] 6759 ; X86-NEXT: vpermt2ps %zmm2, %zmm1, %zmm3 ## encoding: [0x62,0xf2,0x75,0x48,0x7f,0xda] 6760 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 6761 ; X86-NEXT: vpermi2ps %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x77,0xca] 6762 ; X86-NEXT: vaddps %zmm3, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc3] 6763 ; X86-NEXT: retl ## encoding: [0xc3] 6764 ; 6765 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_512: 6766 ; X64: ## %bb.0: 6767 ; X64-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] 6768 ; X64-NEXT: vpermt2ps %zmm2, %zmm1, %zmm3 ## encoding: [0x62,0xf2,0x75,0x48,0x7f,0xda] 6769 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6770 ; X64-NEXT: vpermi2ps %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x77,0xca] 6771 ; X64-NEXT: vaddps %zmm3, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc3] 6772 ; X64-NEXT: retq ## encoding: [0xc3] 6773 %res = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) 6774 %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1) 6775 %res2 = fadd <16 x float> %res, %res1 6776 ret <16 x float> %res2 6777 } 6778 6779 declare <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 6780 6781 define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { 6782 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_q_512: 6783 ; X86: ## %bb.0: 6784 ; X86-NEXT: vmovdqa64 %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] 6785 ; X86-NEXT: vpermt2q %zmm2, %zmm1, %zmm3 ## encoding: [0x62,0xf2,0xf5,0x48,0x7e,0xda] 6786 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] 6787 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 6788 ; X86-NEXT: vpermi2q %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x76,0xca] 6789 ; X86-NEXT: vpaddq %zmm3, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc3] 6790 ; X86-NEXT: 
retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_q_512:
; X64: ## %bb.0:
; X64-NEXT: vmovdqa64 %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT: vpermt2q %zmm2, %zmm1, %zmm3 ## encoding: [0x62,0xf2,0xf5,0x48,0x7e,0xda]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermi2q %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x76,0xca]
; X64-NEXT: vpaddq %zmm3, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc3]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
%res2 = add <8 x i64> %res, %res1
ret <8 x i64> %res2
}

declare <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vmovdqa64 %zmm1, %zmm2 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd1]
; X86-NEXT: vpermt2d (%eax), %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x7e,0x10]
; X86-NEXT: vpermt2d %zmm1, %zmm0, %zmm1 ## encoding: [0x62,0xf2,0x7d,0x48,0x7e,0xc9]
; X86-NEXT: vpaddd %zmm1, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vmovdqa64 %zmm1, %zmm2 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd1]
; X64-NEXT: vpermt2d (%rdi), %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x7e,0x17]
; X64-NEXT: vpermt2d %zmm1, %zmm0, %zmm1 ## encoding: [0x62,0xf2,0x7d,0x48,0x7e,0xc9]
; X64-NEXT: vpaddd %zmm1, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%x2 = load <16 x i32>, <16 x i32>* %x2p
%res = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
%res1 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x1, i16 -1)
%res2 = add <16 x i32> %res, %res1
ret <16 x i32> %res2
}

declare <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64>, <8 x double>, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, <8 x double> %x1, double* %x2ptr, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vmovapd %zmm1, %zmm2 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xd1]
; X86-NEXT: vpermt2pd (%eax){1to8}, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xd9,0x7f,0x10]
; X86-NEXT: vpermt2pd %zmm1, %zmm0, %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x7f,0xc9]
; X86-NEXT: vaddpd %zmm1, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vmovapd %zmm1, %zmm2 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xd1]
; X64-NEXT: vpermt2pd (%rdi){1to8}, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xd9,0x7f,0x17]
; X64-NEXT: vpermt2pd %zmm1, %zmm0, %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x7f,0xc9]
; X64-NEXT: vaddpd %zmm1, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%x2s = load double, double* %x2ptr
%x2ins = insertelement <8 x double> undef, double %x2s, i32 0
%x2 = shufflevector <8 x double> %x2ins, <8 x double> undef, <8 x i32> zeroinitializer
%res = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
%res1 = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x1, i8 -1)
%res2 = fadd <8 x double> %res, %res1
ret <8 x double> %res2
}

declare <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_maskz_vpermt2var_ps_512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_ps_512:
; X86: ## %bb.0:
; X86-NEXT: vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9]
; X86-NEXT: vpermt2ps %zmm2, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x7f,0xda]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpermt2ps %zmm2, %zmm0, %zmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x7f,0xca]
; X86-NEXT: vaddps %zmm3, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc3]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_ps_512:
; X64: ## %bb.0:
; X64-NEXT: vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9]
; X64-NEXT: vpermt2ps %zmm2, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x7f,0xda]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermt2ps %zmm2, %zmm0, %zmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x7f,0xca]
; X64-NEXT: vaddps %zmm3, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc3]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
%res1 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
%res2 = fadd <16 x float> %res, %res1
ret <16 x float> %res2
}


declare <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_512:
; X86: ## %bb.0:
; X86-NEXT: vmovdqa64 %zmm1, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
; X86-NEXT: vpermt2q %zmm2, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x7e,0xda]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermt2q %zmm2, %zmm0, %zmm1 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x7e,0xca]
; X86-NEXT: vpaddq %zmm3, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc3]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_512:
; X64: ## %bb.0:
; X64-NEXT: vmovdqa64 %zmm1, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
; X64-NEXT: vpermt2q %zmm2, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x7e,0xda]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermt2q %zmm2, %zmm0, %zmm1 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x7e,0xca]
; X64-NEXT: vpaddq %zmm3, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc3]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
%res1 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
%res2 = add <8 x i64> %res, %res1
ret <8 x i64> %res2
}

declare <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_d_512:
; X86: ## %bb.0:
; X86-NEXT: vmovdqa64 %zmm1, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
; X86-NEXT: vpermt2d %zmm2, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x7e,0xda]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpermt2d %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x7e,0xca]
; X86-NEXT: vpaddd %zmm3, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc3]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_d_512:
; X64: ## %bb.0:
; X64-NEXT: vmovdqa64 %zmm1, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
; X64-NEXT: vpermt2d %zmm2, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x7e,0xda]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermt2d %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x7e,0xca]
; X64-NEXT: vpaddd %zmm3, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc3]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
%res2 = add <16 x i32> %res, %res1
ret <16 x i32> %res2
}
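
; The trailing i32 operand of the mask.<op>.ps/pd.512 intrinsics below is the
; rounding control. As the autogenerated assertions show, 0 lowers to the
; {rn-sae} form, 1 to {rd-sae}, 2 to {ru-sae}, and 3 to {rz-sae}.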
declare <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
declare <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
declare <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <16 x float> @test_vsubps_rn(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vsubps_rn:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5c,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> zeroinitializer, i16 -1, i32 0)
ret <16 x float> %res
}

define <16 x float> @test_vsubps_rd(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vsubps_rd:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x5c,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> zeroinitializer, i16 -1, i32 1)
ret <16 x float> %res
}

define <16 x float> @test_vsubps_ru(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vsubps_ru:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x5c,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> zeroinitializer, i16 -1, i32 2)
ret <16 x float> %res
}

define <16 x float> @test_vsubps_rz(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vsubps_rz:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x5c,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> zeroinitializer, i16 -1, i32 3)
ret <16 x float> %res
}

define <16 x float> @test_vmulps_rn(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vmulps_rn:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x59,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> zeroinitializer, i16 -1, i32 0)
ret <16 x float> %res
}

define <16 x float> @test_vmulps_rd(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vmulps_rd:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x59,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> zeroinitializer, i16 -1, i32 1)
ret <16 x float> %res
}

define <16 x float> @test_vmulps_ru(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vmulps_ru:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x59,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> zeroinitializer, i16 -1, i32 2)
ret <16 x float> %res
}

define <16 x float> @test_vmulps_rz(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vmulps_rz:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x59,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> zeroinitializer, i16 -1, i32 3)
ret <16 x float> %res
}

;; mask float
define <16 x float> @test_vmulps_mask_rn(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_vmulps_mask_rn:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x59,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_vmulps_mask_rn:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x59,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> zeroinitializer, i16 %mask, i32 0)
ret <16 x float> %res
}

define <16 x float> @test_vmulps_mask_rd(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_vmulps_mask_rd:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x59,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_vmulps_mask_rd:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x59,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> zeroinitializer, i16 %mask, i32 1)
ret <16 x float> %res
}

define <16 x float> @test_vmulps_mask_ru(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_vmulps_mask_ru:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x59,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_vmulps_mask_ru:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x59,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> zeroinitializer, i16 %mask, i32 2)
ret <16 x float> %res
}

define <16 x float> @test_vmulps_mask_rz(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_vmulps_mask_rz:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x59,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_vmulps_mask_rz:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x59,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> zeroinitializer, i16 %mask, i32 3)
ret <16 x float> %res
}
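
; With a zeroinitializer passthru (above) the masked operation selects the
; zeroing form, {%k1} {z}. With an explicit passthru operand (below) it merges
; into a copy of the passthru register instead, and a vmovaps moves the result
; back into %zmm0.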
;; With Passthru value
define <16 x float> @test_vmulps_mask_passthru_rn(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
; X86-LABEL: test_vmulps_mask_passthru_rn:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x59,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_vmulps_mask_passthru_rn:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x59,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> %passthru, i16 %mask, i32 0)
ret <16 x float> %res
}

define <16 x float> @test_vmulps_mask_passthru_rd(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
; X86-LABEL: test_vmulps_mask_passthru_rd:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x59,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_vmulps_mask_passthru_rd:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x59,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> %passthru, i16 %mask, i32 1)
ret <16 x float> %res
}

define <16 x float> @test_vmulps_mask_passthru_ru(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
; X86-LABEL: test_vmulps_mask_passthru_ru:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x59,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_vmulps_mask_passthru_ru:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x59,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> %passthru, i16 %mask, i32 2)
ret <16 x float> %res
}

define <16 x float> @test_vmulps_mask_passthru_rz(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
; X86-LABEL: test_vmulps_mask_passthru_rz:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x59,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_vmulps_mask_passthru_rz:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x59,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> %passthru, i16 %mask, i32 3)
ret <16 x float> %res
}
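
; For <8 x double> the mask is only 8 bits. On 32-bit targets it is loaded
; from the stack with a movzbl before the kmovw, whereas the i16 masks above
; can be kmovw'd straight from their stack slot.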
;; mask double
define <8 x double> @test_vmulpd_mask_rn(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; X86-LABEL: test_vmulpd_mask_rn:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmulpd {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0x59,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_vmulpd_mask_rn:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmulpd {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0x59,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
<8 x double> zeroinitializer, i8 %mask, i32 0)
ret <8 x double> %res
}

define <8 x double> @test_vmulpd_mask_rd(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; X86-LABEL: test_vmulpd_mask_rd:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmulpd {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0x59,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_vmulpd_mask_rd:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmulpd {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0x59,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
<8 x double> zeroinitializer, i8 %mask, i32 1)
ret <8 x double> %res
}

define <8 x double> @test_vmulpd_mask_ru(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; X86-LABEL: test_vmulpd_mask_ru:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmulpd {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0x59,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_vmulpd_mask_ru:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmulpd {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0x59,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
<8 x double> zeroinitializer, i8 %mask, i32 2)
ret <8 x double> %res
}

define <8 x double> @test_vmulpd_mask_rz(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; X86-LABEL: test_vmulpd_mask_rz:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmulpd {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xf9,0x59,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_vmulpd_mask_rz:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmulpd {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xf9,0x59,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
<8 x double> zeroinitializer, i8 %mask, i32 3)
ret <8 x double> %res
}

define <16 x float> @test_mm512_maskz_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_add_round_ps_rn_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x58,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_add_round_ps_rn_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x58,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
ret <16 x float> %res
}
define <16 x float> @test_mm512_maskz_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_add_round_ps_rd_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x58,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_add_round_ps_rd_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x58,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1)
ret <16 x float> %res
}
define <16 x float> @test_mm512_maskz_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_add_round_ps_ru_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x58,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_add_round_ps_ru_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x58,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_add_round_ps_rz_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x58,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_add_round_ps_rz_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x58,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
ret <16 x float> %res
}
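
; A rounding argument of 4 means the current rounding direction (MXCSR); the
; *_current tests below check that the plain instruction is emitted, with no
; embedded-rounding operand.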
define <16 x float> @test_mm512_maskz_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_add_round_ps_current:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vaddps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x58,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_add_round_ps_current:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x58,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_add_round_ps_rn_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x58,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_add_round_ps_rn_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x58,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_add_round_ps_rd_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x58,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_add_round_ps_rd_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x58,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_add_round_ps_ru_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x58,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_add_round_ps_ru_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x58,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_add_round_ps_rz_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x58,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_add_round_ps_rz_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x58,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
ret <16 x float> %res
}


define <16 x float> @test_mm512_mask_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_add_round_ps_current:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vaddps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x58,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_add_round_ps_current:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x58,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
ret <16 x float> %res
}


define <16 x float> @test_mm512_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_rn_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x58,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
ret <16 x float> %res
}
define <16 x float> @test_mm512_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_rd_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x58,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
ret <16 x float> %res
}
define <16 x float> @test_mm512_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_ru_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x58,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
ret <16 x float> %res
}

define <16 x float> @test_mm512_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_rz_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x58,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
ret <16 x float> %res
}

define <16 x float> @test_mm512_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_current:
; CHECK: ## %bb.0:
; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float> @test_mm512_mask_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_sub_round_ps_rn_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5c,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_sub_round_ps_rn_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5c,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_sub_round_ps_rd_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x5c,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_sub_round_ps_rd_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x5c,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_sub_round_ps_ru_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x5c,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_sub_round_ps_ru_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x5c,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_sub_round_ps_rz_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x5c,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_sub_round_ps_rz_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x5c,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
ret <16 x float> %res
}


define <16 x float> @test_mm512_mask_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_sub_round_ps_current:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vsubps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5c,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_sub_round_ps_current:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsubps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5c,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
ret <16 x float> %res
}

define <16 x float> @test_mm512_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_sub_round_ps_rn_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5c,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
ret <16 x float> %res
}
define <16 x float> @test_mm512_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_sub_round_ps_rd_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x5c,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
ret <16 x float> %res
}
define <16 x float> @test_mm512_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_sub_round_ps_ru_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x5c,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
ret <16 x float> %res
}

define <16 x float> @test_mm512_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_sub_round_ps_rz_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x5c,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
ret <16 x float> %res
}

define <16 x float> @test_mm512_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_sub_round_ps_current:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsubps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x5c,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_div_round_ps_rn_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x5e,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_div_round_ps_rn_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x5e,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
ret <16 x float> %res
}
define <16 x float> @test_mm512_maskz_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_div_round_ps_rd_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x5e,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_div_round_ps_rd_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x5e,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1)
ret <16 x float> %res
}
define <16 x float> @test_mm512_maskz_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_div_round_ps_ru_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x5e,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_div_round_ps_ru_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x5e,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_div_round_ps_rz_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x5e,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_div_round_ps_rz_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x5e,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
ret <16 x float> %res
}


define <16 x float> @test_mm512_maskz_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_div_round_ps_current:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vdivps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x5e,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_div_round_ps_current:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vdivps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x5e,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_div_round_ps_rn_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5e,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_div_round_ps_rn_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5e,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_div_round_ps_rd_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x5e,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_div_round_ps_rd_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x5e,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_div_round_ps_ru_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x5e,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_div_round_ps_ru_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x5e,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_div_round_ps_rz_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x5e,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_div_round_ps_rz_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x5e,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
ret <16 x float> %res
}


define <16 x float> @test_mm512_mask_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_div_round_ps_current:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vdivps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5e,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_div_round_ps_current:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vdivps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5e,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
ret <16 x float> %res
}


define <16 x float> @test_mm512_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_div_round_ps_rn_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5e,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
ret <16 x float> %res
}
define <16 x float> @test_mm512_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_div_round_ps_rd_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x5e,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
ret <16 x float> %res
}
define <16 x float> @test_mm512_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_div_round_ps_ru_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x5e,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
ret <16 x float> %res
}

define <16 x float> @test_mm512_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_div_round_ps_rz_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x5e,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
ret <16 x float> %res
}

define <16 x float> @test_mm512_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_div_round_ps_current:
; CHECK: ## %bb.0:
; CHECK-NEXT: vdivps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x5e,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
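
; Compress stores: v(p)compress writes only the elements whose mask bit is
; set, packed contiguously at the destination address. The unmasked variants
; below build an all-ones mask with kxnorw %k0, %k0, %k1, since the memory
; form of the instruction always takes a write mask.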
define void @test_mask_compress_store_pd_512(i8* %addr, <8 x double> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vcompresspd %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8a,0x00]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vcompresspd %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8a,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
ret void
}

declare void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 %mask)

define void @test_compress_store_pd_512(i8* %addr, <8 x double> %data) {
; X86-LABEL: test_compress_store_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vcompresspd %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8a,0x00]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_compress_store_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vcompresspd %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8a,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 -1)
ret void
}

define void @test_mask_compress_store_ps_512(i8* %addr, <16 x float> %data, i16 %mask) {
; X86-LABEL: test_mask_compress_store_ps_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vcompressps %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8a,0x00]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vcompressps %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8a,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.ps.512(i8* %addr, <16 x float> %data, i16 %mask)
ret void
}

declare void @llvm.x86.avx512.mask.compress.store.ps.512(i8* %addr, <16 x float> %data, i16 %mask)

define void @test_compress_store_ps_512(i8* %addr, <16 x float> %data) {
; X86-LABEL: test_compress_store_ps_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vcompressps %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8a,0x00]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_compress_store_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vcompressps %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8a,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.ps.512(i8* %addr, <16 x float> %data, i16 -1)
ret void
}

define void @test_mask_compress_store_q_512(i8* %addr, <8 x i64> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_q_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpcompressq %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0x00]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpcompressq %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
ret void
}

declare void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 %mask)

define void @test_compress_store_q_512(i8* %addr, <8 x i64> %data) {
; X86-LABEL: test_compress_store_q_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpcompressq %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0x00]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_compress_store_q_512:
; X64: ## %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpcompressq %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 -1)
ret void
}

define void @test_mask_compress_store_d_512(i8* %addr, <16 x i32> %data, i16 %mask) {
; X86-LABEL: test_mask_compress_store_d_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpcompressd %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8b,0x00]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpcompressd %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8b,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.d.512(i8* %addr, <16 x i32> %data, i16 %mask)
ret void
}

declare void @llvm.x86.avx512.mask.compress.store.d.512(i8* %addr, <16 x i32> %data, i16 %mask)

define void @test_compress_store_d_512(i8* %addr, <16 x i32> %data) {
; X86-LABEL: test_compress_store_d_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpcompressd %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8b,0x00]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_compress_store_d_512:
; X64: ## %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpcompressd %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8b,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.d.512(i8* %addr, <16 x i32> %data, i16 -1)
ret void
}
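
; Expand loads are the inverse operation: consecutive elements are read from
; memory and placed into the lanes enabled by the mask, merging ({%k1}) or
; zeroing ({%k1} {z}) the disabled lanes.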
@llvm.x86.avx512.mask.compress.store.d.512(i8* %addr, <16 x i32> %data, i16 -1) 7965 ret void 7966 } 7967 7968 define <8 x double> @test_mask_expand_load_pd_512(i8* %addr, <8 x double> %data, i8 %mask) { 7969 ; X86-LABEL: test_mask_expand_load_pd_512: 7970 ; X86: ## %bb.0: 7971 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] 7972 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08] 7973 ; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9] 7974 ; X86-NEXT: vexpandpd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x00] 7975 ; X86-NEXT: retl ## encoding: [0xc3] 7976 ; 7977 ; X64-LABEL: test_mask_expand_load_pd_512: 7978 ; X64: ## %bb.0: 7979 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 7980 ; X64-NEXT: vexpandpd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x07] 7981 ; X64-NEXT: retq ## encoding: [0xc3] 7982 %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 %mask) 7983 ret <8 x double> %res 7984 } 7985 7986 define <8 x double> @test_maskz_expand_load_pd_512(i8* %addr, i8 %mask) { 7987 ; X86-LABEL: test_maskz_expand_load_pd_512: 7988 ; X86: ## %bb.0: 7989 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] 7990 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08] 7991 ; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9] 7992 ; X86-NEXT: vexpandpd (%eax), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x88,0x00] 7993 ; X86-NEXT: retl ## encoding: [0xc3] 7994 ; 7995 ; X64-LABEL: test_maskz_expand_load_pd_512: 7996 ; X64: ## %bb.0: 7997 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 7998 ; X64-NEXT: vexpandpd (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x88,0x07] 7999 ; X64-NEXT: retq ## encoding: [0xc3] 8000 %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> zeroinitializer, i8 %mask) 8001 ret <8 x double> %res 8002 } 8003 8004 declare <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 %mask) 8005 8006 define <8 x double> @test_expand_load_pd_512(i8* %addr, <8 x double> %data) { 8007 ; X86-LABEL: test_expand_load_pd_512: 8008 ; X86: ## %bb.0: 8009 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] 8010 ; X86-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8] 8011 ; X86-NEXT: vexpandpd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x00] 8012 ; X86-NEXT: retl ## encoding: [0xc3] 8013 ; 8014 ; X64-LABEL: test_expand_load_pd_512: 8015 ; X64: ## %bb.0: 8016 ; X64-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8] 8017 ; X64-NEXT: vexpandpd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x07] 8018 ; X64-NEXT: retq ## encoding: [0xc3] 8019 %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 -1) 8020 ret <8 x double> %res 8021 } 8022 8023 ; Make sure we don't crash if you pass 0 to the mask. 
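; With an all-zeroes mask (the kxorw %k0, %k0, %k1 below), the masked vexpandpd
; loads no elements and leaves the passthru value in %zmm0 unchanged.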
define <8 x double> @test_zero_mask_expand_load_pd_512(i8* %addr, <8 x double> %data, i8 %mask) {
; X86-LABEL: test_zero_mask_expand_load_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x47,0xc8]
; X86-NEXT: vexpandpd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_zero_mask_expand_load_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kxorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x47,0xc8]
; X64-NEXT: vexpandpd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 0)
ret <8 x double> %res
}

define <16 x float> @test_mask_expand_load_ps_512(i8* %addr, <16 x float> %data, i16 %mask) {
; X86-LABEL: test_mask_expand_load_ps_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vexpandps (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x88,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vexpandps (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x88,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> %data, i16 %mask)
ret <16 x float> %res
}

define <16 x float> @test_maskz_expand_load_ps_512(i8* %addr, i16 %mask) {
; X86-LABEL: test_maskz_expand_load_ps_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vexpandps (%eax), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x88,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vexpandps (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x88,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> zeroinitializer, i16 %mask)
ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> %data, i16 %mask)

define <16 x float> @test_expand_load_ps_512(i8* %addr, <16 x float> %data) {
; X86-LABEL: test_expand_load_ps_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vexpandps (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x88,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_expand_load_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vexpandps (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x88,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> %data, i16 -1)
ret <16 x float> %res
}

define <8 x i64> @test_mask_expand_load_q_512(i8* %addr, <8 x i64> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_q_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpexpandq (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpexpandq (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_maskz_expand_load_q_512(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_q_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpexpandq (%eax), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x89,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpexpandq (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x89,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 %mask)

define <8 x i64> @test_expand_load_q_512(i8* %addr, <8 x i64> %data) {
; X86-LABEL: test_expand_load_q_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpexpandq (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_expand_load_q_512:
; X64: ## %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpexpandq (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 -1)
ret <8 x i64> %res
}

define <16 x i32> @test_mask_expand_load_d_512(i8* %addr, <16 x i32> %data, i16 %mask) {
; X86-LABEL: test_mask_expand_load_d_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpexpandd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x89,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpexpandd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x89,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> %data, i16 %mask)
ret <16 x i32> %res
}

define <16 x i32> @test_maskz_expand_load_d_512(i8* %addr, i16 %mask) {
; X86-LABEL: test_maskz_expand_load_d_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpexpandd (%eax), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x89,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpexpandd (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x89,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> %data, i16 %mask)

define <16 x i32> @test_expand_load_d_512(i8* %addr, <16 x i32> %data) {
; X86-LABEL: test_expand_load_d_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpexpandd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x89,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_expand_load_d_512:
; X64: ## %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpexpandd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x89,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> %data, i16 -1)
ret <16 x i32> %res
}

define <16 x float> @test_mm512_maskz_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_min_round_ps_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x5d,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_min_round_ps_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x5d,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_min_round_ps_current:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vminps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x5d,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_min_round_ps_current:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vminps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x5d,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_min_round_ps_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5d,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_min_round_ps_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5d,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_min_round_ps_current:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vminps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5d,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_min_round_ps_current:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vminps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5d,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
ret <16 x float> %res
}

define <16 x float> @test_mm512_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_min_round_ps_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5d,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
ret <16 x float> %res
}

define <16 x float> @test_mm512_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_min_round_ps_current:
; CHECK: ## %bb.0:
; CHECK-NEXT: vminps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x5d,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float> @test_mm512_maskz_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_max_round_ps_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x5f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_max_round_ps_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x5f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_max_round_ps_current:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x5f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_max_round_ps_current:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x5f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_max_round_ps_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5f,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_max_round_ps_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5f,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_max_round_ps_current:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmaxps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5f,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_max_round_ps_current:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmaxps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5f,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
ret <16 x float> %res
}

define <16 x float> @test_mm512_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_max_round_ps_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5f,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
ret <16 x float> %res
}

define <16 x float> @test_mm512_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_max_round_ps_current:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmaxps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x5f,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
; CHECK-LABEL: test_sqrt_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsqrtpd %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x51,0xc0]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> undef, i8 -1, i32 4)
ret <8 x double> %res
}
define <8 x double> @test_mask_sqrt_pd_512(<8 x double> %a0, <8 x double> %passthru, i8 %mask) {
; X86-LABEL: test_mask_sqrt_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vsqrtpd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x51,0xc8]
; X86-NEXT: vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sqrt_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsqrtpd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x51,0xc8]
; X64-NEXT: vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> %passthru, i8 %mask, i32 4)
ret <8 x double> %res
}
define <8 x double> @test_maskz_sqrt_pd_512(<8 x double> %a0, i8 %mask) {
; X86-LABEL: test_maskz_sqrt_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vsqrtpd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x51,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_sqrt_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsqrtpd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x51,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 %mask, i32 4)
ret <8 x double> %res
}
define <8 x double> @test_sqrt_round_pd_512(<8 x double> %a0) {
; CHECK-LABEL: test_sqrt_round_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsqrtpd {rz-sae}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x78,0x51,0xc0]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> undef, i8 -1, i32 11)
ret <8 x double> %res
}
define <8 x double> @test_mask_sqrt_round_pd_512(<8 x double> %a0, <8 x double> %passthru, i8 %mask) {
; X86-LABEL: test_mask_sqrt_round_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vsqrtpd {rz-sae}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x79,0x51,0xc8]
; X86-NEXT: vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sqrt_round_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsqrtpd {rz-sae}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x79,0x51,0xc8]
; X64-NEXT: vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> %passthru, i8 %mask, i32 11)
ret <8 x double> %res
}
define <8 x double> @test_maskz_sqrt_round_pd_512(<8 x double> %a0, i8 %mask) {
; X86-LABEL: test_maskz_sqrt_round_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vsqrtpd {rz-sae}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xf9,0x51,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_sqrt_round_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsqrtpd {rz-sae}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xf9,0x51,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 %mask, i32 11)
ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone

define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) {
; CHECK-LABEL: test_sqrt_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsqrtps %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x51,0xc0]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> undef, i16 -1, i32 4)
ret <16 x float> %res
}
define <16 x float> @test_mask_sqrt_ps_512(<16 x float> %a0, <16 x float> %passthru, i16 %mask) {
; X86-LABEL: test_mask_sqrt_ps_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vsqrtps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x51,0xc8]
; X86-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sqrt_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsqrtps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x51,0xc8]
; X64-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> %passthru, i16 %mask, i32 4)
ret <16 x float> %res
}
define <16 x float> @test_maskz_sqrt_ps_512(<16 x float> %a0, i16 %mask) {
; X86-LABEL: test_maskz_sqrt_ps_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vsqrtps %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x51,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_sqrt_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsqrtps %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x51,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 %mask, i32 4)
ret <16 x float> %res
}
define <16 x float> @test_sqrt_round_ps_512(<16 x float> %a0) {
; CHECK-LABEL: test_sqrt_round_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsqrtps {rz-sae}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x51,0xc0]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 11)
ret <16 x float> %res
}
define <16 x float> @test_mask_sqrt_round_ps_512(<16 x float> %a0, <16 x float> %passthru, i16 %mask) {
; X86-LABEL: test_mask_sqrt_round_ps_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vsqrtps {rz-sae}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x51,0xc8]
; X86-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sqrt_round_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsqrtps {rz-sae}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x51,0xc8]
; X64-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> %passthru, i16 %mask, i32 11)
ret <16 x float> %res
}
define <16 x float> @test_maskz_sqrt_round_ps_512(<16 x float> %a0, i16 %mask) {
; X86-LABEL: test_maskz_sqrt_round_ps_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vsqrtps {rz-sae}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x51,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_sqrt_round_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsqrtps {rz-sae}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x51,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 %mask, i32 11)
ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone

declare <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_prolv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prolv_d_512:
; X86: ## %bb.0:
; X86-NEXT: vprolvd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x15,0xd9]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vprolvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x15,0xd1]
; X86-NEXT: vprolvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x15,0xc1]
; X86-NEXT: vpaddd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc3]
; X86-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prolv_d_512:
; X64: ## %bb.0:
; X64-NEXT: vprolvd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x15,0xd9]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vprolvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x15,0xd1]
; X64-NEXT: vprolvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x15,0xc1]
; X64-NEXT: vpaddd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc3]
; X64-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> zeroinitializer, i16 %x3)
%res2 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
%res3 = add <16 x i32> %res, %res1
%res4 = add <16 x i32> %res3, %res2
ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_prolv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prolv_q_512:
; X86: ## %bb.0:
; X86-NEXT: vprolvq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x15,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vprolvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x15,0xd1]
; X86-NEXT: vprolvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x15,0xc1]
; X86-NEXT: vpaddq %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc3]
; X86-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prolv_q_512:
; X64: ## %bb.0:
; X64-NEXT: vprolvq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x15,0xd9]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vprolvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x15,0xd1]
; X64-NEXT: vprolvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x15,0xc1]
; X64-NEXT: vpaddq %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc3]
; X64-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer, i8 %x3)
%res2 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
%res3 = add <8 x i64> %res, %res1
%res4 = add <8 x i64> %res3, %res2
ret <8 x i64> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_prorv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prorv_d_512:
; X86: ## %bb.0:
; X86-NEXT: vprorvd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x14,0xd9]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vprorvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x14,0xd1]
; X86-NEXT: vprorvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x14,0xc1]
; X86-NEXT: vpaddd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc3]
; X86-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prorv_d_512:
; X64: ## %bb.0:
; X64-NEXT: vprorvd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x14,0xd9]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vprorvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x14,0xd1]
; X64-NEXT: vprorvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x14,0xc1]
; X64-NEXT: vpaddd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc3]
; X64-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> zeroinitializer, i16 %x3)
%res2 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
%res3 = add <16 x i32> %res, %res1
%res4 = add <16 x i32> %res3, %res2
ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_prorv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prorv_q_512:
; X86: ## %bb.0:
; X86-NEXT: vprorvq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x14,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vprorvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x14,0xd1]
; X86-NEXT: vprorvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x14,0xc1]
; X86-NEXT: vpaddq %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc3]
; X86-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prorv_q_512:
; X64: ## %bb.0:
; X64-NEXT: vprorvq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x14,0xd9]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vprorvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x14,0xd1]
; X64-NEXT: vprorvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x14,0xc1]
; X64-NEXT: vpaddq %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc3]
; X64-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer, i8 %x3)
%res2 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
%res3 = add <8 x i64> %res, %res1
%res4 = add <8 x i64> %res3, %res2
ret <8 x i64> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32>, i32, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_prol_d_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prol_d_512:
; X86: ## %bb.0:
; X86-NEXT: vprold $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xc8,0x03]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vprold $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc8,0x03]
; X86-NEXT: vprold $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xc8,0x03]
; X86-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X86-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prol_d_512:
; X64: ## %bb.0:
; X64-NEXT: vprold $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xc8,0x03]
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vprold $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc8,0x03]
; X64-NEXT: vprold $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xc8,0x03]
; X64-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X64-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 %x3)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32> %x0, i32 3, <16 x i32> zeroinitializer, i16 %x3)
%res2 = call <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 -1)
%res3 = add <16 x i32> %res, %res1
%res4 = add <16 x i32> %res3, %res2
ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64>, i32, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_prol_q_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prol_q_512:
; X86: ## %bb.0:
; X86-NEXT: vprolq $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x72,0xc8,0x03]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vprolq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc8,0x03]
; X86-NEXT: vprolq $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x72,0xc8,0x03]
; X86-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prol_q_512:
; X64: ## %bb.0:
; X64-NEXT: vprolq $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x72,0xc8,0x03]
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vprolq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc8,0x03]
; X64-NEXT: vprolq $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x72,0xc8,0x03]
; X64-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64> %x0, i32 3, <8 x i64> zeroinitializer, i8 %x3)
%res2 = call <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 -1)
%res3 = add <8 x i64> %res, %res1
%res4 = add <8 x i64> %res3, %res2
ret <8 x i64> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.pror.d.512(<16 x i32>, i32, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_pror_d_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pror_d_512:
; X86: ## %bb.0:
; X86-NEXT: vprord $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xc0,0x03]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vprord $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc0,0x03]
; X86-NEXT: vprord $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xc0,0x03]
; X86-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X86-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pror_d_512:
; X64: ## %bb.0:
; X64-NEXT: vprord $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xc0,0x03]
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vprord $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc0,0x03]
; X64-NEXT: vprord $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xc0,0x03]
; X64-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X64-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.pror.d.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 %x3)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.pror.d.512(<16 x i32> %x0, i32 3, <16 x i32> zeroinitializer, i16 %x3)
%res2 = call <16 x i32> @llvm.x86.avx512.mask.pror.d.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 -1)
%res3 = add <16 x i32> %res, %res1
%res4 = add <16 x i32> %res3, %res2
ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.pror.q.512(<8 x i64>, i32, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_pror_q_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pror_q_512:
; X86: ## %bb.0:
; X86-NEXT: vprorq $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x72,0xc0,0x03]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vprorq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc0,0x03]
; X86-NEXT: vprorq $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x72,0xc0,0x03]
; X86-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pror_q_512:
; X64: ## %bb.0:
; X64-NEXT: vprorq $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x72,0xc0,0x03]
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vprorq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc0,0x03]
; X64-NEXT: vprorq $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x72,0xc0,0x03]
; X64-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pror.q.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.pror.q.512(<8 x i64> %x0, i32 3, <8 x i64> zeroinitializer, i8 %x3)
%res2 = call <8 x i64> @llvm.x86.avx512.mask.pror.q.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 -1)
%res3 = add <8 x i64> %res, %res1
%res4 = add <8 x i64> %res3, %res2
ret <8 x i64> %res4
}

declare <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)

define <2 x double>@test_int_x86_avx512_mask_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
; X86-LABEL: test_int_x86_avx512_mask_vfmadd_sd:
; X86: ## %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: vmovapd %xmm0, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xd8]
; X86-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa9,0xda]
; X86-NEXT: ## xmm3 = (xmm1 * xmm3) + xmm2
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovapd %xmm0, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe0]
; X86-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm4 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa9,0xe2]
; X86-NEXT: ## xmm4 = (xmm1 * xmm4) + xmm2
; X86-NEXT: vaddpd %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0x58,0xdc]
; X86-NEXT: vmovapd %xmm0, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe0]
; X86-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm1, %xmm4 ## encoding: [0x62,0xf2,0xf5,0x78,0xa9,0xe2]
; X86-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x79,0xa9,0xc2]
; X86-NEXT: vaddpd %xmm0, %xmm4, %xmm0 ## encoding: [0xc5,0xd9,0x58,0xc0]
; X86-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe1,0x58,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vfmadd_sd:
; X64: ## %bb.0:
; X64-NEXT: vmovapd %xmm0, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xd8]
; X64-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa9,0xda]
; X64-NEXT: ## xmm3 = (xmm1 * xmm3) + xmm2
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovapd %xmm0, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe0]
; X64-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm4 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa9,0xe2]
; X64-NEXT: ## xmm4 = (xmm1 * xmm4) + xmm2
; X64-NEXT: vaddpd %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0x58,0xdc]
; X64-NEXT: vmovapd %xmm0, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe0]
; X64-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm1, %xmm4 ## encoding: [0x62,0xf2,0xf5,0x78,0xa9,0xe2]
; X64-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x79,0xa9,0xc2]
; X64-NEXT: vaddpd %xmm0, %xmm4, %xmm0 ## encoding: [0xc5,0xd9,0x58,0xc0]
; X64-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe1,0x58,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4)
%res1 = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
%res2 = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 3)
%res3 = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 3)
%res4 = fadd <2 x double> %res, %res1
%res5 = fadd <2 x double> %res2, %res3
%res6 = fadd <2 x double> %res4, %res5
ret <2 x double> %res6
}

declare <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)

define <4 x float>@test_int_x86_avx512_mask_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
; X86-LABEL: test_int_x86_avx512_mask_vfmadd_ss:
; X86: ## %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: vmovaps %xmm0, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xd8]
; X86-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa9,0xda]
; X86-NEXT: ## xmm3 = (xmm1 * xmm3) + xmm2
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovaps %xmm0, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe0]
; X86-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm4 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa9,0xe2]
; X86-NEXT: ## xmm4 = (xmm1 * xmm4) + xmm2
; X86-NEXT: vaddps %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe0,0x58,0xdc]
; X86-NEXT: vmovaps %xmm0, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe0]
; X86-NEXT: vfmadd213ss {rz-sae}, %xmm2, %xmm1, %xmm4 ## encoding: [0x62,0xf2,0x75,0x78,0xa9,0xe2]
; X86-NEXT: vfmadd213ss {rz-sae}, %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x79,0xa9,0xc2]
; X86-NEXT: vaddps %xmm0, %xmm4, %xmm0 ## encoding: [0xc5,0xd8,0x58,0xc0]
; X86-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe0,0x58,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vfmadd_ss:
; X64: ## %bb.0:
; X64-NEXT: vmovaps %xmm0, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xd8]
; X64-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa9,0xda]
; X64-NEXT: ## xmm3 = (xmm1 * xmm3) + xmm2
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovaps %xmm0, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe0]
; X64-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm4 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa9,0xe2]
; X64-NEXT: ## xmm4 = (xmm1 * xmm4) + xmm2
; X64-NEXT: vaddps %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe0,0x58,0xdc]
; X64-NEXT: vmovaps %xmm0, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe0]
; X64-NEXT: vfmadd213ss {rz-sae}, %xmm2, %xmm1, %xmm4 ## encoding: [0x62,0xf2,0x75,0x78,0xa9,0xe2]
; X64-NEXT: vfmadd213ss {rz-sae}, %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x79,0xa9,0xc2]
; X64-NEXT: vaddps %xmm0, %xmm4, %xmm0 ## encoding: [0xc5,0xd8,0x58,0xc0]
; X64-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe0,0x58,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4)
%res1 = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
%res2 = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 3)
%res3 = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 3)
%res4 = fadd <4 x float> %res, %res1
%res5 = fadd <4 x float> %res2, %res3
%res6 = fadd <4 x float> %res4, %res5
ret <4 x float> %res6
}

declare <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)

define <2 x double>@test_int_x86_avx512_maskz_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_sd:
; X86: ## %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovapd %xmm0, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xd8]
; X86-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xa9,0xda]
; X86-NEXT: ## xmm3 = (xmm1 * xmm3) + xmm2
; X86-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xf9,0xa9,0xc2]
; X86-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe1,0x58,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_sd:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovapd %xmm0, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xd8]
; X64-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xa9,0xda]
8901 ; X64-NEXT: ## xmm3 = (xmm1 * xmm3) + xmm2 8902 ; X64-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xf9,0xa9,0xc2] 8903 ; X64-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe1,0x58,0xc0] 8904 ; X64-NEXT: retq ## encoding: [0xc3] 8905 %res = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4) 8906 %res1 = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 3) 8907 %res2 = fadd <2 x double> %res, %res1 8908 ret <2 x double> %res2 8909 } 8910 8911 declare <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) 8912 8913 define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){ 8914 ; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_ss: 8915 ; X86: ## %bb.0: 8916 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] 8917 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 8918 ; X86-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xa9,0xc2] 8919 ; X86-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 8920 ; X86-NEXT: retl ## encoding: [0xc3] 8921 ; 8922 ; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_ss: 8923 ; X64: ## %bb.0: 8924 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 8925 ; X64-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xa9,0xc2] 8926 ; X64-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 8927 ; X64-NEXT: retq ## encoding: [0xc3] 8928 %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4) 8929 %res1 = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 3) 8930 %res2 = fadd <4 x float> %res, %res1 8931 ret <4 x float> %res 8932 } 8933 declare <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) 8934 8935 define <2 x double>@test_int_x86_avx512_mask3_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){ 8936 ; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_sd: 8937 ; X86: ## %bb.0: 8938 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] 8939 ; X86-NEXT: vmovapd %xmm2, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xda] 8940 ; X86-NEXT: vfmadd231sd %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xb9,0xd9] 8941 ; X86-NEXT: ## xmm3 = (xmm0 * xmm1) + xmm3 8942 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] 8943 ; X86-NEXT: vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2] 8944 ; X86-NEXT: vfmadd231sd %xmm1, %xmm0, %xmm4 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb9,0xe1] 8945 ; X86-NEXT: ## xmm4 = (xmm0 * xmm1) + xmm4 8946 ; X86-NEXT: vaddpd %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0x58,0xdc] 8947 ; X86-NEXT: vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2] 8948 ; X86-NEXT: vfmadd231sd {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0xfd,0x78,0xb9,0xe1] 8949 ; X86-NEXT: vfmadd231sd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x79,0xb9,0xd1] 8950 ; X86-NEXT: vaddpd %xmm2, %xmm4, %xmm0 ## encoding: [0xc5,0xd9,0x58,0xc2] 8951 ; X86-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe1,0x58,0xc0] 8952 ; X86-NEXT: retl ## encoding: [0xc3] 8953 ; 8954 ; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_sd: 8955 ; X64: ## %bb.0: 8956 ; 

declare <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)

define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_ss:
; X86: ## %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xa9,0xc2]
; X86-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_ss:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xa9,0xc2]
; X64-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 3)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res
}
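
; NOTE: Reading aid, not autogenerated: the function above returns %res rather
; than %res2, so the i32 3 ({rz-sae}) call and the fadd are dead code; that is
; why the assertions show only a single vfmadd213ss and no rounding variant.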
declare <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)

define <2 x double>@test_int_x86_avx512_mask3_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_sd:
; X86: ## %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: vmovapd %xmm2, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xda]
; X86-NEXT: vfmadd231sd %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xb9,0xd9]
; X86-NEXT: ## xmm3 = (xmm0 * xmm1) + xmm3
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
; X86-NEXT: vfmadd231sd %xmm1, %xmm0, %xmm4 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb9,0xe1]
; X86-NEXT: ## xmm4 = (xmm0 * xmm1) + xmm4
; X86-NEXT: vaddpd %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0x58,0xdc]
; X86-NEXT: vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
; X86-NEXT: vfmadd231sd {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0xfd,0x78,0xb9,0xe1]
; X86-NEXT: vfmadd231sd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x79,0xb9,0xd1]
; X86-NEXT: vaddpd %xmm2, %xmm4, %xmm0 ## encoding: [0xc5,0xd9,0x58,0xc2]
; X86-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe1,0x58,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_sd:
; X64: ## %bb.0:
; X64-NEXT: vmovapd %xmm2, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xda]
; X64-NEXT: vfmadd231sd %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xb9,0xd9]
; X64-NEXT: ## xmm3 = (xmm0 * xmm1) + xmm3
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
; X64-NEXT: vfmadd231sd %xmm1, %xmm0, %xmm4 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb9,0xe1]
; X64-NEXT: ## xmm4 = (xmm0 * xmm1) + xmm4
; X64-NEXT: vaddpd %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0x58,0xdc]
; X64-NEXT: vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
; X64-NEXT: vfmadd231sd {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0xfd,0x78,0xb9,0xe1]
; X64-NEXT: vfmadd231sd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x79,0xb9,0xd1]
; X64-NEXT: vaddpd %xmm2, %xmm4, %xmm0 ## encoding: [0xc5,0xd9,0x58,0xc2]
; X64-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe1,0x58,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
  %res2 = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 3)
  %res3 = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 3)
  %res4 = fadd <2 x double> %res, %res1
  %res5 = fadd <2 x double> %res2, %res3
  %res6 = fadd <2 x double> %res4, %res5
  ret <2 x double> %res6
}
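
; NOTE: Reading aid, not autogenerated: the 231 form accumulates into its
; destination (xmm3 = (xmm0 * xmm1) + xmm3), and the mask3 intrinsics return
; their third (addend) operand, so each variant first copies %x2 (xmm2) into a
; scratch register (xmm3/xmm4) to preserve it across the fused multiply-add.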

declare <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)

define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ss:
; X86: ## %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: vmovaps %xmm2, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xda]
; X86-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xb9,0xd9]
; X86-NEXT: ## xmm3 = (xmm0 * xmm1) + xmm3
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
; X86-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm4 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb9,0xe1]
; X86-NEXT: ## xmm4 = (xmm0 * xmm1) + xmm4
; X86-NEXT: vaddps %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe0,0x58,0xdc]
; X86-NEXT: vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
; X86-NEXT: vfmadd231ss {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0x7d,0x78,0xb9,0xe1]
; X86-NEXT: vfmadd231ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x79,0xb9,0xd1]
; X86-NEXT: vaddps %xmm2, %xmm4, %xmm0 ## encoding: [0xc5,0xd8,0x58,0xc2]
; X86-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe0,0x58,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_ss:
; X64: ## %bb.0:
; X64-NEXT: vmovaps %xmm2, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xda]
; X64-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xb9,0xd9]
; X64-NEXT: ## xmm3 = (xmm0 * xmm1) + xmm3
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
; X64-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm4 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb9,0xe1]
; X64-NEXT: ## xmm4 = (xmm0 * xmm1) + xmm4
; X64-NEXT: vaddps %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe0,0x58,0xdc]
; X64-NEXT: vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
; X64-NEXT: vfmadd231ss {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0x7d,0x78,0xb9,0xe1]
; X64-NEXT: vfmadd231ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x79,0xb9,0xd1]
; X64-NEXT: vaddps %xmm2, %xmm4, %xmm0 ## encoding: [0xc5,0xd8,0x58,0xc2]
; X64-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe0,0x58,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
  %res2 = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 3)
  %res3 = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 3)
  %res4 = fadd <4 x float> %res, %res1
  %res5 = fadd <4 x float> %res2, %res3
  %res6 = fadd <4 x float> %res4, %res5
  ret <4 x float> %res6
}
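
; NOTE: Reading aid, not autogenerated: the "EVEX TO VEX Compression" remarks
; mark instructions that use no masking, no embedded rounding and only
; registers xmm0-xmm15, so the backend re-encodes them with the shorter VEX
; prefix ([0xc4,...]) instead of the four-byte EVEX prefix ([0x62,...]).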

define void @fmadd_ss_mask_memfold(float* %a, float* %b, i8 %c) {
; X86-LABEL: fmadd_ss_mask_memfold:
; X86: ## %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x0c]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ## encoding: [0x8b,0x54,0x24,0x04]
; X86-NEXT: vmovss (%edx), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x02]
; X86-NEXT: ## xmm0 = mem[0],zero,zero,zero
; X86-NEXT: vmovss (%ecx), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x09]
; X86-NEXT: ## xmm1 = mem[0],zero,zero,zero
; X86-NEXT: vfmadd213ss %xmm0, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa9,0xc8]
; X86-NEXT: ## xmm1 = (xmm0 * xmm1) + xmm0
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x10,0xc1]
; X86-NEXT: vmovss %xmm0, (%edx) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x02]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: fmadd_ss_mask_memfold:
; X64: ## %bb.0:
; X64-NEXT: vmovss (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
; X64-NEXT: ## xmm0 = mem[0],zero,zero,zero
; X64-NEXT: vmovss (%rsi), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x0e]
; X64-NEXT: ## xmm1 = mem[0],zero,zero,zero
; X64-NEXT: vfmadd213ss %xmm0, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa9,0xc8]
; X64-NEXT: ## xmm1 = (xmm0 * xmm1) + xmm0
; X64-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x10,0xc1]
; X64-NEXT: vmovss %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
  %a.val = load float, float* %a
  %av0 = insertelement <4 x float> undef, float %a.val, i32 0
  %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
  %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
  %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3

  %b.val = load float, float* %b
  %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
  %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
  %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
  %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3

  %vr = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av, i8 %c, i32 4)

  %sr = extractelement <4 x float> %vr, i32 0
  store float %sr, float* %a
  ret void
}

define void @fmadd_ss_maskz_memfold(float* %a, float* %b, i8 %c) {
; X86-LABEL: fmadd_ss_maskz_memfold:
; X86: ## %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x0c]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ## encoding: [0x8b,0x54,0x24,0x04]
; X86-NEXT: vmovss (%edx), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x02]
; X86-NEXT: ## xmm0 = mem[0],zero,zero,zero
; X86-NEXT: vfmadd231ss (%ecx), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xb9,0x01]
; X86-NEXT: ## xmm0 = (xmm0 * mem) + xmm0
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovss %xmm0, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x10,0xc0]
; X86-NEXT: vmovss %xmm0, (%edx) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x02]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: fmadd_ss_maskz_memfold:
; X64: ## %bb.0:
; X64-NEXT: vmovss (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
; X64-NEXT: ## xmm0 = mem[0],zero,zero,zero
; X64-NEXT: vfmadd231ss (%rsi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xb9,0x06]
; X64-NEXT: ## xmm0 = (xmm0 * mem) + xmm0
; X64-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT: vmovss %xmm0, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x10,0xc0]
; X64-NEXT: vmovss %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
  %a.val = load float, float* %a
  %av0 = insertelement <4 x float> undef, float %a.val, i32 0
  %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
  %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
  %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3

  %b.val = load float, float* %b
  %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
  %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
  %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
  %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3

  %vr = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av, i8 %c, i32 4)

  %sr = extractelement <4 x float> %vr, i32 0
  store float %sr, float* %a
  ret void
}
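
; NOTE: Reading aid, not autogenerated: in the merge-masking memfold test both
; scalars are loaded into registers and the merge with the original value of
; *%a is done by a masked vmovss, while in the zero-masking test the load of
; *%b is folded into vfmadd231ss as a memory operand and a masked vmovss {z}
; zeroes lane 0 when the mask bit is clear.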

define void @fmadd_sd_mask_memfold(double* %a, double* %b, i8 %c) {
; X86-LABEL: fmadd_sd_mask_memfold:
; X86: ## %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x0c]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ## encoding: [0x8b,0x54,0x24,0x04]
; X86-NEXT: vmovsd (%edx), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x02]
; X86-NEXT: ## xmm0 = mem[0],zero
; X86-NEXT: vmovsd (%ecx), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x09]
; X86-NEXT: ## xmm1 = mem[0],zero
; X86-NEXT: vfmadd213sd %xmm0, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa9,0xc8]
; X86-NEXT: ## xmm1 = (xmm0 * xmm1) + xmm0
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x10,0xc1]
; X86-NEXT: vmovsd %xmm0, (%edx) ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x02]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: fmadd_sd_mask_memfold:
; X64: ## %bb.0:
; X64-NEXT: vmovsd (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
; X64-NEXT: ## xmm0 = mem[0],zero
; X64-NEXT: vmovsd (%rsi), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0e]
; X64-NEXT: ## xmm1 = mem[0],zero
; X64-NEXT: vfmadd213sd %xmm0, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa9,0xc8]
; X64-NEXT: ## xmm1 = (xmm0 * xmm1) + xmm0
; X64-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x10,0xc1]
; X64-NEXT: vmovsd %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
  %a.val = load double, double* %a
  %av0 = insertelement <2 x double> undef, double %a.val, i32 0
  %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1

  %b.val = load double, double* %b
  %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
  %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1

  %vr = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av, i8 %c, i32 4)

  %sr = extractelement <2 x double> %vr, i32 0
  store double %sr, double* %a
  ret void
}

define void @fmadd_sd_maskz_memfold(double* %a, double* %b, i8 %c) {
; X86-LABEL: fmadd_sd_maskz_memfold:
; X86: ## %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x0c]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ## encoding: [0x8b,0x54,0x24,0x04]
; X86-NEXT: vmovsd (%edx), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x02]
; X86-NEXT: ## xmm0 = mem[0],zero
; X86-NEXT: vfmadd231sd (%ecx), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xb9,0x01]
; X86-NEXT: ## xmm0 = (xmm0 * mem) + xmm0
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovsd %xmm0, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x10,0xc0]
; X86-NEXT: vmovsd %xmm0, (%edx) ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x02]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: fmadd_sd_maskz_memfold:
; X64: ## %bb.0:
; X64-NEXT: vmovsd (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
; X64-NEXT: ## xmm0 = mem[0],zero
; X64-NEXT: vfmadd231sd (%rsi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xb9,0x06]
; X64-NEXT: ## xmm0 = (xmm0 * mem) + xmm0
; X64-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT: vmovsd %xmm0, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x10,0xc0]
; X64-NEXT: vmovsd %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
  %a.val = load double, double* %a
  %av0 = insertelement <2 x double> undef, double %a.val, i32 0
  %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1

  %b.val = load double, double* %b
  %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
  %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1

  %vr = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av, i8 %c, i32 4)

  %sr = extractelement <2 x double> %vr, i32 0
  store double %sr, double* %a
  ret void
}
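
; NOTE: Reading aid, not autogenerated: the sd memfold tests mirror the ss
; tests on <2 x double>; the register-form masked vmovsd above only merges
; (or, with {z}, zeroes) element 0 under %k1, leaving the upper element alone.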

declare <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)

define <2 x double>@test_int_x86_avx512_mask3_vfmsub_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_sd:
; X86: ## %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: vmovapd %xmm2, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xda]
; X86-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xbb,0xd9]
; X86-NEXT: ## xmm3 = (xmm0 * xmm1) - xmm3
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
; X86-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm4 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbb,0xe1]
; X86-NEXT: ## xmm4 = (xmm0 * xmm1) - xmm4
; X86-NEXT: vaddpd %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0x58,0xdc]
; X86-NEXT: vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
; X86-NEXT: vfmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0xfd,0x78,0xbb,0xe1]
; X86-NEXT: vfmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x79,0xbb,0xd1]
; X86-NEXT: vaddpd %xmm2, %xmm4, %xmm0 ## encoding: [0xc5,0xd9,0x58,0xc2]
; X86-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe1,0x58,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_sd:
; X64: ## %bb.0:
; X64-NEXT: vmovapd %xmm2, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xda]
; X64-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xbb,0xd9]
; X64-NEXT: ## xmm3 = (xmm0 * xmm1) - xmm3
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
; X64-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm4 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbb,0xe1]
; X64-NEXT: ## xmm4 = (xmm0 * xmm1) - xmm4
; X64-NEXT: vaddpd %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0x58,0xdc]
; X64-NEXT: vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
; X64-NEXT: vfmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0xfd,0x78,0xbb,0xe1]
; X64-NEXT: vfmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x79,0xbb,0xd1]
; X64-NEXT: vaddpd %xmm2, %xmm4, %xmm0 ## encoding: [0xc5,0xd9,0x58,0xc2]
; X64-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe1,0x58,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
  %res2 = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 3)
  %res3 = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 3)
  %res4 = fadd <2 x double> %res, %res1
  %res5 = fadd <2 x double> %res2, %res3
  %res6 = fadd <2 x double> %res4, %res5
  ret <2 x double> %res6
}

declare <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)

define <4 x float>@test_int_x86_avx512_mask3_vfmsub_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_ss:
; X86: ## %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: vmovaps %xmm2, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xda]
; X86-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xbb,0xd9]
; X86-NEXT: ## xmm3 = (xmm0 * xmm1) - xmm3
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
; X86-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm4 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbb,0xe1]
; X86-NEXT: ## xmm4 = (xmm0 * xmm1) - xmm4
; X86-NEXT: vaddps %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe0,0x58,0xdc]
; X86-NEXT: vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
; X86-NEXT: vfmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0x7d,0x78,0xbb,0xe1]
; X86-NEXT: vfmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x79,0xbb,0xd1]
; X86-NEXT: vaddps %xmm2, %xmm4, %xmm0 ## encoding: [0xc5,0xd8,0x58,0xc2]
; X86-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe0,0x58,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_ss:
; X64: ## %bb.0:
; X64-NEXT: vmovaps %xmm2, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xda]
; X64-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xbb,0xd9]
; X64-NEXT: ## xmm3 = (xmm0 * xmm1) - xmm3
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
; X64-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm4 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbb,0xe1]
; X64-NEXT: ## xmm4 = (xmm0 * xmm1) - xmm4
; X64-NEXT: vaddps %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe0,0x58,0xdc]
; X64-NEXT: vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
; X64-NEXT: vfmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0x7d,0x78,0xbb,0xe1]
; X64-NEXT: vfmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x79,0xbb,0xd1]
; X64-NEXT: vaddps %xmm2, %xmm4, %xmm0 ## encoding: [0xc5,0xd8,0x58,0xc2]
; X64-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe0,0x58,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
  %res2 = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 3)
  %res3 = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 3)
  %res4 = fadd <4 x float> %res, %res1
  %res5 = fadd <4 x float> %res2, %res3
  %res6 = fadd <4 x float> %res4, %res5
  ret <4 x float> %res6
}

declare <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)

define <2 x double>@test_int_x86_avx512_mask3_vfnmsub_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_sd:
; X86: ## %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: vmovapd %xmm2, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xda]
; X86-NEXT: vfnmsub231sd %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xbf,0xd9]
; X86-NEXT: ## xmm3 = -(xmm0 * xmm1) - xmm3
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
; X86-NEXT: vfnmsub231sd %xmm1, %xmm0, %xmm4 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbf,0xe1]
; X86-NEXT: ## xmm4 = -(xmm0 * xmm1) - xmm4
; X86-NEXT: vaddpd %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0x58,0xdc]
; X86-NEXT: vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
; X86-NEXT: vfnmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0xfd,0x78,0xbf,0xe1]
; X86-NEXT: vfnmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x79,0xbf,0xd1]
; X86-NEXT: vaddpd %xmm2, %xmm4, %xmm0 ## encoding: [0xc5,0xd9,0x58,0xc2]
; X86-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe1,0x58,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_sd:
; X64: ## %bb.0:
; X64-NEXT: vmovapd %xmm2, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xda]
; X64-NEXT: vfnmsub231sd %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xbf,0xd9]
; X64-NEXT: ## xmm3 = -(xmm0 * xmm1) - xmm3
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
; X64-NEXT: vfnmsub231sd %xmm1, %xmm0, %xmm4 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbf,0xe1]
; X64-NEXT: ## xmm4 = -(xmm0 * xmm1) - xmm4
; X64-NEXT: vaddpd %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0x58,0xdc]
; X64-NEXT: vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
; X64-NEXT: vfnmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0xfd,0x78,0xbf,0xe1]
; X64-NEXT: vfnmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x79,0xbf,0xd1]
; X64-NEXT: vaddpd %xmm2, %xmm4, %xmm0 ## encoding: [0xc5,0xd9,0x58,0xc2]
; X64-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe1,0x58,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
  %res2 = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 3)
  %res3 = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 3)
  %res4 = fadd <2 x double> %res, %res1
  %res5 = fadd <2 x double> %res2, %res3
  %res6 = fadd <2 x double> %res4, %res5
  ret <2 x double> %res6
}

declare <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)

define <4 x float>@test_int_x86_avx512_mask3_vfnmsub_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_ss:
; X86: ## %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: vmovaps %xmm2, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xda]
; X86-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xbf,0xd9]
; X86-NEXT: ## xmm3 = -(xmm0 * xmm1) - xmm3
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
; X86-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm4 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbf,0xe1]
; X86-NEXT: ## xmm4 = -(xmm0 * xmm1) - xmm4
; X86-NEXT: vaddps %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe0,0x58,0xdc]
; X86-NEXT: vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
; X86-NEXT: vfnmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0x7d,0x78,0xbf,0xe1]
; X86-NEXT: vfnmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x79,0xbf,0xd1]
; X86-NEXT: vaddps %xmm2, %xmm4, %xmm0 ## encoding: [0xc5,0xd8,0x58,0xc2]
; X86-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe0,0x58,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_ss:
; X64: ## %bb.0:
; X64-NEXT: vmovaps %xmm2, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xda]
; X64-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xbf,0xd9]
; X64-NEXT: ## xmm3 = -(xmm0 * xmm1) - xmm3
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
; X64-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm4 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbf,0xe1]
; X64-NEXT: ## xmm4 = -(xmm0 * xmm1) - xmm4
; X64-NEXT: vaddps %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe0,0x58,0xdc]
; X64-NEXT: vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
; X64-NEXT: vfnmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0x7d,0x78,0xbf,0xe1]
; X64-NEXT: vfnmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x79,0xbf,0xd1]
; X64-NEXT: vaddps %xmm2, %xmm4, %xmm0 ## encoding: [0xc5,0xd8,0x58,0xc2]
; X64-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe0,0x58,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
  %res2 = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 3)
  %res3 = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 3)
  %res4 = fadd <4 x float> %res, %res1
  %res5 = fadd <4 x float> %res2, %res3
  %res6 = fadd <4 x float> %res4, %res5
  ret <4 x float> %res6
}
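
; NOTE: Reading aid, not autogenerated: the sign conventions in the comments
; above are fmsub = (a * b) - c and fnmsub = -(a * b) - c; for lane 0 both can
; be sketched in generic IR with fneg plus the fma intrinsic:
;   %negc = fneg double %c
;   %fms = call double @llvm.fma.f64(double %a, double %b, double %negc)
;   %nega = fneg double %a
;   %fnms = call double @llvm.fma.f64(double %nega, double %b, double %negc)
; where %a, %b and %c are illustrative lane-0 scalars, not test values.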

define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1, float *%ptr_b ,i8 %x3,i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ss_rm:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ## encoding: [0x8a,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vfmadd231ss (%eax), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb9,0x08]
; X86-NEXT: ## xmm1 = (xmm0 * mem) + xmm1
; X86-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_ss_rm:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vfmadd231ss (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb9,0x0f]
; X64-NEXT: ## xmm1 = (xmm0 * mem) + xmm1
; X64-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %res = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %vecinit.i, <4 x float> %x1, i8 %x3, i32 4)
  ret < 4 x float> %res
}

define <4 x float>@test_int_x86_avx512_mask_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1,float *%ptr_b ,i8 %x3,i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vfmadd_ss_rm:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ## encoding: [0x8a,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vfmadd132ss (%eax), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x99,0x00]
; X86-NEXT: ## xmm0 = (xmm0 * mem) + xmm1
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vfmadd_ss_rm:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vfmadd132ss (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x99,0x07]
; X64-NEXT: ## xmm0 = (xmm0 * mem) + xmm1
; X64-NEXT: retq ## encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0,<4 x float> %vecinit.i, <4 x float> %x1, i8 %x3, i32 4)
  ret < 4 x float> %res
}


define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1,float *%ptr_b ,i8 %x3,i32 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ss_rm:
; CHECK: ## %bb.0:
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
; CHECK-NEXT: vblendps $1, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01]
; CHECK-NEXT: ## xmm0 = xmm1[0],xmm0[1,2,3]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %vecinit.i, i8 0, i32 4)
  ret < 4 x float> %res
}
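
; NOTE: Reading aid, not autogenerated: in the maskz_vfmadd_ss_rm test the mask
; is the constant 0, so zero-masking always selects 0.0 for lane 0; the backend
; deletes the FMA entirely and merely blends a zero into element 0 (vxorps +
; vblendps), which is why a single CHECK block covers both targets.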