; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512ifma --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512ifma --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

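; vpmadd52huq multiplies the unsigned low 52 bits of the 64-bit elements of the
; second and third source operands and accumulates the high 52 bits of each
; 104-bit product into the corresponding 64-bit element of the destination.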
declare <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64>, <8 x i64>, <8 x i64>)

define <8 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0xb5,0xda]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X86-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm4 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0xe2]
; X86-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0xef,0xd2]
; X86-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0xc2]
; X86-NEXT:    vpaddq %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0xdd,0x48,0xd4,0xc0]
; X86-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0xd2]
; X86-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT:    vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0xb5,0xda]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X64-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm4 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0xe2]
; X64-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0xef,0xd2]
; X64-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0xc2]
; X64-NEXT:    vpaddq %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0xdd,0x48,0xd4,0xc0]
; X64-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0xd2]
; X64-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]

  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
  %4 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> %x0
  %7 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> zeroinitializer, <8 x i64> %x1, <8 x i64> zeroinitializer)
  %8 = bitcast i8 %x3 to <8 x i1>
  %9 = select <8 x i1> %8, <8 x i64> %7, <8 x i64> zeroinitializer
  %10 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %res4 = add <8 x i64> %3, %6
  %res5 = add <8 x i64> %10, %9
  %res6 = add <8 x i64> %res5, %res4
  ret <8 x i64> %res6
}

define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0xb5,0xda]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X86-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0xe2]
; X86-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0xef,0xd2]
; X86-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0xc2]
; X86-NEXT:    vpaddq %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0xdd,0x48,0xd4,0xc0]
; X86-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0xd2]
; X86-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT:    vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0xb5,0xda]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X64-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0xe2]
; X64-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0xef,0xd2]
; X64-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0xc2]
; X64-NEXT:    vpaddq %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0xdd,0x48,0xd4,0xc0]
; X64-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0xd2]
; X64-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]

  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
  %4 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> zeroinitializer
  %7 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> zeroinitializer, <8 x i64> %x1, <8 x i64> zeroinitializer)
  %8 = bitcast i8 %x3 to <8 x i1>
  %9 = select <8 x i1> %8, <8 x i64> %7, <8 x i64> zeroinitializer
  %10 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %res4 = add <8 x i64> %3, %6
  %res5 = add <8 x i64> %10, %9
  %res6 = add <8 x i64> %res5, %res4
  ret <8 x i64> %res6
}

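; vpmadd52luq is the companion form: it accumulates the low 52 bits of each
; 104-bit product instead of the high 52 bits.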
declare <8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64>, <8 x i64>, <8 x i64>)

define <8 x i64>@test_int_x86_avx512_mask_vpmadd52l_uq_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpmadd52l_uq_512:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0xb4,0xda]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X86-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm4 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb4,0xe2]
; X86-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0xef,0xd2]
; X86-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb4,0xc2]
; X86-NEXT:    vpaddq %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0xdd,0x48,0xd4,0xc0]
; X86-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb4,0xd2]
; X86-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT:    vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpmadd52l_uq_512:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0xb4,0xda]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X64-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm4 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb4,0xe2]
; X64-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0xef,0xd2]
; X64-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb4,0xc2]
; X64-NEXT:    vpaddq %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0xdd,0x48,0xd4,0xc0]
; X64-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb4,0xd2]
; X64-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]

  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
  %4 = call <8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> %x0
  %7 = call <8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> zeroinitializer, <8 x i64> %x1, <8 x i64> zeroinitializer)
  %8 = bitcast i8 %x3 to <8 x i1>
  %9 = select <8 x i1> %8, <8 x i64> %7, <8 x i64> zeroinitializer
  %10 = call <8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %res4 = add <8 x i64> %3, %6
  %res5 = add <8 x i64> %10, %9
  %res6 = add <8 x i64> %res5, %res4
  ret <8 x i64> %res6
}

define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52l_uq_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52l_uq_512:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0xb4,0xda]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X86-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb4,0xe2]
; X86-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0xef,0xd2]
; X86-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb4,0xc2]
; X86-NEXT:    vpaddq %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0xdd,0x48,0xd4,0xc0]
; X86-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb4,0xd2]
; X86-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT:    vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52l_uq_512:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0xb4,0xda]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X64-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb4,0xe2]
; X64-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0xef,0xd2]
; X64-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb4,0xc2]
; X64-NEXT:    vpaddq %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0xdd,0x48,0xd4,0xc0]
; X64-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb4,0xd2]
; X64-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]

  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
  %4 = call <8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> zeroinitializer
  %7 = call <8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> zeroinitializer, <8 x i64> %x1, <8 x i64> zeroinitializer)
  %8 = bitcast i8 %x3 to <8 x i1>
  %9 = select <8 x i1> %8, <8 x i64> %7, <8 x i64> zeroinitializer
  %10 = call <8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %res4 = add <8 x i64> %3, %6
  %res5 = add <8 x i64> %10, %9
  %res6 = add <8 x i64> %res5, %res4
  ret <8 x i64> %res6
}

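; The *_load tests below check that a full-width memory operand folds into the
; instruction, the *_bcast tests check folding of a broadcast i64 element
; ({1to8}), and the *_commute tests check that a loaded multiplicand can be
; commuted into the foldable operand position.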
define <8 x i64>@test_int_x86_avx512_vpmadd52h_uq_512_load(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>* %x2ptr) {
; X86-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmadd52huq (%eax), %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load:
; X64:       # %bb.0:
; X64-NEXT:    vpmadd52huq (%rdi), %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x2 = load <8 x i64>, <8 x i64>* %x2ptr
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  ret <8 x i64> %1
}

define <8 x i64>@test_int_x86_avx512_vpmadd52h_uq_512_load_bcast(<8 x i64> %x0, <8 x i64> %x1, i64* %x2ptr) {
; X86-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_bcast:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
; X86-NEXT:    # xmm2 = mem[0],zero
; X86-NEXT:    vpbroadcastq %xmm2, %zmm2 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd2]
; X86-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xb5,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_bcast:
; X64:       # %bb.0:
; X64-NEXT:    vpmadd52huq (%rdi){1to8}, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x58,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x2load = load i64, i64* %x2ptr
  %x2insert = insertelement <8 x i64> undef, i64 %x2load, i64 0
  %x2 = shufflevector <8 x i64> %x2insert, <8 x i64> undef, <8 x i32> zeroinitializer
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  ret <8 x i64> %1
}

define <8 x i64>@test_int_x86_avx512_vpmadd52h_uq_512_load_commute(<8 x i64> %x0, <8 x i64>* %x1ptr, <8 x i64> %x2) {
; X86-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_commute:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmadd52huq (%eax), %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_commute:
; X64:       # %bb.0:
; X64-NEXT:    vpmadd52huq (%rdi), %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x1 = load <8 x i64>, <8 x i64>* %x1ptr
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  ret <8 x i64> %1
}

define <8 x i64>@test_int_x86_avx512_vpmadd52h_uq_512_load_commute_bcast(<8 x i64> %x0, i64* %x1ptr, <8 x i64> %x2) {
; X86-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_commute_bcast:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
; X86-NEXT:    # xmm2 = mem[0],zero
; X86-NEXT:    vpbroadcastq %xmm2, %zmm2 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd2]
; X86-NEXT:    vpmadd52huq %zmm1, %zmm2, %zmm0 # encoding: [0x62,0xf2,0xed,0x48,0xb5,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_commute_bcast:
; X64:       # %bb.0:
; X64-NEXT:    vpmadd52huq (%rdi){1to8}, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x58,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x1load = load i64, i64* %x1ptr
  %x1insert = insertelement <8 x i64> undef, i64 %x1load, i64 0
  %x1 = shufflevector <8 x i64> %x1insert, <8 x i64> undef, <8 x i32> zeroinitializer
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  ret <8 x i64> %1
}

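; Merge-masked ({%k1}) variants of the memory and broadcast folding tests.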
define <8 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_512_load(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>* %x2ptr, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmadd52huq (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmadd52huq (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x2 = load <8 x i64>, <8 x i64>* %x2ptr
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
  ret <8 x i64> %3
}

define <8 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_512_load_bcast(<8 x i64> %x0, <8 x i64> %x1, i64* %x2ptr, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_bcast:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
; X86-NEXT:    # xmm2 = mem[0],zero
; X86-NEXT:    vpbroadcastq %xmm2, %zmm2 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd2]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_bcast:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmadd52huq (%rdi){1to8}, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x59,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x2load = load i64, i64* %x2ptr
  %x2insert = insertelement <8 x i64> undef, i64 %x2load, i64 0
  %x2 = shufflevector <8 x i64> %x2insert, <8 x i64> undef, <8 x i32> zeroinitializer
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
  ret <8 x i64> %3
}

define <8 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute(<8 x i64> %x0, <8 x i64>* %x1ptr, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmadd52huq (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmadd52huq (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x1 = load <8 x i64>, <8 x i64>* %x1ptr
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
  ret <8 x i64> %3
}

define <8 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute_bcast(<8 x i64> %x0, i64* %x1ptr, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute_bcast:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
; X86-NEXT:    # xmm2 = mem[0],zero
; X86-NEXT:    vpbroadcastq %xmm2, %zmm2 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd2]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmadd52huq %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0xb5,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute_bcast:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmadd52huq (%rdi){1to8}, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x59,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x1load = load i64, i64* %x1ptr
  %x1insert = insertelement <8 x i64> undef, i64 %x1load, i64 0
  %x1 = shufflevector <8 x i64> %x1insert, <8 x i64> undef, <8 x i32> zeroinitializer
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
  ret <8 x i64> %3
}

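; Zero-masked ({%k1} {z}) variants of the memory and broadcast folding tests.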
define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_512_load(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>* %x2ptr, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmadd52huq (%eax), %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmadd52huq (%rdi), %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x2 = load <8 x i64>, <8 x i64>* %x2ptr
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
  ret <8 x i64> %3
}

define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_bcast(<8 x i64> %x0, <8 x i64> %x1, i64* %x2ptr, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_bcast:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
; X86-NEXT:    # xmm2 = mem[0],zero
; X86-NEXT:    vpbroadcastq %xmm2, %zmm2 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd2]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_bcast:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmadd52huq (%rdi){1to8}, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xd9,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x2load = load i64, i64* %x2ptr
  %x2insert = insertelement <8 x i64> undef, i64 %x2load, i64 0
  %x2 = shufflevector <8 x i64> %x2insert, <8 x i64> undef, <8 x i32> zeroinitializer
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
  ret <8 x i64> %3
}

define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute(<8 x i64> %x0, <8 x i64>* %x1ptr, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmadd52huq (%eax), %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmadd52huq (%rdi), %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x1 = load <8 x i64>, <8 x i64>* %x1ptr
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
  ret <8 x i64> %3
}

define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute_bcast(<8 x i64> %x0, i64* %x1ptr, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute_bcast:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
; X86-NEXT:    # xmm2 = mem[0],zero
; X86-NEXT:    vpbroadcastq %xmm2, %zmm2 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd2]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmadd52huq %zmm1, %zmm2, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xed,0xc9,0xb5,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute_bcast:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmadd52huq (%rdi){1to8}, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xd9,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x1load = load i64, i64* %x1ptr
  %x1insert = insertelement <8 x i64> undef, i64 %x1load, i64 0
  %x1 = shufflevector <8 x i64> %x1insert, <8 x i64> undef, <8 x i32> zeroinitializer
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
  ret <8 x i64> %3
}