; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512ifma --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512ifma --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

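; The vpmadd52h.uq.512 intrinsic maps to VPMADD52HUQ: for each 64-bit lane it
; multiplies the unsigned 52-bit integers held in the low 52 bits of the second
; and third operands and adds the high 52 bits of the 104-bit product to the
; first (accumulator) operand.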
declare <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64>, <8 x i64>, <8 x i64>)

define <8 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0xb5,0xda]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X86-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm4 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0xe2]
; X86-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0xef,0xd2]
; X86-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0xc2]
; X86-NEXT:    vpaddq %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0xdd,0x48,0xd4,0xc0]
; X86-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0xd2]
; X86-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT:    vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0xb5,0xda]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X64-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm4 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0xe2]
; X64-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0xef,0xd2]
; X64-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0xc2]
; X64-NEXT:    vpaddq %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0xdd,0x48,0xd4,0xc0]
; X64-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0xd2]
; X64-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]

  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
  %4 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> %x0
  %7 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> zeroinitializer, <8 x i64> %x1, <8 x i64> zeroinitializer)
  %8 = bitcast i8 %x3 to <8 x i1>
  %9 = select <8 x i1> %8, <8 x i64> %7, <8 x i64> zeroinitializer
  %10 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %res4 = add <8 x i64> %3, %6
  %res5 = add <8 x i64> %10, %9
  %res6 = add <8 x i64> %res5, %res4
  ret <8 x i64> %res6
}

define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0xb5,0xda]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X86-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0xe2]
; X86-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0xef,0xd2]
; X86-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0xc2]
; X86-NEXT:    vpaddq %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0xdd,0x48,0xd4,0xc0]
; X86-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0xd2]
; X86-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT:    vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0xb5,0xda]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X64-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0xe2]
; X64-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0xef,0xd2]
; X64-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0xc2]
; X64-NEXT:    vpaddq %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0xdd,0x48,0xd4,0xc0]
; X64-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0xd2]
; X64-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]

  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
  %4 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> zeroinitializer
  %7 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> zeroinitializer, <8 x i64> %x1, <8 x i64> zeroinitializer)
  %8 = bitcast i8 %x3 to <8 x i1>
  %9 = select <8 x i1> %8, <8 x i64> %7, <8 x i64> zeroinitializer
  %10 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %res4 = add <8 x i64> %3, %6
  %res5 = add <8 x i64> %10, %9
  %res6 = add <8 x i64> %res5, %res4
  ret <8 x i64> %res6
}

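; vpmadd52l.uq.512 (VPMADD52LUQ) is the same operation, except that the low
; 52 bits of the 104-bit product are accumulated instead of the high 52 bits.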
declare <8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64>, <8 x i64>, <8 x i64>)

define <8 x i64>@test_int_x86_avx512_mask_vpmadd52l_uq_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpmadd52l_uq_512:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0xb4,0xda]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X86-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm4 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb4,0xe2]
; X86-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0xef,0xd2]
; X86-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb4,0xc2]
; X86-NEXT:    vpaddq %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0xdd,0x48,0xd4,0xc0]
; X86-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb4,0xd2]
; X86-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT:    vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpmadd52l_uq_512:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0xb4,0xda]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X64-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm4 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb4,0xe2]
; X64-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0xef,0xd2]
; X64-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb4,0xc2]
; X64-NEXT:    vpaddq %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0xdd,0x48,0xd4,0xc0]
; X64-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb4,0xd2]
; X64-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]

  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
  %4 = call <8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> %x0
  %7 = call <8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> zeroinitializer, <8 x i64> %x1, <8 x i64> zeroinitializer)
  %8 = bitcast i8 %x3 to <8 x i1>
  %9 = select <8 x i1> %8, <8 x i64> %7, <8 x i64> zeroinitializer
  %10 = call <8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %res4 = add <8 x i64> %3, %6
  %res5 = add <8 x i64> %10, %9
  %res6 = add <8 x i64> %res5, %res4
  ret <8 x i64> %res6
}

define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52l_uq_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52l_uq_512:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0xb4,0xda]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X86-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb4,0xe2]
; X86-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0xef,0xd2]
; X86-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb4,0xc2]
; X86-NEXT:    vpaddq %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0xdd,0x48,0xd4,0xc0]
; X86-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb4,0xd2]
; X86-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT:    vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52l_uq_512:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0xb4,0xda]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X64-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb4,0xe2]
; X64-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0xef,0xd2]
; X64-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb4,0xc2]
; X64-NEXT:    vpaddq %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0xdd,0x48,0xd4,0xc0]
; X64-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb4,0xd2]
; X64-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]

  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
  %4 = call <8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> zeroinitializer
  %7 = call <8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> zeroinitializer, <8 x i64> %x1, <8 x i64> zeroinitializer)
  %8 = bitcast i8 %x3 to <8 x i1>
  %9 = select <8 x i1> %8, <8 x i64> %7, <8 x i64> zeroinitializer
  %10 = call <8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %res4 = add <8 x i64> %3, %6
  %res5 = add <8 x i64> %10, %9
  %res6 = add <8 x i64> %res5, %res4
  ret <8 x i64> %res6
}

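; The tests below check memory-operand folding: a full 512-bit load of one
; multiplicand should fold into the instruction's memory form, and a broadcast
; 64-bit scalar should use the {1to8} embedded-broadcast form on x86-64, while
; 32-bit targets load and broadcast into a register first.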
define <8 x i64>@test_int_x86_avx512_vpmadd52h_uq_512_load(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>* %x2ptr) {
; X86-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmadd52huq (%eax), %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load:
; X64:       # %bb.0:
; X64-NEXT:    vpmadd52huq (%rdi), %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x2 = load <8 x i64>, <8 x i64>* %x2ptr
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  ret <8 x i64> %1
}

define <8 x i64>@test_int_x86_avx512_vpmadd52h_uq_512_load_bcast(<8 x i64> %x0, <8 x i64> %x1, i64* %x2ptr) {
; X86-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_bcast:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
; X86-NEXT:    # xmm2 = mem[0],zero
; X86-NEXT:    vpbroadcastq %xmm2, %zmm2 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd2]
; X86-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xb5,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_bcast:
; X64:       # %bb.0:
; X64-NEXT:    vpmadd52huq (%rdi){1to8}, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x58,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x2load = load i64, i64* %x2ptr
  %x2insert = insertelement <8 x i64> undef, i64 %x2load, i64 0
  %x2 = shufflevector <8 x i64> %x2insert, <8 x i64> undef, <8 x i32> zeroinitializer
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  ret <8 x i64> %1
}

define <8 x i64>@test_int_x86_avx512_vpmadd52h_uq_512_load_commute(<8 x i64> %x0, <8 x i64>* %x1ptr, <8 x i64> %x2) {
; X86-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_commute:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmadd52huq (%eax), %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_commute:
; X64:       # %bb.0:
; X64-NEXT:    vpmadd52huq (%rdi), %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x1 = load <8 x i64>, <8 x i64>* %x1ptr
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  ret <8 x i64> %1
}

define <8 x i64>@test_int_x86_avx512_vpmadd52h_uq_512_load_commute_bcast(<8 x i64> %x0, i64* %x1ptr, <8 x i64> %x2) {
; X86-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_commute_bcast:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
; X86-NEXT:    # xmm2 = mem[0],zero
; X86-NEXT:    vpbroadcastq %xmm2, %zmm2 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd2]
; X86-NEXT:    vpmadd52huq %zmm1, %zmm2, %zmm0 # encoding: [0x62,0xf2,0xed,0x48,0xb5,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_commute_bcast:
; X64:       # %bb.0:
; X64-NEXT:    vpmadd52huq (%rdi){1to8}, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x58,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x1load = load i64, i64* %x1ptr
  %x1insert = insertelement <8 x i64> undef, i64 %x1load, i64 0
  %x1 = shufflevector <8 x i64> %x1insert, <8 x i64> undef, <8 x i32> zeroinitializer
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  ret <8 x i64> %1
}

define <8 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_512_load(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>* %x2ptr, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmadd52huq (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmadd52huq (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x2 = load <8 x i64>, <8 x i64>* %x2ptr
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
  ret <8 x i64> %3
}

define <8 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_512_load_bcast(<8 x i64> %x0, <8 x i64> %x1, i64* %x2ptr, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_bcast:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
; X86-NEXT:    # xmm2 = mem[0],zero
; X86-NEXT:    vpbroadcastq %xmm2, %zmm2 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd2]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_bcast:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmadd52huq (%rdi){1to8}, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x59,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x2load = load i64, i64* %x2ptr
  %x2insert = insertelement <8 x i64> undef, i64 %x2load, i64 0
  %x2 = shufflevector <8 x i64> %x2insert, <8 x i64> undef, <8 x i32> zeroinitializer
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
  ret <8 x i64> %3
}

define <8 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute(<8 x i64> %x0, <8 x i64>* %x1ptr, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmadd52huq (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmadd52huq (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x1 = load <8 x i64>, <8 x i64>* %x1ptr
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
  ret <8 x i64> %3
}

define <8 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute_bcast(<8 x i64> %x0, i64* %x1ptr, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute_bcast:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
; X86-NEXT:    # xmm2 = mem[0],zero
; X86-NEXT:    vpbroadcastq %xmm2, %zmm2 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd2]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmadd52huq %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0xb5,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute_bcast:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmadd52huq (%rdi){1to8}, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x59,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x1load = load i64, i64* %x1ptr
  %x1insert = insertelement <8 x i64> undef, i64 %x1load, i64 0
  %x1 = shufflevector <8 x i64> %x1insert, <8 x i64> undef, <8 x i32> zeroinitializer
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
  ret <8 x i64> %3
}

define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_512_load(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>* %x2ptr, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmadd52huq (%eax), %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmadd52huq (%rdi), %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x2 = load <8 x i64>, <8 x i64>* %x2ptr
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
  ret <8 x i64> %3
}

define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_bcast(<8 x i64> %x0, <8 x i64> %x1, i64* %x2ptr, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_bcast:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
; X86-NEXT:    # xmm2 = mem[0],zero
; X86-NEXT:    vpbroadcastq %xmm2, %zmm2 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd2]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_bcast:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmadd52huq (%rdi){1to8}, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xd9,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x2load = load i64, i64* %x2ptr
  %x2insert = insertelement <8 x i64> undef, i64 %x2load, i64 0
  %x2 = shufflevector <8 x i64> %x2insert, <8 x i64> undef, <8 x i32> zeroinitializer
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
  ret <8 x i64> %3
}

define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute(<8 x i64> %x0, <8 x i64>* %x1ptr, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmadd52huq (%eax), %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmadd52huq (%rdi), %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x1 = load <8 x i64>, <8 x i64>* %x1ptr
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
  ret <8 x i64> %3
}

define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute_bcast(<8 x i64> %x0, i64* %x1ptr, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute_bcast:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
; X86-NEXT:    # xmm2 = mem[0],zero
; X86-NEXT:    vpbroadcastq %xmm2, %zmm2 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd2]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmadd52huq %zmm1, %zmm2, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xed,0xc9,0xb5,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute_bcast:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmadd52huq (%rdi){1to8}, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xd9,0xb5,0x07]
; X64-NEXT:    retq # encoding: [0xc3]

  %x1load = load i64, i64* %x1ptr
  %x1insert = insertelement <8 x i64> undef, i64 %x1load, i64 0
  %x1 = shufflevector <8 x i64> %x1insert, <8 x i64> undef, <8 x i32> zeroinitializer
  %1 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
  ret <8 x i64> %3
}