Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512ifma,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512ifma,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
      4 
      5 declare <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64>, <2 x i64>, <2 x i64>)
      6 
      7 define <2 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
      8 ; X86-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_128:
      9 ; X86:       # %bb.0:
     10 ; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
     11 ; X86-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0xb5,0xda]
     12 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
     13 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
     14 ; X86-NEXT:    vmovdqa %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe0]
     15 ; X86-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm4 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xb5,0xe2]
     16 ; X86-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
     17 ; X86-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xb5,0xc2]
     18 ; X86-NEXT:    vpaddq %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xd4,0xc0]
     19 ; X86-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xb5,0xd2]
     20 ; X86-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
     21 ; X86-NEXT:    vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0]
     22 ; X86-NEXT:    retl # encoding: [0xc3]
     23 ;
     24 ; X64-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_128:
     25 ; X64:       # %bb.0:
     26 ; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
     27 ; X64-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0xb5,0xda]
     28 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
     29 ; X64-NEXT:    vmovdqa %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe0]
     30 ; X64-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm4 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xb5,0xe2]
     31 ; X64-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
     32 ; X64-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xb5,0xc2]
     33 ; X64-NEXT:    vpaddq %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xd4,0xc0]
     34 ; X64-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xb5,0xd2]
     35 ; X64-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
     36 ; X64-NEXT:    vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0]
     37 ; X64-NEXT:    retq # encoding: [0xc3]
     38 
     39   %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2)
     40   %2 = bitcast i8 %x3 to <8 x i1>
     41   %extract2 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
     42   %3 = select <2 x i1> %extract2, <2 x i64> %1, <2 x i64> %x0
     43   %4 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer)
     44   %5 = bitcast i8 %x3 to <8 x i1>
     45   %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1>
     46   %6 = select <2 x i1> %extract1, <2 x i64> %4, <2 x i64> %x0
     47   %7 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> zeroinitializer, <2 x i64> %x1, <2 x i64> zeroinitializer)
     48   %8 = bitcast i8 %x3 to <8 x i1>
     49   %extract = shufflevector <8 x i1> %8, <8 x i1> %8, <2 x i32> <i32 0, i32 1>
     50   %9 = select <2 x i1> %extract, <2 x i64> %7, <2 x i64> zeroinitializer
     51   %10 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2)
     52   %res4 = add <2 x i64> %3, %6
     53   %res5 = add <2 x i64> %10, %9
     54   %res6 = add <2 x i64> %res5, %res4
     55   ret <2 x i64> %res6
     56 }
     57 
     58 declare <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64>, <4 x i64>, <4 x i64>)
     59 
     60 define <4 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
     61 ; X86-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_256:
     62 ; X86:       # %bb.0:
     63 ; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
     64 ; X86-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0xb5,0xda]
     65 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
     66 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
     67 ; X86-NEXT:    vmovdqa %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe0]
     68 ; X86-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm4 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0xb5,0xe2]
     69 ; X86-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
     70 ; X86-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0xb5,0xc2]
     71 ; X86-NEXT:    vpaddq %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xd4,0xc0]
     72 ; X86-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xb5,0xd2]
     73 ; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
     74 ; X86-NEXT:    vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0]
     75 ; X86-NEXT:    retl # encoding: [0xc3]
     76 ;
     77 ; X64-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_256:
     78 ; X64:       # %bb.0:
     79 ; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
     80 ; X64-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0xb5,0xda]
     81 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
     82 ; X64-NEXT:    vmovdqa %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe0]
     83 ; X64-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm4 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0xb5,0xe2]
     84 ; X64-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
     85 ; X64-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0xb5,0xc2]
     86 ; X64-NEXT:    vpaddq %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xd4,0xc0]
     87 ; X64-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xb5,0xd2]
     88 ; X64-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
     89 ; X64-NEXT:    vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0]
     90 ; X64-NEXT:    retq # encoding: [0xc3]
     91 
     92   %1 = call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2)
     93   %2 = bitcast i8 %x3 to <8 x i1>
     94   %extract2 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     95   %3 = select <4 x i1> %extract2, <4 x i64> %1, <4 x i64> %x0
     96   %4 = call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer)
     97   %5 = bitcast i8 %x3 to <8 x i1>
     98   %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     99   %6 = select <4 x i1> %extract1, <4 x i64> %4, <4 x i64> %x0
    100   %7 = call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> zeroinitializer, <4 x i64> %x1, <4 x i64> zeroinitializer)
    101   %8 = bitcast i8 %x3 to <8 x i1>
    102   %extract = shufflevector <8 x i1> %8, <8 x i1> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    103   %9 = select <4 x i1> %extract, <4 x i64> %7, <4 x i64> zeroinitializer
    104   %10 = call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2)
    105   %res4 = add <4 x i64> %3, %6
    106   %res5 = add <4 x i64> %10, %9
    107   %res6 = add <4 x i64> %res5, %res4
    108   ret <4 x i64> %res6
    109 }
    110 
    111 define <2 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
    112 ; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_128:
    113 ; X86:       # %bb.0:
    114 ; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
    115 ; X86-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0xb5,0xda]
    116 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
    117 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
    118 ; X86-NEXT:    vmovdqa %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe0]
    119 ; X86-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xb5,0xe2]
    120 ; X86-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
    121 ; X86-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xb5,0xc2]
    122 ; X86-NEXT:    vpaddq %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xd4,0xc0]
    123 ; X86-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xb5,0xd2]
    124 ; X86-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
    125 ; X86-NEXT:    vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0]
    126 ; X86-NEXT:    retl # encoding: [0xc3]
    127 ;
    128 ; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_128:
    129 ; X64:       # %bb.0:
    130 ; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
    131 ; X64-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0xb5,0xda]
    132 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
    133 ; X64-NEXT:    vmovdqa %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe0]
    134 ; X64-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xb5,0xe2]
    135 ; X64-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
    136 ; X64-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xb5,0xc2]
    137 ; X64-NEXT:    vpaddq %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xd4,0xc0]
    138 ; X64-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xb5,0xd2]
    139 ; X64-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
    140 ; X64-NEXT:    vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0]
    141 ; X64-NEXT:    retq # encoding: [0xc3]
    142 
    143   %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2)
    144   %2 = bitcast i8 %x3 to <8 x i1>
    145   %extract2 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
    146   %3 = select <2 x i1> %extract2, <2 x i64> %1, <2 x i64> zeroinitializer
    147   %4 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer)
    148   %5 = bitcast i8 %x3 to <8 x i1>
    149   %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1>
    150   %6 = select <2 x i1> %extract1, <2 x i64> %4, <2 x i64> zeroinitializer
    151   %7 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> zeroinitializer, <2 x i64> %x1, <2 x i64> zeroinitializer)
    152   %8 = bitcast i8 %x3 to <8 x i1>
    153   %extract = shufflevector <8 x i1> %8, <8 x i1> %8, <2 x i32> <i32 0, i32 1>
    154   %9 = select <2 x i1> %extract, <2 x i64> %7, <2 x i64> zeroinitializer
    155   %10 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2)
    156   %res4 = add <2 x i64> %3, %6
    157   %res5 = add <2 x i64> %10, %9
    158   %res6 = add <2 x i64> %res5, %res4
    159   ret <2 x i64> %res6
    160 }
    161 
    162 define <4 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
    163 ; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_256:
    164 ; X86:       # %bb.0:
    165 ; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
    166 ; X86-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0xb5,0xda]
    167 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
    168 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
    169 ; X86-NEXT:    vmovdqa %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe0]
    170 ; X86-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xb5,0xe2]
    171 ; X86-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
    172 ; X86-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xb5,0xc2]
    173 ; X86-NEXT:    vpaddq %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xd4,0xc0]
    174 ; X86-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xb5,0xd2]
    175 ; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
    176 ; X86-NEXT:    vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0]
    177 ; X86-NEXT:    retl # encoding: [0xc3]
    178 ;
    179 ; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_256:
    180 ; X64:       # %bb.0:
    181 ; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
    182 ; X64-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0xb5,0xda]
    183 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
    184 ; X64-NEXT:    vmovdqa %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe0]
    185 ; X64-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xb5,0xe2]
    186 ; X64-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
    187 ; X64-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xb5,0xc2]
    188 ; X64-NEXT:    vpaddq %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xd4,0xc0]
    189 ; X64-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xb5,0xd2]
    190 ; X64-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
    191 ; X64-NEXT:    vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0]
    192 ; X64-NEXT:    retq # encoding: [0xc3]
    193 
    194   %1 = call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2)
    195   %2 = bitcast i8 %x3 to <8 x i1>
    196   %extract2 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    197   %3 = select <4 x i1> %extract2, <4 x i64> %1, <4 x i64> zeroinitializer
    198   %4 = call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer)
    199   %5 = bitcast i8 %x3 to <8 x i1>
    200   %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    201   %6 = select <4 x i1> %extract1, <4 x i64> %4, <4 x i64> zeroinitializer
    202   %7 = call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> zeroinitializer, <4 x i64> %x1, <4 x i64> zeroinitializer)
    203   %8 = bitcast i8 %x3 to <8 x i1>
    204   %extract = shufflevector <8 x i1> %8, <8 x i1> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    205   %9 = select <4 x i1> %extract, <4 x i64> %7, <4 x i64> zeroinitializer
    206   %10 = call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2)
    207   %res4 = add <4 x i64> %3, %6
    208   %res5 = add <4 x i64> %10, %9
    209   %res6 = add <4 x i64> %res5, %res4
    210   ret <4 x i64> %res6
    211 }
    212 
    213 declare <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64>, <2 x i64>, <2 x i64>)
    214 
    215 define <2 x i64>@test_int_x86_avx512_mask_vpmadd52l_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
    216 ; X86-LABEL: test_int_x86_avx512_mask_vpmadd52l_uq_128:
    217 ; X86:       # %bb.0:
    218 ; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
    219 ; X86-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0xb4,0xda]
    220 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
    221 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
    222 ; X86-NEXT:    vmovdqa %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe0]
    223 ; X86-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm4 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xb4,0xe2]
    224 ; X86-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
    225 ; X86-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xb4,0xc2]
    226 ; X86-NEXT:    vpaddq %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xd4,0xc0]
    227 ; X86-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xb4,0xd2]
    228 ; X86-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
    229 ; X86-NEXT:    vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0]
    230 ; X86-NEXT:    retl # encoding: [0xc3]
    231 ;
    232 ; X64-LABEL: test_int_x86_avx512_mask_vpmadd52l_uq_128:
    233 ; X64:       # %bb.0:
    234 ; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
    235 ; X64-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0xb4,0xda]
    236 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
    237 ; X64-NEXT:    vmovdqa %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe0]
    238 ; X64-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm4 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xb4,0xe2]
    239 ; X64-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
    240 ; X64-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xb4,0xc2]
    241 ; X64-NEXT:    vpaddq %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xd4,0xc0]
    242 ; X64-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xb4,0xd2]
    243 ; X64-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
    244 ; X64-NEXT:    vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0]
    245 ; X64-NEXT:    retq # encoding: [0xc3]
    246 
    247   %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2)
    248   %2 = bitcast i8 %x3 to <8 x i1>
    249   %extract2 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
    250   %3 = select <2 x i1> %extract2, <2 x i64> %1, <2 x i64> %x0
    251   %4 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer)
    252   %5 = bitcast i8 %x3 to <8 x i1>
    253   %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1>
    254   %6 = select <2 x i1> %extract1, <2 x i64> %4, <2 x i64> %x0
    255   %7 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> zeroinitializer, <2 x i64> %x1, <2 x i64> zeroinitializer)
    256   %8 = bitcast i8 %x3 to <8 x i1>
    257   %extract = shufflevector <8 x i1> %8, <8 x i1> %8, <2 x i32> <i32 0, i32 1>
    258   %9 = select <2 x i1> %extract, <2 x i64> %7, <2 x i64> zeroinitializer
    259   %10 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2)
    260   %res4 = add <2 x i64> %3, %6
    261   %res5 = add <2 x i64> %10, %9
    262   %res6 = add <2 x i64> %res5, %res4
    263   ret <2 x i64> %res6
    264 }
    265 
    266 declare <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64>, <4 x i64>, <4 x i64>)
    267 
    268 define <4 x i64>@test_int_x86_avx512_mask_vpmadd52l_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
    269 ; X86-LABEL: test_int_x86_avx512_mask_vpmadd52l_uq_256:
    270 ; X86:       # %bb.0:
    271 ; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
    272 ; X86-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0xb4,0xda]
    273 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
    274 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
    275 ; X86-NEXT:    vmovdqa %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe0]
    276 ; X86-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm4 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0xb4,0xe2]
    277 ; X86-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
    278 ; X86-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0xb4,0xc2]
    279 ; X86-NEXT:    vpaddq %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xd4,0xc0]
    280 ; X86-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xb4,0xd2]
    281 ; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
    282 ; X86-NEXT:    vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0]
    283 ; X86-NEXT:    retl # encoding: [0xc3]
    284 ;
    285 ; X64-LABEL: test_int_x86_avx512_mask_vpmadd52l_uq_256:
    286 ; X64:       # %bb.0:
    287 ; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
    288 ; X64-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0xb4,0xda]
    289 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
    290 ; X64-NEXT:    vmovdqa %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe0]
    291 ; X64-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm4 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0xb4,0xe2]
    292 ; X64-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
    293 ; X64-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0xb4,0xc2]
    294 ; X64-NEXT:    vpaddq %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xd4,0xc0]
    295 ; X64-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xb4,0xd2]
    296 ; X64-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
    297 ; X64-NEXT:    vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0]
    298 ; X64-NEXT:    retq # encoding: [0xc3]
    299 
    300   %1 = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2)
    301   %2 = bitcast i8 %x3 to <8 x i1>
    302   %extract2 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    303   %3 = select <4 x i1> %extract2, <4 x i64> %1, <4 x i64> %x0
    304   %4 = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer)
    305   %5 = bitcast i8 %x3 to <8 x i1>
    306   %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    307   %6 = select <4 x i1> %extract1, <4 x i64> %4, <4 x i64> %x0
    308   %7 = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> zeroinitializer, <4 x i64> %x1, <4 x i64> zeroinitializer)
    309   %8 = bitcast i8 %x3 to <8 x i1>
    310   %extract = shufflevector <8 x i1> %8, <8 x i1> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    311   %9 = select <4 x i1> %extract, <4 x i64> %7, <4 x i64> zeroinitializer
    312   %10 = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2)
    313   %res4 = add <4 x i64> %3, %6
    314   %res5 = add <4 x i64> %10, %9
    315   %res6 = add <4 x i64> %res5, %res4
    316   ret <4 x i64> %res6
    317 }
    318 
    319 define <2 x i64>@test_int_x86_avx512_maskz_vpmadd52l_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
    320 ; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52l_uq_128:
    321 ; X86:       # %bb.0:
    322 ; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
    323 ; X86-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0xb4,0xda]
    324 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
    325 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
    326 ; X86-NEXT:    vmovdqa %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe0]
    327 ; X86-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xb4,0xe2]
    328 ; X86-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
    329 ; X86-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xb4,0xc2]
    330 ; X86-NEXT:    vpaddq %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xd4,0xc0]
    331 ; X86-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xb4,0xd2]
    332 ; X86-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
    333 ; X86-NEXT:    vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0]
    334 ; X86-NEXT:    retl # encoding: [0xc3]
    335 ;
    336 ; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52l_uq_128:
    337 ; X64:       # %bb.0:
    338 ; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
    339 ; X64-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0xb4,0xda]
    340 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
    341 ; X64-NEXT:    vmovdqa %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe0]
    342 ; X64-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xb4,0xe2]
    343 ; X64-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
    344 ; X64-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xb4,0xc2]
    345 ; X64-NEXT:    vpaddq %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xd4,0xc0]
    346 ; X64-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xb4,0xd2]
    347 ; X64-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
    348 ; X64-NEXT:    vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0]
    349 ; X64-NEXT:    retq # encoding: [0xc3]
    350 
    351   %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2)
    352   %2 = bitcast i8 %x3 to <8 x i1>
    353   %extract2 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
    354   %3 = select <2 x i1> %extract2, <2 x i64> %1, <2 x i64> zeroinitializer
    355   %4 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer)
    356   %5 = bitcast i8 %x3 to <8 x i1>
    357   %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1>
    358   %6 = select <2 x i1> %extract1, <2 x i64> %4, <2 x i64> zeroinitializer
    359   %7 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> zeroinitializer, <2 x i64> %x1, <2 x i64> zeroinitializer)
    360   %8 = bitcast i8 %x3 to <8 x i1>
    361   %extract = shufflevector <8 x i1> %8, <8 x i1> %8, <2 x i32> <i32 0, i32 1>
    362   %9 = select <2 x i1> %extract, <2 x i64> %7, <2 x i64> zeroinitializer
    363   %10 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2)
    364   %res4 = add <2 x i64> %3, %6
    365   %res5 = add <2 x i64> %10, %9
    366   %res6 = add <2 x i64> %res5, %res4
    367   ret <2 x i64> %res6
    368 }
    369 
    370 define <4 x i64>@test_int_x86_avx512_maskz_vpmadd52l_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
    371 ; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52l_uq_256:
    372 ; X86:       # %bb.0:
    373 ; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
    374 ; X86-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0xb4,0xda]
    375 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
    376 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
    377 ; X86-NEXT:    vmovdqa %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe0]
    378 ; X86-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xb4,0xe2]
    379 ; X86-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
    380 ; X86-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xb4,0xc2]
    381 ; X86-NEXT:    vpaddq %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xd4,0xc0]
    382 ; X86-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xb4,0xd2]
    383 ; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
    384 ; X86-NEXT:    vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0]
    385 ; X86-NEXT:    retl # encoding: [0xc3]
    386 ;
    387 ; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52l_uq_256:
    388 ; X64:       # %bb.0:
    389 ; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
    390 ; X64-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0xb4,0xda]
    391 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
    392 ; X64-NEXT:    vmovdqa %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe0]
    393 ; X64-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xb4,0xe2]
    394 ; X64-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
    395 ; X64-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xb4,0xc2]
    396 ; X64-NEXT:    vpaddq %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xd4,0xc0]
    397 ; X64-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xb4,0xd2]
    398 ; X64-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
    399 ; X64-NEXT:    vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0]
    400 ; X64-NEXT:    retq # encoding: [0xc3]
    401 
    402   %1 = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2)
    403   %2 = bitcast i8 %x3 to <8 x i1>
    404   %extract2 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    405   %3 = select <4 x i1> %extract2, <4 x i64> %1, <4 x i64> zeroinitializer
    406   %4 = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer)
    407   %5 = bitcast i8 %x3 to <8 x i1>
    408   %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    409   %6 = select <4 x i1> %extract1, <4 x i64> %4, <4 x i64> zeroinitializer
    410   %7 = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> zeroinitializer, <4 x i64> %x1, <4 x i64> zeroinitializer)
    411   %8 = bitcast i8 %x3 to <8 x i1>
    412   %extract = shufflevector <8 x i1> %8, <8 x i1> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    413   %9 = select <4 x i1> %extract, <4 x i64> %7, <4 x i64> zeroinitializer
    414   %10 = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2)
    415   %res4 = add <4 x i64> %3, %6
    416   %res5 = add <4 x i64> %10, %9
    417   %res6 = add <4 x i64> %res5, %res4
    418   ret <4 x i64> %res6
    419 }
    420