; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i686-unknown-unknown -mattr=+avx512ifma,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512ifma,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512ifmavl-builtins.c

      7 define <2 x i64> @test_mm_madd52hi_epu64(<2 x i64> %__X, <2 x i64> %__Y, <2 x i64> %__Z) {
      8 ; CHECK-LABEL: test_mm_madd52hi_epu64:
      9 ; CHECK:       # %bb.0: # %entry
     10 ; CHECK-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0
     11 ; CHECK-NEXT:    ret{{[l|q]}}
     12 entry:
     13   %0 = tail call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %__X, <2 x i64> %__Y, <2 x i64> %__Z)
     14   ret <2 x i64> %0
     15 }
     16 
     17 define <2 x i64> @test_mm_mask_madd52hi_epu64(<2 x i64> %__W, i8 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y) {
     18 ; X86-LABEL: test_mm_mask_madd52hi_epu64:
     19 ; X86:       # %bb.0: # %entry
     20 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
     21 ; X86-NEXT:    kmovw %eax, %k1
     22 ; X86-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1}
     23 ; X86-NEXT:    retl
     24 ;
     25 ; X64-LABEL: test_mm_mask_madd52hi_epu64:
     26 ; X64:       # %bb.0: # %entry
     27 ; X64-NEXT:    kmovw %edi, %k1
     28 ; X64-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1}
     29 ; X64-NEXT:    retq
     30 entry:
     31   %0 = tail call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %__W, <2 x i64> %__X, <2 x i64> %__Y)
     32   %1 = bitcast i8 %__M to <8 x i1>
     33   %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
     34   %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> %__W
     35   ret <2 x i64> %2
     36 }
     37 
     38 define <2 x i64> @test_mm_maskz_madd52hi_epu64(i8 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y, <2 x i64> %__Z) {
     39 ; X86-LABEL: test_mm_maskz_madd52hi_epu64:
     40 ; X86:       # %bb.0: # %entry
     41 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
     42 ; X86-NEXT:    kmovw %eax, %k1
     43 ; X86-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1} {z}
     44 ; X86-NEXT:    retl
     45 ;
     46 ; X64-LABEL: test_mm_maskz_madd52hi_epu64:
     47 ; X64:       # %bb.0: # %entry
     48 ; X64-NEXT:    kmovw %edi, %k1
     49 ; X64-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1} {z}
     50 ; X64-NEXT:    retq
     51 entry:
     52   %0 = tail call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %__X, <2 x i64> %__Y, <2 x i64> %__Z)
     53   %1 = bitcast i8 %__M to <8 x i1>
     54   %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
     55   %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> zeroinitializer
     56   ret <2 x i64> %2
     57 }
     58 
     59 define <4 x i64> @test_mm256_madd52hi_epu64(<4 x i64> %__X, <4 x i64> %__Y, <4 x i64> %__Z) {
     60 ; CHECK-LABEL: test_mm256_madd52hi_epu64:
     61 ; CHECK:       # %bb.0: # %entry
     62 ; CHECK-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0
     63 ; CHECK-NEXT:    ret{{[l|q]}}
     64 entry:
     65   %0 = tail call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %__X, <4 x i64> %__Y, <4 x i64> %__Z)
     66   ret <4 x i64> %0
     67 }
     68 
     69 define <4 x i64> @test_mm256_mask_madd52hi_epu64(<4 x i64> %__W, i8 zeroext %__M, <4 x i64> %__X, <4 x i64> %__Y) {
     70 ; X86-LABEL: test_mm256_mask_madd52hi_epu64:
     71 ; X86:       # %bb.0: # %entry
     72 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
     73 ; X86-NEXT:    kmovw %eax, %k1
     74 ; X86-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1}
     75 ; X86-NEXT:    retl
     76 ;
     77 ; X64-LABEL: test_mm256_mask_madd52hi_epu64:
     78 ; X64:       # %bb.0: # %entry
     79 ; X64-NEXT:    kmovw %edi, %k1
     80 ; X64-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1}
     81 ; X64-NEXT:    retq
     82 entry:
     83   %0 = tail call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %__W, <4 x i64> %__X, <4 x i64> %__Y)
     84   %1 = bitcast i8 %__M to <8 x i1>
     85   %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     86   %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> %__W
     87   ret <4 x i64> %2
     88 }
     89 
     90 define <4 x i64> @test_mm256_maskz_madd52hi_epu64(i8 zeroext %__M, <4 x i64> %__X, <4 x i64> %__Y, <4 x i64> %__Z) {
     91 ; X86-LABEL: test_mm256_maskz_madd52hi_epu64:
     92 ; X86:       # %bb.0: # %entry
     93 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
     94 ; X86-NEXT:    kmovw %eax, %k1
     95 ; X86-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1} {z}
     96 ; X86-NEXT:    retl
     97 ;
     98 ; X64-LABEL: test_mm256_maskz_madd52hi_epu64:
     99 ; X64:       # %bb.0: # %entry
    100 ; X64-NEXT:    kmovw %edi, %k1
    101 ; X64-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1} {z}
    102 ; X64-NEXT:    retq
    103 entry:
    104   %0 = tail call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %__X, <4 x i64> %__Y, <4 x i64> %__Z)
    105   %1 = bitcast i8 %__M to <8 x i1>
    106   %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    107   %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> zeroinitializer
    108   ret <4 x i64> %2
    109 }
    110 
    111 define <2 x i64> @test_mm_madd52lo_epu64(<2 x i64> %__X, <2 x i64> %__Y, <2 x i64> %__Z) {
    112 ; CHECK-LABEL: test_mm_madd52lo_epu64:
    113 ; CHECK:       # %bb.0: # %entry
    114 ; CHECK-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0
    115 ; CHECK-NEXT:    ret{{[l|q]}}
    116 entry:
    117   %0 = tail call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %__X, <2 x i64> %__Y, <2 x i64> %__Z)
    118   ret <2 x i64> %0
    119 }
    120 
    121 define <2 x i64> @test_mm_mask_madd52lo_epu64(<2 x i64> %__W, i8 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y) {
    122 ; X86-LABEL: test_mm_mask_madd52lo_epu64:
    123 ; X86:       # %bb.0: # %entry
    124 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
    125 ; X86-NEXT:    kmovw %eax, %k1
    126 ; X86-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1}
    127 ; X86-NEXT:    retl
    128 ;
    129 ; X64-LABEL: test_mm_mask_madd52lo_epu64:
    130 ; X64:       # %bb.0: # %entry
    131 ; X64-NEXT:    kmovw %edi, %k1
    132 ; X64-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1}
    133 ; X64-NEXT:    retq
    134 entry:
    135   %0 = tail call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %__W, <2 x i64> %__X, <2 x i64> %__Y)
    136   %1 = bitcast i8 %__M to <8 x i1>
    137   %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
    138   %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> %__W
    139   ret <2 x i64> %2
    140 }
    141 
    142 define <2 x i64> @test_mm_maskz_madd52lo_epu64(i8 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y, <2 x i64> %__Z) {
    143 ; X86-LABEL: test_mm_maskz_madd52lo_epu64:
    144 ; X86:       # %bb.0: # %entry
    145 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
    146 ; X86-NEXT:    kmovw %eax, %k1
    147 ; X86-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1} {z}
    148 ; X86-NEXT:    retl
    149 ;
    150 ; X64-LABEL: test_mm_maskz_madd52lo_epu64:
    151 ; X64:       # %bb.0: # %entry
    152 ; X64-NEXT:    kmovw %edi, %k1
    153 ; X64-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1} {z}
    154 ; X64-NEXT:    retq
    155 entry:
    156   %0 = tail call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %__X, <2 x i64> %__Y, <2 x i64> %__Z)
    157   %1 = bitcast i8 %__M to <8 x i1>
    158   %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
    159   %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> zeroinitializer
    160   ret <2 x i64> %2
    161 }
    162 
    163 define <4 x i64> @test_mm256_madd52lo_epu64(<4 x i64> %__X, <4 x i64> %__Y, <4 x i64> %__Z) {
    164 ; CHECK-LABEL: test_mm256_madd52lo_epu64:
    165 ; CHECK:       # %bb.0: # %entry
    166 ; CHECK-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0
    167 ; CHECK-NEXT:    ret{{[l|q]}}
    168 entry:
    169   %0 = tail call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %__X, <4 x i64> %__Y, <4 x i64> %__Z)
    170   ret <4 x i64> %0
    171 }
    172 
    173 define <4 x i64> @test_mm256_mask_madd52lo_epu64(<4 x i64> %__W, i8 zeroext %__M, <4 x i64> %__X, <4 x i64> %__Y) {
    174 ; X86-LABEL: test_mm256_mask_madd52lo_epu64:
    175 ; X86:       # %bb.0: # %entry
    176 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
    177 ; X86-NEXT:    kmovw %eax, %k1
    178 ; X86-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1}
    179 ; X86-NEXT:    retl
    180 ;
    181 ; X64-LABEL: test_mm256_mask_madd52lo_epu64:
    182 ; X64:       # %bb.0: # %entry
    183 ; X64-NEXT:    kmovw %edi, %k1
    184 ; X64-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1}
    185 ; X64-NEXT:    retq
    186 entry:
    187   %0 = tail call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %__W, <4 x i64> %__X, <4 x i64> %__Y)
    188   %1 = bitcast i8 %__M to <8 x i1>
    189   %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    190   %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> %__W
    191   ret <4 x i64> %2
    192 }
    193 
    194 define <4 x i64> @test_mm256_maskz_madd52lo_epu64(i8 zeroext %__M, <4 x i64> %__X, <4 x i64> %__Y, <4 x i64> %__Z) {
    195 ; X86-LABEL: test_mm256_maskz_madd52lo_epu64:
    196 ; X86:       # %bb.0: # %entry
    197 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
    198 ; X86-NEXT:    kmovw %eax, %k1
    199 ; X86-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1} {z}
    200 ; X86-NEXT:    retl
    201 ;
    202 ; X64-LABEL: test_mm256_maskz_madd52lo_epu64:
    203 ; X64:       # %bb.0: # %entry
    204 ; X64-NEXT:    kmovw %edi, %k1
    205 ; X64-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1} {z}
    206 ; X64-NEXT:    retq
    207 entry:
    208   %0 = tail call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %__X, <4 x i64> %__Y, <4 x i64> %__Z)
    209   %1 = bitcast i8 %__M to <8 x i1>
    210   %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    211   %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> zeroinitializer
    212   ret <4 x i64> %2
    213 }
    214 
    215 declare <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64>, <2 x i64>, <2 x i64>)
    216 declare <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64>, <4 x i64>, <4 x i64>)
    217 declare <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64>, <2 x i64>, <2 x i64>)
    218 declare <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64>, <4 x i64>, <4 x i64>)
    219