; LLVM CodeGen/X86 regression test: lowering of the masked and zero-masked
; VPMADD52HUQ / VPMADD52LUQ (AVX-512 IFMA) intrinsics at 128- and 256-bit
; vector widths (AVX-512 VL).
; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl -mattr=+avx512ifma | FileCheck %s

      4 declare <2 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
      5 
      6 define <2 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
      7 ; CHECK-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_128:
      8 ; CHECK:       ## BB#0:
      9 ; CHECK-NEXT:    kmovw %edi, %k1
     10 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3
     11 ; CHECK-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm3 {%k1}
     12 ; CHECK-NEXT:    vmovaps %zmm0, %zmm4
     13 ; CHECK-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm4
     14 ; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2
     15 ; CHECK-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1}
     16 ; CHECK-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm2 {%k1} {z}
     17 ; CHECK-NEXT:    vpaddq %xmm0, %xmm3, %xmm0
     18 ; CHECK-NEXT:    vpaddq %xmm2, %xmm4, %xmm1
     19 ; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
     20 ; CHECK-NEXT:    retq
     21 
     22   %res = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
     23   %res1 = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
     24   %res2 = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.128(<2 x i64> zeroinitializer, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
     25   %res3 = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
     26   %res4 = add <2 x i64> %res, %res1
     27   %res5 = add <2 x i64> %res3, %res2
     28   %res6 = add <2 x i64> %res5, %res4
     29   ret <2 x i64> %res6
     30 }
     31 
     32 declare <4 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
     33 
     34 define <4 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
     35 ; CHECK-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_256:
     36 ; CHECK:       ## BB#0:
     37 ; CHECK-NEXT:    kmovw %edi, %k1
     38 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3
     39 ; CHECK-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm3 {%k1}
     40 ; CHECK-NEXT:    vmovaps %zmm0, %zmm4
     41 ; CHECK-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm4
     42 ; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2
     43 ; CHECK-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1}
     44 ; CHECK-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm2 {%k1} {z}
     45 ; CHECK-NEXT:    vpaddq %ymm0, %ymm3, %ymm0
     46 ; CHECK-NEXT:    vpaddq %ymm2, %ymm4, %ymm1
     47 ; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
     48 ; CHECK-NEXT:    retq
     49 
     50   %res = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
     51   %res1 = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
     52   %res2 = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.256(<4 x i64> zeroinitializer, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
     53   %res3 = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
     54   %res4 = add <4 x i64> %res, %res1
     55   %res5 = add <4 x i64> %res3, %res2
     56   %res6 = add <4 x i64> %res5, %res4
     57   ret <4 x i64> %res6
     58 }
     59 
     60 declare <2 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
     61 
     62 define <2 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
     63 ; CHECK-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_128:
     64 ; CHECK:       ## BB#0:
     65 ; CHECK-NEXT:    kmovw %edi, %k1
     66 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3
     67 ; CHECK-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm3 {%k1} {z}
     68 ; CHECK-NEXT:    vmovaps %zmm0, %zmm4
     69 ; CHECK-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm4
     70 ; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2
     71 ; CHECK-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1} {z}
     72 ; CHECK-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm2 {%k1} {z}
     73 ; CHECK-NEXT:    vpaddq %xmm0, %xmm3, %xmm0
     74 ; CHECK-NEXT:    vpaddq %xmm2, %xmm4, %xmm1
     75 ; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
     76 ; CHECK-NEXT:    retq
     77 
     78   %res = call <2 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
     79   %res1 = call <2 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
     80   %res2 = call <2 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.128(<2 x i64> zeroinitializer, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
     81   %res3 = call <2 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
     82   %res4 = add <2 x i64> %res, %res1
     83   %res5 = add <2 x i64> %res3, %res2
     84   %res6 = add <2 x i64> %res5, %res4
     85   ret <2 x i64> %res6
     86 }
     87 
     88 declare <4 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
     89 
     90 define <4 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
     91 ; CHECK-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_256:
     92 ; CHECK:       ## BB#0:
     93 ; CHECK-NEXT:    kmovw %edi, %k1
     94 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3
     95 ; CHECK-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm3 {%k1} {z}
     96 ; CHECK-NEXT:    vmovaps %zmm0, %zmm4
     97 ; CHECK-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm4
     98 ; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2
     99 ; CHECK-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1} {z}
    100 ; CHECK-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm2 {%k1} {z}
    101 ; CHECK-NEXT:    vpaddq %ymm0, %ymm3, %ymm0
    102 ; CHECK-NEXT:    vpaddq %ymm2, %ymm4, %ymm1
    103 ; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
    104 ; CHECK-NEXT:    retq
    105 
    106   %res = call <4 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
    107   %res1 = call <4 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
    108   %res2 = call <4 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.256(<4 x i64> zeroinitializer, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
    109   %res3 = call <4 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
    110   %res4 = add <4 x i64> %res, %res1
    111   %res5 = add <4 x i64> %res3, %res2
    112   %res6 = add <4 x i64> %res5, %res4
    113   ret <4 x i64> %res6
    114 }
    115 
    116 declare <2 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
    117 
    118 define <2 x i64>@test_int_x86_avx512_mask_vpmadd52l_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
    119 ; CHECK-LABEL: test_int_x86_avx512_mask_vpmadd52l_uq_128:
    120 ; CHECK:       ## BB#0:
    121 ; CHECK-NEXT:    kmovw %edi, %k1
    122 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3
    123 ; CHECK-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm3 {%k1}
    124 ; CHECK-NEXT:    vmovaps %zmm0, %zmm4
    125 ; CHECK-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm4
    126 ; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2
    127 ; CHECK-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1}
    128 ; CHECK-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm2 {%k1} {z}
    129 ; CHECK-NEXT:    vpaddq %xmm0, %xmm3, %xmm0
    130 ; CHECK-NEXT:    vpaddq %xmm2, %xmm4, %xmm1
    131 ; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
    132 ; CHECK-NEXT:    retq
    133 
    134   %res = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
    135   %res1 = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
    136   %res2 = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.128(<2 x i64> zeroinitializer, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
    137   %res3 = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
    138   %res4 = add <2 x i64> %res, %res1
    139   %res5 = add <2 x i64> %res3, %res2
    140   %res6 = add <2 x i64> %res5, %res4
    141   ret <2 x i64> %res6
    142 }
    143 
    144 declare <4 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
    145 
    146 define <4 x i64>@test_int_x86_avx512_mask_vpmadd52l_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
    147 ; CHECK-LABEL: test_int_x86_avx512_mask_vpmadd52l_uq_256:
    148 ; CHECK:       ## BB#0:
    149 ; CHECK-NEXT:    kmovw %edi, %k1
    150 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3
    151 ; CHECK-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm3 {%k1}
    152 ; CHECK-NEXT:    vmovaps %zmm0, %zmm4
    153 ; CHECK-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm4
    154 ; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2
    155 ; CHECK-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1}
    156 ; CHECK-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm2 {%k1} {z}
    157 ; CHECK-NEXT:    vpaddq %ymm0, %ymm3, %ymm0
    158 ; CHECK-NEXT:    vpaddq %ymm2, %ymm4, %ymm1
    159 ; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
    160 ; CHECK-NEXT:    retq
    161 
    162   %res = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
    163   %res1 = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
    164   %res2 = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.256(<4 x i64> zeroinitializer, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
    165   %res3 = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
    166   %res4 = add <4 x i64> %res, %res1
    167   %res5 = add <4 x i64> %res3, %res2
    168   %res6 = add <4 x i64> %res5, %res4
    169   ret <4 x i64> %res6
    170 }
    171 
    172 declare <2 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
    173 
    174 define <2 x i64>@test_int_x86_avx512_maskz_vpmadd52l_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
    175 ; CHECK-LABEL: test_int_x86_avx512_maskz_vpmadd52l_uq_128:
    176 ; CHECK:       ## BB#0:
    177 ; CHECK-NEXT:    kmovw %edi, %k1
    178 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3
    179 ; CHECK-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm3 {%k1} {z}
    180 ; CHECK-NEXT:    vmovaps %zmm0, %zmm4
    181 ; CHECK-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm4
    182 ; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2
    183 ; CHECK-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1} {z}
    184 ; CHECK-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm2 {%k1} {z}
    185 ; CHECK-NEXT:    vpaddq %xmm0, %xmm3, %xmm0
    186 ; CHECK-NEXT:    vpaddq %xmm2, %xmm4, %xmm1
    187 ; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
    188 ; CHECK-NEXT:    retq
    189 
    190   %res = call <2 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
    191   %res1 = call <2 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
    192   %res2 = call <2 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.128(<2 x i64> zeroinitializer, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
    193   %res3 = call <2 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
    194   %res4 = add <2 x i64> %res, %res1
    195   %res5 = add <2 x i64> %res3, %res2
    196   %res6 = add <2 x i64> %res5, %res4
    197   ret <2 x i64> %res6
    198 }
    199 
    200 declare <4 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
    201 
    202 define <4 x i64>@test_int_x86_avx512_maskz_vpmadd52l_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
    203 ; CHECK-LABEL: test_int_x86_avx512_maskz_vpmadd52l_uq_256:
    204 ; CHECK:       ## BB#0:
    205 ; CHECK-NEXT:    kmovw %edi, %k1
    206 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3
    207 ; CHECK-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm3 {%k1} {z}
    208 ; CHECK-NEXT:    vmovaps %zmm0, %zmm4
    209 ; CHECK-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm4
    210 ; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2
    211 ; CHECK-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1} {z}
    212 ; CHECK-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm2 {%k1} {z}
    213 ; CHECK-NEXT:    vpaddq %ymm0, %ymm3, %ymm0
    214 ; CHECK-NEXT:    vpaddq %ymm2, %ymm4, %ymm1
    215 ; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
    216 ; CHECK-NEXT:    retq
    217 
    218   %res = call <4 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
    219   %res1 = call <4 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
    220   %res2 = call <4 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.256(<4 x i64> zeroinitializer, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
    221   %res3 = call <4 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
    222   %res4 = add <4 x i64> %res, %res1
    223   %res5 = add <4 x i64> %res3, %res2
    224   %res6 = add <4 x i64> %res5, %res4
    225   ret <4 x i64> %res6
    226 }
    227