; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f -fp-contract=fast | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -fp-contract=fast | FileCheck %s --check-prefix=SKX

      4 ; CHECK-LABEL: test_x86_fmadd_ps_z
      5 ; CHECK: vfmadd213ps     %zmm2, %zmm1, %zmm0
      6 ; CHECK: ret
      7 define <16 x float> @test_x86_fmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
      8   %x = fmul <16 x float> %a0, %a1
      9   %res = fadd <16 x float> %x, %a2
     10   ret <16 x float> %res
     11 }
     12 
     13 ; CHECK-LABEL: test_x86_fmsub_ps_z
     14 ; CHECK: vfmsub213ps     %zmm2, %zmm1, %zmm0
     15 ; CHECK: ret
     16 define <16 x float> @test_x86_fmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
     17   %x = fmul <16 x float> %a0, %a1
     18   %res = fsub <16 x float> %x, %a2
     19   ret <16 x float> %res
     20 }
     21 
     22 ; CHECK-LABEL: test_x86_fnmadd_ps_z
     23 ; CHECK: vfnmadd213ps     %zmm2, %zmm1, %zmm0
     24 ; CHECK: ret
     25 define <16 x float> @test_x86_fnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
     26   %x = fmul <16 x float> %a0, %a1
     27   %res = fsub <16 x float> %a2, %x
     28   ret <16 x float> %res
     29 }
     30 
     31 ; CHECK-LABEL: test_x86_fnmsub_ps_z
     32 ; CHECK: vfnmsub213ps     %zmm2, %zmm1, %zmm0
     33 ; CHECK: ret
     34 define <16 x float> @test_x86_fnmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
     35   %x = fmul <16 x float> %a0, %a1
     36   %y = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, 
     37                           float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
     38 						  float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, 
     39 						  float -0.000000e+00>, %x
     40   %res = fsub <16 x float> %y, %a2
     41   ret <16 x float> %res
     42 }
     43 
     44 ; CHECK-LABEL: test_x86_fmadd_pd_z
     45 ; CHECK: vfmadd213pd     %zmm2, %zmm1, %zmm0
     46 ; CHECK: ret
     47 define <8 x double> @test_x86_fmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
     48   %x = fmul <8 x double> %a0, %a1
     49   %res = fadd <8 x double> %x, %a2
     50   ret <8 x double> %res
     51 }
     52 
     53 ; CHECK-LABEL: test_x86_fmsub_pd_z
     54 ; CHECK: vfmsub213pd     %zmm2, %zmm1, %zmm0
     55 ; CHECK: ret
     56 define <8 x double> @test_x86_fmsub_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
     57   %x = fmul <8 x double> %a0, %a1
     58   %res = fsub <8 x double> %x, %a2
     59   ret <8 x double> %res
     60 }
     61 
     62 define double @test_x86_fmsub_213(double %a0, double %a1, double %a2) {
     63 ; CHECK-LABEL: test_x86_fmsub_213:
     64 ; CHECK:       ## BB#0:
     65 ; CHECK-NEXT:    vfmsub213sd %xmm2, %xmm0, %xmm1
     66 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0
     67 ; CHECK-NEXT:    retq
     68   %x = fmul double %a0, %a1
     69   %res = fsub double %x, %a2
     70   ret double %res
     71 }
     72 
     73 define double @test_x86_fmsub_213_m(double %a0, double %a1, double * %a2_ptr) {
     74 ; CHECK-LABEL: test_x86_fmsub_213_m:
     75 ; CHECK:       ## BB#0:
     76 ; CHECK-NEXT:    vfmsub213sd (%rdi), %xmm0, %xmm1
     77 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0
     78 ; CHECK-NEXT:    retq
     79   %a2 = load double , double *%a2_ptr
     80   %x = fmul double %a0, %a1
     81   %res = fsub double %x, %a2
     82   ret double %res
     83 }
     84 
     85 define double @test_x86_fmsub_231_m(double %a0, double %a1, double * %a2_ptr) {
     86 ; CHECK-LABEL: test_x86_fmsub_231_m:
     87 ; CHECK:       ## BB#0:
     88 ; CHECK-NEXT:    vfmsub231sd (%rdi), %xmm0, %xmm1
     89 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0
     90 ; CHECK-NEXT:    retq
     91   %a2 = load double , double *%a2_ptr
     92   %x = fmul double %a0, %a2
     93   %res = fsub double %x, %a1
     94   ret double %res
     95 }
     96 
     97 define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind {
     98 ; CHECK-LABEL: test231_br:
     99 ; CHECK:       ## BB#0:
    100 ; CHECK-NEXT:    vfmadd231ps {{.*}}(%rip){1to16}, %zmm0, %zmm1
    101 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0
    102 ; CHECK-NEXT:    retq
    103   %b1 = fmul <16 x float> %a1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
    104   %b2 = fadd <16 x float> %b1, %a2
    105   ret <16 x float> %b2
    106 }
    107 
    108 define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind {
    109 ; CHECK-LABEL: test213_br:
    110 ; CHECK:       ## BB#0:
    111 ; CHECK-NEXT:    vfmadd213ps {{.*}}(%rip){1to16}, %zmm1, %zmm0
    112 ; CHECK-NEXT:    retq
    113   %b1 = fmul <16 x float> %a1, %a2
    114   %b2 = fadd <16 x float> %b1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
    115   ret <16 x float> %b2
    116 }
    117 
    118 ;mask (a*c+b , a)
    119 define <16 x float> @test_x86_fmadd132_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
    120 ; CHECK-LABEL: test_x86_fmadd132_ps:
    121 ; CHECK:       ## BB#0:
    122 ; CHECK-NEXT:    vpmovsxbd %xmm2, %zmm2
    123 ; CHECK-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
    124 ; CHECK-NEXT:    vptestmd %zmm2, %zmm2, %k1
    125 ; CHECK-NEXT:    vfmadd132ps (%rdi), %zmm1, %zmm0 {%k1}
    126 ; CHECK-NEXT:    retq
    127 ;
    128 ; SKX-LABEL: test_x86_fmadd132_ps:
    129 ; SKX:       ## BB#0:
    130 ; SKX-NEXT:    vpmovb2m %xmm2, %k1
    131 ; SKX-NEXT:    vfmadd132ps (%rdi), %zmm1, %zmm0 {%k1}
    132 ; SKX-NEXT:    retq
    133   %a2   = load <16 x float>,<16 x float> *%a2_ptrt,align 1
    134   %x = fmul <16 x float> %a0, %a2
    135   %y = fadd <16 x float> %x, %a1
    136   %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a0
    137   ret <16 x float> %res
    138 }
    139 
    140 ;mask (a*c+b , b)
    141 define <16 x float> @test_x86_fmadd231_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
    142 ; CHECK-LABEL: test_x86_fmadd231_ps:
    143 ; CHECK:       ## BB#0:
    144 ; CHECK-NEXT:    vpmovsxbd %xmm2, %zmm2
    145 ; CHECK-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
    146 ; CHECK-NEXT:    vptestmd %zmm2, %zmm2, %k1
    147 ; CHECK-NEXT:    vfmadd231ps (%rdi), %zmm0, %zmm1 {%k1}
    148 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0
    149 ; CHECK-NEXT:    retq
    150 ;
    151 ; SKX-LABEL: test_x86_fmadd231_ps:
    152 ; SKX:       ## BB#0:
    153 ; SKX-NEXT:    vpmovb2m %xmm2, %k1
    154 ; SKX-NEXT:    vfmadd231ps (%rdi), %zmm0, %zmm1 {%k1}
    155 ; SKX-NEXT:    vmovaps %zmm1, %zmm0
    156 ; SKX-NEXT:    retq
    157   %a2   = load <16 x float>,<16 x float> *%a2_ptrt,align 1
    158   %x = fmul <16 x float> %a0, %a2
    159   %y = fadd <16 x float> %x, %a1
    160   %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
    161   ret <16 x float> %res
    162 }
    163 
    164 ;mask (b*a+c , b)
    165 define <16 x float> @test_x86_fmadd213_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
    166 ; CHECK-LABEL: test_x86_fmadd213_ps:
    167 ; CHECK:       ## BB#0:
    168 ; CHECK-NEXT:    vpmovsxbd %xmm2, %zmm2
    169 ; CHECK-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
    170 ; CHECK-NEXT:    vptestmd %zmm2, %zmm2, %k1
    171 ; CHECK-NEXT:    vfmadd213ps (%rdi), %zmm0, %zmm1 {%k1}
    172 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0
    173 ; CHECK-NEXT:    retq
    174 ;
    175 ; SKX-LABEL: test_x86_fmadd213_ps:
    176 ; SKX:       ## BB#0:
    177 ; SKX-NEXT:    vpmovb2m %xmm2, %k1
    178 ; SKX-NEXT:    vfmadd213ps (%rdi), %zmm0, %zmm1 {%k1}
    179 ; SKX-NEXT:    vmovaps %zmm1, %zmm0
    180 ; SKX-NEXT:    retq
    181   %a2   = load <16 x float>,<16 x float> *%a2_ptrt,align 1
    182   %x = fmul <16 x float> %a1, %a0
    183   %y = fadd <16 x float> %x, %a2
    184   %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
    185   ret <16 x float> %res
    186 }
    187 
    188