Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+fma | FileCheck -check-prefix=FMA3 -check-prefix=FMA3_256 %s
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+fma,+avx512f | FileCheck -check-prefix=FMA3 -check-prefix=FMA3_512 %s
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+fma4 | FileCheck -check-prefix=FMA4 %s
      5 
      6 ; This test checks the fusing of MUL + SUB/ADD to FMSUBADD.
      7 
      8 define <2 x double> @mul_subadd_pd128(<2 x double> %A, <2 x double> %B, <2 x double> %C) #0 {
        ; Positive test, <2 x double>. The IR forms the product %A * %B and then
        ; both (%AB - %C) and (%AB + %C). The shuffle mask <0, 3> picks lane 0 from
        ; %Add and lane 1 from %Sub, so the even lane adds and the odd lane
        ; subtracts. The assertions below expect a single xmm vfmsubadd213pd for
        ; both FMA3 configurations and a single xmm vfmsubaddpd for FMA4.
      9 ; FMA3_256-LABEL: mul_subadd_pd128:
     10 ; FMA3_256:       # %bb.0: # %entry
     11 ; FMA3_256-NEXT:    vfmsubadd213pd %xmm2, %xmm1, %xmm0
     12 ; FMA3_256-NEXT:    retq
     13 ;
     14 ; FMA3_512-LABEL: mul_subadd_pd128:
     15 ; FMA3_512:       # %bb.0: # %entry
     16 ; FMA3_512-NEXT:    vfmsubadd213pd %xmm2, %xmm1, %xmm0
     17 ; FMA3_512-NEXT:    retq
     18 ;
     19 ; FMA4-LABEL: mul_subadd_pd128:
     20 ; FMA4:       # %bb.0: # %entry
     21 ; FMA4-NEXT:    vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0
     22 ; FMA4-NEXT:    retq
     23 entry:
     24   %AB = fmul <2 x double> %A, %B
     25   %Sub = fsub <2 x double> %AB, %C
     26   %Add = fadd <2 x double> %AB, %C
     27   %subadd = shufflevector <2 x double> %Add, <2 x double> %Sub, <2 x i32> <i32 0, i32 3>
     28   ret <2 x double> %subadd
     29 }
     30 
     31 define <4 x float> @mul_subadd_ps128(<4 x float> %A, <4 x float> %B, <4 x float> %C) #0 {
        ; Positive test, <4 x float>. The shuffle mask <0, 5, 2, 7> takes the even
        ; lanes (0, 2) from %Add and the odd lanes (1, 3) from %Sub, where both
        ; are computed from the shared product %AB and %C. The assertions below
        ; expect a single xmm vfmsubadd213ps (FMA3 prefix, shared by both FMA3
        ; configurations) and a single xmm vfmsubaddps for FMA4.
     32 ; FMA3-LABEL: mul_subadd_ps128:
     33 ; FMA3:       # %bb.0: # %entry
     34 ; FMA3-NEXT:    vfmsubadd213ps  %xmm2, %xmm1, %xmm0
     35 ; FMA3-NEXT:    retq
     36 ;
     37 ; FMA4-LABEL: mul_subadd_ps128:
     38 ; FMA4:       # %bb.0: # %entry
     39 ; FMA4-NEXT:    vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0
     40 ; FMA4-NEXT:    retq
     41 entry:
     42   %AB = fmul <4 x float> %A, %B
     43   %Sub = fsub <4 x float> %AB, %C
     44   %Add = fadd <4 x float> %AB, %C
     45   %subadd = shufflevector <4 x float> %Add, <4 x float> %Sub, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
     46   ret <4 x float> %subadd
     47 }
     48 
     49 define <4 x double> @mul_subadd_pd256(<4 x double> %A, <4 x double> %B, <4 x double> %C) #0 {
        ; Positive test, <4 x double>. The shuffle mask <0, 5, 2, 7> takes the even
        ; lanes from %Add (%AB + %C) and the odd lanes from %Sub (%AB - %C). The
        ; assertions below expect a single ymm vfmsubadd213pd for the FMA3
        ; configurations and a single ymm vfmsubaddpd for FMA4.
     50 ; FMA3-LABEL: mul_subadd_pd256:
     51 ; FMA3:       # %bb.0: # %entry
     52 ; FMA3-NEXT:    vfmsubadd213pd  %ymm2, %ymm1, %ymm0
     53 ; FMA3-NEXT:    retq
     54 ;
     55 ; FMA4-LABEL: mul_subadd_pd256:
     56 ; FMA4:       # %bb.0: # %entry
     57 ; FMA4-NEXT:    vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0
     58 ; FMA4-NEXT:    retq
     59 entry:
     60   %AB = fmul <4 x double> %A, %B
     61   %Sub = fsub <4 x double> %AB, %C
     62   %Add = fadd <4 x double> %AB, %C
     63   %subadd = shufflevector <4 x double> %Add, <4 x double> %Sub, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
     64   ret <4 x double> %subadd
     65 }
     66 
     67 define <8 x float> @mul_subadd_ps256(<8 x float> %A, <8 x float> %B, <8 x float> %C) #0 {
        ; Positive test, <8 x float>. The shuffle mask selects indices 0, 2, 4, 6
        ; from %Add and indices 9, 11, 13, 15 (lanes 1, 3, 5, 7 of the second
        ; operand) from %Sub, so even lanes add and odd lanes subtract. The
        ; assertions below expect a single ymm vfmsubadd213ps for the FMA3
        ; configurations and a single ymm vfmsubaddps for FMA4.
     68 ; FMA3-LABEL: mul_subadd_ps256:
     69 ; FMA3:       # %bb.0: # %entry
     70 ; FMA3-NEXT:    vfmsubadd213ps  %ymm2, %ymm1, %ymm0
     71 ; FMA3-NEXT:    retq
     72 ;
     73 ; FMA4-LABEL: mul_subadd_ps256:
     74 ; FMA4:       # %bb.0: # %entry
     75 ; FMA4-NEXT:    vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0
     76 ; FMA4-NEXT:    retq
     77 entry:
     78   %AB = fmul <8 x float> %A, %B
     79   %Sub = fsub <8 x float> %AB, %C
     80   %Add = fadd <8 x float> %AB, %C
     81   %subadd = shufflevector <8 x float> %Add, <8 x float> %Sub, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
     82   ret <8 x float> %subadd
     83 }
     84 
     85 define <8 x double> @mul_subadd_pd512(<8 x double> %A, <8 x double> %B, <8 x double> %C) #0 {
        ; Positive test, <8 x double> (512 bits). The shuffle takes even lanes from
        ; %Add and odd lanes from %Sub, both built on the shared product %AB.
        ; Expected lowering differs per configuration in the assertions below:
        ;  - +fma only (FMA3_256 prefix) and +fma4 expect the operation split into
        ;    two ymm instructions, one per 256-bit half;
        ;  - +fma,+avx512f (FMA3_512 prefix) expects a single zmm vfmsubadd213pd.
     86 ; FMA3_256-LABEL: mul_subadd_pd512:
     87 ; FMA3_256:       # %bb.0: # %entry
     88 ; FMA3_256-NEXT:    vfmsubadd213pd  %ymm4, %ymm2, %ymm0
     89 ; FMA3_256-NEXT:    vfmsubadd213pd  %ymm5, %ymm3, %ymm1
     90 ; FMA3_256-NEXT:    retq
     91 ;
     92 ; FMA3_512-LABEL: mul_subadd_pd512:
     93 ; FMA3_512:       # %bb.0: # %entry
     94 ; FMA3_512-NEXT:    vfmsubadd213pd  %zmm2, %zmm1, %zmm0
     95 ; FMA3_512-NEXT:    retq
     96 ;
     97 ; FMA4-LABEL: mul_subadd_pd512:
     98 ; FMA4:       # %bb.0: # %entry
     99 ; FMA4-NEXT:    vfmsubaddpd %ymm4, %ymm2, %ymm0, %ymm0
    100 ; FMA4-NEXT:    vfmsubaddpd %ymm5, %ymm3, %ymm1, %ymm1
    101 ; FMA4-NEXT:    retq
    102 entry:
    103   %AB = fmul <8 x double> %A, %B
    104   %Sub = fsub <8 x double> %AB, %C
    105   %Add = fadd <8 x double> %AB, %C
    106   %subadd = shufflevector <8 x double> %Add, <8 x double> %Sub, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
    107   ret <8 x double> %subadd
    108 }
    109 
    110 define <16 x float> @mul_subadd_ps512(<16 x float> %A, <16 x float> %B, <16 x float> %C) #0 {
        ; Positive test, <16 x float> (512 bits). The shuffle mask uses the even
        ; indices 0..14 from %Add and the odd indices 17..31 (odd lanes of the
        ; second operand) from %Sub. Expected lowering in the assertions below:
        ;  - +fma only (FMA3_256 prefix) and +fma4 expect two ymm instructions,
        ;    one per 256-bit half;
        ;  - +fma,+avx512f (FMA3_512 prefix) expects a single zmm vfmsubadd213ps.
    111 ; FMA3_256-LABEL: mul_subadd_ps512:
    112 ; FMA3_256:       # %bb.0: # %entry
    113 ; FMA3_256-NEXT:    vfmsubadd213ps  %ymm4, %ymm2, %ymm0
    114 ; FMA3_256-NEXT:    vfmsubadd213ps  %ymm5, %ymm3, %ymm1
    115 ; FMA3_256-NEXT:    retq
    116 ;
    117 ; FMA3_512-LABEL: mul_subadd_ps512:
    118 ; FMA3_512:       # %bb.0: # %entry
    119 ; FMA3_512-NEXT:    vfmsubadd213ps  %zmm2, %zmm1, %zmm0
    120 ; FMA3_512-NEXT:    retq
    121 ;
    122 ; FMA4-LABEL: mul_subadd_ps512:
    123 ; FMA4:       # %bb.0: # %entry
    124 ; FMA4-NEXT:    vfmsubaddps %ymm4, %ymm2, %ymm0, %ymm0
    125 ; FMA4-NEXT:    vfmsubaddps %ymm5, %ymm3, %ymm1, %ymm1
    126 ; FMA4-NEXT:    retq
    127 entry:
    128   %AB = fmul <16 x float> %A, %B
    129   %Sub = fsub <16 x float> %AB, %C
    130   %Add = fadd <16 x float> %AB, %C
    131   %subadd = shufflevector <16 x float> %Add, <16 x float> %Sub, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
    132   ret <16 x float> %subadd
    133 }
    134 
    135 ; This should not be matched to fmsubadd because the mul is on the wrong side of the fsub.
    136 define <2 x double> @mul_subadd_bad_commute(<2 x double> %A, <2 x double> %B, <2 x double> %C) #0 {
        ; Negative test. Here %Sub is %C - %AB (the product is the subtrahend),
        ; whereas the positive tests in this file compute %AB - %C. The shuffle
        ; mask <0, 3> is the same as in mul_subadd_pd128. The assertions below
        ; expect no fused instruction for either FMA3 or FMA4, only a separate
        ; vmulpd, vsubpd and vaddpd followed by a vblendpd that merges lane 0 of
        ; the sum with lane 1 of the difference.
    137 ; FMA3-LABEL: mul_subadd_bad_commute:
    138 ; FMA3:       # %bb.0: # %entry
    139 ; FMA3-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
    140 ; FMA3-NEXT:    vsubpd %xmm0, %xmm2, %xmm1
    141 ; FMA3-NEXT:    vaddpd %xmm2, %xmm0, %xmm0
    142 ; FMA3-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
    143 ; FMA3-NEXT:    retq
    144 ;
    145 ; FMA4-LABEL: mul_subadd_bad_commute:
    146 ; FMA4:       # %bb.0: # %entry
    147 ; FMA4-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
    148 ; FMA4-NEXT:    vsubpd %xmm0, %xmm2, %xmm1
    149 ; FMA4-NEXT:    vaddpd %xmm2, %xmm0, %xmm0
    150 ; FMA4-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
    151 ; FMA4-NEXT:    retq
    152 entry:
    153   %AB = fmul <2 x double> %A, %B
    154   %Sub = fsub <2 x double> %C, %AB
    155   %Add = fadd <2 x double> %AB, %C
    156   %subadd = shufflevector <2 x double> %Add, <2 x double> %Sub, <2 x i32> <i32 0, i32 3>
    157   ret <2 x double> %subadd
    158 }
    159 
    160 attributes #0 = { nounwind "unsafe-fp-math"="true" }
        ; Attribute group #0 is referenced by every function defined in this file.
        ; NOTE(review): "unsafe-fp-math" is presumably what permits contracting the
        ; separate fmul and fadd/fsub into a fused instruction here -- confirm
        ; before removing or changing it.
    161