; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f -fp-contract=fast | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -fp-contract=fast | FileCheck %s --check-prefix=SKX

; Each function below spells a multiply followed by an add/subtract as
; separate IR instructions. Both llc invocations pass -fp-contract=fast, and
; the expected-output lines require a single fused multiply-add instruction.
; The first invocation is verified with the default prefix, the second (skx)
; only with the SKX prefix, so functions without SKX lines are unchecked there.

; a0 * a1 + a2 on 16 x float.
; CHECK-LABEL: test_x86_fmadd_ps_z
; CHECK: vfmadd213ps %zmm2, %zmm1, %zmm0
; CHECK: ret
define <16 x float> @test_x86_fmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
  %prod = fmul <16 x float> %a0, %a1
  %sum = fadd <16 x float> %prod, %a2
  ret <16 x float> %sum
}

; a0 * a1 - a2 on 16 x float.
; CHECK-LABEL: test_x86_fmsub_ps_z
; CHECK: vfmsub213ps %zmm2, %zmm1, %zmm0
; CHECK: ret
define <16 x float> @test_x86_fmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
  %prod = fmul <16 x float> %a0, %a1
  %diff = fsub <16 x float> %prod, %a2
  ret <16 x float> %diff
}

; a2 - a0 * a1 on 16 x float.
; CHECK-LABEL: test_x86_fnmadd_ps_z
; CHECK: vfnmadd213ps %zmm2, %zmm1, %zmm0
; CHECK: ret
define <16 x float> @test_x86_fnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
  %prod = fmul <16 x float> %a0, %a1
  %diff = fsub <16 x float> %a2, %prod
  ret <16 x float> %diff
}

; (-0.0 - a0 * a1) - a2 on 16 x float; the subtraction from a -0.0 vector
; negates the product.
; CHECK-LABEL: test_x86_fnmsub_ps_z
; CHECK: vfnmsub213ps %zmm2, %zmm1, %zmm0
; CHECK: ret
define <16 x float> @test_x86_fnmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
  %prod = fmul <16 x float> %a0, %a1
  %neg = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
                            float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
                            float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
                            float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %prod
  %diff = fsub <16 x float> %neg, %a2
  ret <16 x float> %diff
}

; a0 * a1 + a2 on 8 x double.
; CHECK-LABEL: test_x86_fmadd_pd_z
; CHECK: vfmadd213pd %zmm2, %zmm1, %zmm0
; CHECK: ret
define <8 x double> @test_x86_fmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
  %prod = fmul <8 x double> %a0, %a1
  %sum = fadd <8 x double> %prod, %a2
  ret <8 x double> %sum
}

; a0 * a1 - a2 on 8 x double.
; CHECK-LABEL: test_x86_fmsub_pd_z
; CHECK: vfmsub213pd %zmm2, %zmm1, %zmm0
; CHECK: ret
define <8 x double> @test_x86_fmsub_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
  %prod = fmul <8 x double> %a0, %a1
  %diff = fsub <8 x double> %prod, %a2
  ret <8 x double> %diff
}

; Scalar double a0 * a1 - a2 with all three operands passed by value.
define double @test_x86_fmsub_213(double %a0, double %a1, double %a2) {
; CHECK-LABEL: test_x86_fmsub_213:
; CHECK: ## BB#0:
; CHECK-NEXT: vfmsub213sd %xmm2, %xmm0, %xmm1
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %prod = fmul double %a0, %a1
  %diff = fsub double %prod, %a2
  ret double %diff
}

; Scalar double a0 * a1 - mem: the subtrahend is loaded from %a2_ptr and the
; expected instruction takes it as a memory operand.
define double @test_x86_fmsub_213_m(double %a0, double %a1, double * %a2_ptr) {
; CHECK-LABEL: test_x86_fmsub_213_m:
; CHECK: ## BB#0:
; CHECK-NEXT: vfmsub213sd (%rdi), %xmm0, %xmm1
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %mem = load double, double* %a2_ptr
  %prod = fmul double %a0, %a1
  %diff = fsub double %prod, %mem
  ret double %diff
}

; Scalar double a0 * mem - a1: here the loaded value is a multiplicand, and
; the expected instruction is the 231 form with a memory operand.
define double @test_x86_fmsub_231_m(double %a0, double %a1, double * %a2_ptr) {
; CHECK-LABEL: test_x86_fmsub_231_m:
; CHECK: ## BB#0:
; CHECK-NEXT: vfmsub231sd (%rdi), %xmm0, %xmm1
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %mem = load double, double* %a2_ptr
  %prod = fmul double %a0, %a2_ptr_unused_guard
  %diff = fsub double %prod, %a1
  ret double %diff
}

; a1 * C + a2 where C is one float constant repeated in all 16 lanes; the
; expected 231 form reads the multiplicand as a {1to16} broadcast from memory.
define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind {
; CHECK-LABEL: test231_br:
; CHECK: ## BB#0:
; CHECK-NEXT: vfmadd231ps {{.*}}(%rip){1to16}, %zmm0, %zmm1
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %scaled = fmul <16 x float> %a1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000,
                                    float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000,
                                    float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000,
                                    float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  %sum = fadd <16 x float> %scaled, %a2
  ret <16 x float> %sum
}

; a1 * a2 + C where C is one float constant repeated in all 16 lanes; the
; expected 213 form reads the addend as a {1to16} broadcast from memory.
define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind {
; CHECK-LABEL: test213_br:
; CHECK: ## BB#0:
; CHECK-NEXT: vfmadd213ps {{.*}}(%rip){1to16}, %zmm1, %zmm0
; CHECK-NEXT: retq
  %prod = fmul <16 x float> %a1, %a2
  %sum = fadd <16 x float> %prod, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000,
                                   float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000,
                                   float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000,
                                   float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  ret <16 x float> %sum
}

; Masked select between (a0 * mem + a1) and a0: lanes whose mask bit is
; clear keep the first multiplicand, matching a merge-masked 132 form.
define <16 x float> @test_x86_fmadd132_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
; CHECK-LABEL: test_x86_fmadd132_ps:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmovsxbd %xmm2, %zmm2
; CHECK-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
; CHECK-NEXT: vptestmd %zmm2, %zmm2, %k1
; CHECK-NEXT: vfmadd132ps (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
;
; SKX-LABEL: test_x86_fmadd132_ps:
; SKX: ## BB#0:
; SKX-NEXT: vpmovb2m %xmm2, %k1
; SKX-NEXT: vfmadd132ps (%rdi), %zmm1, %zmm0 {%k1}
; SKX-NEXT: retq
  %mem = load <16 x float>, <16 x float>* %a2_ptrt, align 1
  %prod = fmul <16 x float> %a0, %mem
  %sum = fadd <16 x float> %prod, %a1
  %sel = select <16 x i1> %mask, <16 x float> %sum, <16 x float> %a0
  ret <16 x float> %sel
}

; Masked select between (a0 * mem + a1) and a1: lanes whose mask bit is
; clear keep the addend, matching a merge-masked 231 form.
define <16 x float> @test_x86_fmadd231_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
; CHECK-LABEL: test_x86_fmadd231_ps:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmovsxbd %xmm2, %zmm2
; CHECK-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
; CHECK-NEXT: vptestmd %zmm2, %zmm2, %k1
; CHECK-NEXT: vfmadd231ps (%rdi), %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
;
; SKX-LABEL: test_x86_fmadd231_ps:
; SKX: ## BB#0:
; SKX-NEXT: vpmovb2m %xmm2, %k1
; SKX-NEXT: vfmadd231ps (%rdi), %zmm0, %zmm1 {%k1}
; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: retq
  %mem = load <16 x float>, <16 x float>* %a2_ptrt, align 1
  %prod = fmul <16 x float> %a0, %mem
  %sum = fadd <16 x float> %prod, %a1
  %sel = select <16 x i1> %mask, <16 x float> %sum, <16 x float> %a1
  ret <16 x float> %sel
}

; Masked select between (a1 * a0 + mem) and a1: lanes whose mask bit is
; clear keep the first multiplicand a1, matching a merge-masked 213 form.
define <16 x float> @test_x86_fmadd213_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
; CHECK-LABEL: test_x86_fmadd213_ps:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmovsxbd %xmm2, %zmm2
; CHECK-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
; CHECK-NEXT: vptestmd %zmm2, %zmm2, %k1
; CHECK-NEXT: vfmadd213ps (%rdi), %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
;
; SKX-LABEL: test_x86_fmadd213_ps:
; SKX: ## BB#0:
; SKX-NEXT: vpmovb2m %xmm2, %k1
; SKX-NEXT: vfmadd213ps (%rdi), %zmm0, %zmm1 {%k1}
; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: retq
  %mem = load <16 x float>, <16 x float>* %a2_ptrt, align 1
  %prod = fmul <16 x float> %a1, %a0
  %sum = fadd <16 x float> %prod, %mem
  %sel = select <16 x i1> %mask, <16 x float> %sum, <16 x float> %a1
  ret <16 x float> %sel
}