1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+fma | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC 3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL 4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL 5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE 6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL 7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX 8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 9 10 ; 11 ; VFMADD 12 ; 13 14 define void @test_vfmaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize { 15 ; GENERIC-LABEL: test_vfmaddpd_128: 16 ; GENERIC: # %bb.0: 17 ; GENERIC-NEXT: #APP 18 ; GENERIC-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] 19 ; GENERIC-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] 20 ; GENERIC-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] 21 ; GENERIC-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] 22 ; GENERIC-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] 23 ; GENERIC-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] 24 ; GENERIC-NEXT: #NO_APP 25 ; GENERIC-NEXT: retq # sched: [1:1.00] 26 ; 27 ; HASWELL-LABEL: test_vfmaddpd_128: 28 ; HASWELL: # %bb.0: 29 ; HASWELL-NEXT: #APP 30 ; HASWELL-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] 31 ; HASWELL-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] 32 ; HASWELL-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] 33 ; HASWELL-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [11:0.50] 34 ; HASWELL-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [11:0.50] 35 ; HASWELL-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [11:0.50] 36 ; HASWELL-NEXT: #NO_APP 37 ; HASWELL-NEXT: retq # sched: [7:1.00] 38 ; 39 ; BROADWELL-LABEL: test_vfmaddpd_128: 40 ; BROADWELL: # %bb.0: 41 ; BROADWELL-NEXT: #APP 42 ; BROADWELL-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] 43 ; BROADWELL-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] 44 ; BROADWELL-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] 45 ; BROADWELL-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] 46 ; BROADWELL-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] 47 ; BROADWELL-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] 48 ; BROADWELL-NEXT: #NO_APP 49 ; BROADWELL-NEXT: retq # sched: [7:1.00] 50 ; 51 ; SKYLAKE-LABEL: test_vfmaddpd_128: 52 ; SKYLAKE: # %bb.0: 53 ; SKYLAKE-NEXT: #APP 54 ; SKYLAKE-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50] 55 ; SKYLAKE-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50] 56 ; SKYLAKE-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50] 57 ; SKYLAKE-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] 58 ; SKYLAKE-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] 59 ; SKYLAKE-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] 60 ; SKYLAKE-NEXT: #NO_APP 61 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 62 ; 63 ; KNL-LABEL: test_vfmaddpd_128: 64 ; KNL: # %bb.0: 65 ; KNL-NEXT: #APP 66 ; KNL-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] 67 ; KNL-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] 68 ; KNL-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] 69 ; KNL-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [11:0.50] 70 ; KNL-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [11:0.50] 71 ; KNL-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [11:0.50] 72 ; KNL-NEXT: #NO_APP 73 ; KNL-NEXT: retq # sched: [7:1.00] 74 ; 75 ; SKX-LABEL: test_vfmaddpd_128: 76 ; SKX: # %bb.0: 77 ; SKX-NEXT: #APP 78 ; SKX-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50] 79 ; SKX-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50] 80 ; SKX-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50] 81 ; SKX-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] 82 ; SKX-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] 83 ; SKX-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] 84 ; SKX-NEXT: #NO_APP 85 ; SKX-NEXT: retq # sched: [7:1.00] 86 ; 87 ; ZNVER1-LABEL: test_vfmaddpd_128: 88 ; ZNVER1: # %bb.0: 89 ; ZNVER1-NEXT: #APP 90 ; ZNVER1-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] 91 ; ZNVER1-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] 92 ; ZNVER1-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] 93 ; ZNVER1-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [12:0.50] 94 ; ZNVER1-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [12:0.50] 95 ; ZNVER1-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [12:0.50] 96 ; ZNVER1-NEXT: #NO_APP 97 ; ZNVER1-NEXT: retq # sched: [1:0.50] 98 tail call void asm "vfmadd132pd $2, $1, $0 \0A\09 vfmadd213pd $2, $1, $0 \0A\09 vfmadd231pd $2, $1, $0 \0A\09 vfmadd132pd $3, $1, $0 \0A\09 vfmadd213pd $3, $1, $0 \0A\09 vfmadd231pd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind 99 ret void 100 } 101 102 define void @test_vfmaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize { 103 ; GENERIC-LABEL: test_vfmaddpd_256: 104 ; GENERIC: # %bb.0: 105 ; GENERIC-NEXT: #APP 106 ; GENERIC-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50] 107 ; GENERIC-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50] 108 ; GENERIC-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50] 109 ; GENERIC-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [10:0.50] 110 ; GENERIC-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [10:0.50] 111 ; GENERIC-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [10:0.50] 112 ; GENERIC-NEXT: #NO_APP 113 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 114 ; GENERIC-NEXT: retq # sched: [1:1.00] 115 ; 116 ; HASWELL-LABEL: test_vfmaddpd_256: 117 ; HASWELL: # %bb.0: 118 ; HASWELL-NEXT: #APP 119 ; HASWELL-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50] 120 ; HASWELL-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50] 121 ; HASWELL-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50] 122 ; HASWELL-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [12:0.50] 123 ; HASWELL-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50] 124 ; HASWELL-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50] 125 ; HASWELL-NEXT: #NO_APP 126 ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] 127 ; HASWELL-NEXT: retq # sched: [7:1.00] 128 ; 129 ; BROADWELL-LABEL: test_vfmaddpd_256: 130 ; BROADWELL: # %bb.0: 131 ; BROADWELL-NEXT: #APP 132 ; BROADWELL-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50] 133 ; BROADWELL-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50] 134 ; BROADWELL-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50] 135 ; BROADWELL-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50] 136 ; BROADWELL-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50] 137 ; BROADWELL-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50] 138 ; BROADWELL-NEXT: #NO_APP 139 ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] 140 ; BROADWELL-NEXT: retq # sched: [7:1.00] 141 ; 142 ; SKYLAKE-LABEL: test_vfmaddpd_256: 143 ; SKYLAKE: # %bb.0: 144 ; SKYLAKE-NEXT: #APP 145 ; SKYLAKE-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.50] 146 ; SKYLAKE-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.50] 147 ; SKYLAKE-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.50] 148 ; SKYLAKE-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50] 149 ; SKYLAKE-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50] 150 ; SKYLAKE-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50] 151 ; SKYLAKE-NEXT: #NO_APP 152 ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] 153 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 154 ; 155 ; KNL-LABEL: test_vfmaddpd_256: 156 ; KNL: # %bb.0: 157 ; KNL-NEXT: #APP 158 ; KNL-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50] 159 ; KNL-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50] 160 ; KNL-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50] 161 ; KNL-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [12:0.50] 162 ; KNL-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50] 163 ; KNL-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50] 164 ; KNL-NEXT: #NO_APP 165 ; KNL-NEXT: retq # sched: [7:1.00] 166 ; 167 ; SKX-LABEL: test_vfmaddpd_256: 168 ; SKX: # %bb.0: 169 ; SKX-NEXT: #APP 170 ; SKX-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.50] 171 ; SKX-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.50] 172 ; SKX-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.50] 173 ; SKX-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50] 174 ; SKX-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50] 175 ; SKX-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50] 176 ; SKX-NEXT: #NO_APP 177 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 178 ; SKX-NEXT: retq # sched: [7:1.00] 179 ; 180 ; ZNVER1-LABEL: test_vfmaddpd_256: 181 ; ZNVER1: # %bb.0: 182 ; ZNVER1-NEXT: #APP 183 ; ZNVER1-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50] 184 ; ZNVER1-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50] 185 ; ZNVER1-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50] 186 ; ZNVER1-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [12:0.50] 187 ; ZNVER1-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50] 188 ; ZNVER1-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50] 189 ; ZNVER1-NEXT: #NO_APP 190 ; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] 191 ; ZNVER1-NEXT: retq # sched: [1:0.50] 192 tail call void asm "vfmadd132pd $2, $1, $0 \0A\09 vfmadd213pd $2, $1, $0 \0A\09 vfmadd231pd $2, $1, $0 \0A\09 vfmadd132pd $3, $1, $0 \0A\09 vfmadd213pd $3, $1, $0 \0A\09 vfmadd231pd $3, $1, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind 193 ret void 194 } 195 196 define void @test_vfmaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize { 197 ; GENERIC-LABEL: test_vfmaddps_128: 198 ; GENERIC: # %bb.0: 199 ; GENERIC-NEXT: #APP 200 ; GENERIC-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] 201 ; GENERIC-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] 202 ; GENERIC-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] 203 ; GENERIC-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] 204 ; GENERIC-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] 205 ; GENERIC-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] 206 ; GENERIC-NEXT: #NO_APP 207 ; GENERIC-NEXT: retq # sched: [1:1.00] 208 ; 209 ; HASWELL-LABEL: test_vfmaddps_128: 210 ; HASWELL: # %bb.0: 211 ; HASWELL-NEXT: #APP 212 ; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] 213 ; HASWELL-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] 214 ; HASWELL-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] 215 ; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [11:0.50] 216 ; HASWELL-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [11:0.50] 217 ; HASWELL-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [11:0.50] 218 ; HASWELL-NEXT: #NO_APP 219 ; HASWELL-NEXT: retq # sched: [7:1.00] 220 ; 221 ; BROADWELL-LABEL: test_vfmaddps_128: 222 ; BROADWELL: # %bb.0: 223 ; BROADWELL-NEXT: #APP 224 ; BROADWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] 225 ; BROADWELL-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] 226 ; BROADWELL-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] 227 ; BROADWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] 228 ; BROADWELL-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] 229 ; BROADWELL-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] 230 ; BROADWELL-NEXT: #NO_APP 231 ; BROADWELL-NEXT: retq # sched: [7:1.00] 232 ; 233 ; SKYLAKE-LABEL: test_vfmaddps_128: 234 ; SKYLAKE: # %bb.0: 235 ; SKYLAKE-NEXT: #APP 236 ; SKYLAKE-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50] 237 ; SKYLAKE-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50] 238 ; SKYLAKE-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50] 239 ; SKYLAKE-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] 240 ; SKYLAKE-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] 241 ; SKYLAKE-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] 242 ; SKYLAKE-NEXT: #NO_APP 243 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 244 ; 245 ; KNL-LABEL: test_vfmaddps_128: 246 ; KNL: # %bb.0: 247 ; KNL-NEXT: #APP 248 ; KNL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] 249 ; KNL-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] 250 ; KNL-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] 251 ; KNL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [11:0.50] 252 ; KNL-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [11:0.50] 253 ; KNL-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [11:0.50] 254 ; KNL-NEXT: #NO_APP 255 ; KNL-NEXT: retq # sched: [7:1.00] 256 ; 257 ; SKX-LABEL: test_vfmaddps_128: 258 ; SKX: # %bb.0: 259 ; SKX-NEXT: #APP 260 ; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50] 261 ; SKX-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50] 262 ; SKX-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50] 263 ; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] 264 ; SKX-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] 265 ; SKX-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] 266 ; SKX-NEXT: #NO_APP 267 ; SKX-NEXT: retq # sched: [7:1.00] 268 ; 269 ; ZNVER1-LABEL: test_vfmaddps_128: 270 ; ZNVER1: # %bb.0: 271 ; ZNVER1-NEXT: #APP 272 ; ZNVER1-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] 273 ; ZNVER1-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] 274 ; ZNVER1-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] 275 ; ZNVER1-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [12:0.50] 276 ; ZNVER1-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [12:0.50] 277 ; ZNVER1-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [12:0.50] 278 ; ZNVER1-NEXT: #NO_APP 279 ; ZNVER1-NEXT: retq # sched: [1:0.50] 280 tail call void asm "vfmadd132ps $2, $1, $0 \0A\09 vfmadd213ps $2, $1, $0 \0A\09 vfmadd231ps $2, $1, $0 \0A\09 vfmadd132ps $3, $1, $0 \0A\09 vfmadd213ps $3, $1, $0 \0A\09 vfmadd231ps $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind 281 ret void 282 } 283 284 define void @test_vfmaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize { 285 ; GENERIC-LABEL: test_vfmaddps_256: 286 ; GENERIC: # %bb.0: 287 ; GENERIC-NEXT: #APP 288 ; GENERIC-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50] 289 ; GENERIC-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50] 290 ; GENERIC-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50] 291 ; GENERIC-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [10:0.50] 292 ; GENERIC-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [10:0.50] 293 ; GENERIC-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [10:0.50] 294 ; GENERIC-NEXT: #NO_APP 295 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 296 ; GENERIC-NEXT: retq # sched: [1:1.00] 297 ; 298 ; HASWELL-LABEL: test_vfmaddps_256: 299 ; HASWELL: # %bb.0: 300 ; HASWELL-NEXT: #APP 301 ; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50] 302 ; HASWELL-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50] 303 ; HASWELL-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50] 304 ; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [12:0.50] 305 ; HASWELL-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50] 306 ; HASWELL-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50] 307 ; HASWELL-NEXT: #NO_APP 308 ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] 309 ; HASWELL-NEXT: retq # sched: [7:1.00] 310 ; 311 ; BROADWELL-LABEL: test_vfmaddps_256: 312 ; BROADWELL: # %bb.0: 313 ; BROADWELL-NEXT: #APP 314 ; BROADWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50] 315 ; BROADWELL-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50] 316 ; BROADWELL-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50] 317 ; BROADWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50] 318 ; BROADWELL-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50] 319 ; BROADWELL-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50] 320 ; BROADWELL-NEXT: #NO_APP 321 ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] 322 ; BROADWELL-NEXT: retq # sched: [7:1.00] 323 ; 324 ; SKYLAKE-LABEL: test_vfmaddps_256: 325 ; SKYLAKE: # %bb.0: 326 ; SKYLAKE-NEXT: #APP 327 ; SKYLAKE-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.50] 328 ; SKYLAKE-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.50] 329 ; SKYLAKE-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.50] 330 ; SKYLAKE-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50] 331 ; SKYLAKE-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50] 332 ; SKYLAKE-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50] 333 ; SKYLAKE-NEXT: #NO_APP 334 ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] 335 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 336 ; 337 ; KNL-LABEL: test_vfmaddps_256: 338 ; KNL: # %bb.0: 339 ; KNL-NEXT: #APP 340 ; KNL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50] 341 ; KNL-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50] 342 ; KNL-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50] 343 ; KNL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [12:0.50] 344 ; KNL-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50] 345 ; KNL-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50] 346 ; KNL-NEXT: #NO_APP 347 ; KNL-NEXT: retq # sched: [7:1.00] 348 ; 349 ; SKX-LABEL: test_vfmaddps_256: 350 ; SKX: # %bb.0: 351 ; SKX-NEXT: #APP 352 ; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.50] 353 ; SKX-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.50] 354 ; SKX-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.50] 355 ; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50] 356 ; SKX-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50] 357 ; SKX-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50] 358 ; SKX-NEXT: #NO_APP 359 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 360 ; SKX-NEXT: retq # sched: [7:1.00] 361 ; 362 ; ZNVER1-LABEL: test_vfmaddps_256: 363 ; ZNVER1: # %bb.0: 364 ; ZNVER1-NEXT: #APP 365 ; ZNVER1-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50] 366 ; ZNVER1-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50] 367 ; ZNVER1-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50] 368 ; ZNVER1-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [12:0.50] 369 ; ZNVER1-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50] 370 ; ZNVER1-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50] 371 ; ZNVER1-NEXT: #NO_APP 372 ; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] 373 ; ZNVER1-NEXT: retq # sched: [1:0.50] 374 tail call void asm "vfmadd132ps $2, $1, $0 \0A\09 vfmadd213ps $2, $1, $0 \0A\09 vfmadd231ps $2, $1, $0 \0A\09 vfmadd132ps $3, $1, $0 \0A\09 vfmadd213ps $3, $1, $0 \0A\09 vfmadd231ps $3, $1, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind 375 ret void 376 } 377 378 define void @test_vfmaddsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize { 379 ; GENERIC-LABEL: test_vfmaddsd_128: 380 ; GENERIC: # %bb.0: 381 ; GENERIC-NEXT: #APP 382 ; GENERIC-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] 383 ; GENERIC-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] 384 ; GENERIC-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] 385 ; GENERIC-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] 386 ; GENERIC-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] 387 ; GENERIC-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] 388 ; GENERIC-NEXT: #NO_APP 389 ; GENERIC-NEXT: retq # sched: [1:1.00] 390 ; 391 ; HASWELL-LABEL: test_vfmaddsd_128: 392 ; HASWELL: # %bb.0: 393 ; HASWELL-NEXT: #APP 394 ; HASWELL-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] 395 ; HASWELL-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] 396 ; HASWELL-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] 397 ; HASWELL-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] 398 ; HASWELL-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] 399 ; HASWELL-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] 400 ; HASWELL-NEXT: #NO_APP 401 ; HASWELL-NEXT: retq # sched: [7:1.00] 402 ; 403 ; BROADWELL-LABEL: test_vfmaddsd_128: 404 ; BROADWELL: # %bb.0: 405 ; BROADWELL-NEXT: #APP 406 ; BROADWELL-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] 407 ; BROADWELL-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] 408 ; BROADWELL-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] 409 ; BROADWELL-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] 410 ; BROADWELL-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] 411 ; BROADWELL-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] 412 ; BROADWELL-NEXT: #NO_APP 413 ; BROADWELL-NEXT: retq # sched: [7:1.00] 414 ; 415 ; SKYLAKE-LABEL: test_vfmaddsd_128: 416 ; SKYLAKE: # %bb.0: 417 ; SKYLAKE-NEXT: #APP 418 ; SKYLAKE-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50] 419 ; SKYLAKE-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50] 420 ; SKYLAKE-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50] 421 ; SKYLAKE-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50] 422 ; SKYLAKE-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50] 423 ; SKYLAKE-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50] 424 ; SKYLAKE-NEXT: #NO_APP 425 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 426 ; 427 ; KNL-LABEL: test_vfmaddsd_128: 428 ; KNL: # %bb.0: 429 ; KNL-NEXT: #APP 430 ; KNL-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] 431 ; KNL-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] 432 ; KNL-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] 433 ; KNL-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] 434 ; KNL-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] 435 ; KNL-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] 436 ; KNL-NEXT: #NO_APP 437 ; KNL-NEXT: retq # sched: [7:1.00] 438 ; 439 ; SKX-LABEL: test_vfmaddsd_128: 440 ; SKX: # %bb.0: 441 ; SKX-NEXT: #APP 442 ; SKX-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50] 443 ; SKX-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50] 444 ; SKX-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50] 445 ; SKX-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50] 446 ; SKX-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50] 447 ; SKX-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50] 448 ; SKX-NEXT: #NO_APP 449 ; SKX-NEXT: retq # sched: [7:1.00] 450 ; 451 ; ZNVER1-LABEL: test_vfmaddsd_128: 452 ; ZNVER1: # %bb.0: 453 ; ZNVER1-NEXT: #APP 454 ; ZNVER1-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] 455 ; ZNVER1-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] 456 ; ZNVER1-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] 457 ; ZNVER1-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [12:0.50] 458 ; ZNVER1-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [12:0.50] 459 ; ZNVER1-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [12:0.50] 460 ; ZNVER1-NEXT: #NO_APP 461 ; ZNVER1-NEXT: retq # sched: [1:0.50] 462 tail call void asm "vfmadd132sd $2, $1, $0 \0A\09 vfmadd213sd $2, $1, $0 \0A\09 vfmadd231sd $2, $1, $0 \0A\09 vfmadd132sd $3, $1, $0 \0A\09 vfmadd213sd $3, $1, $0 \0A\09 vfmadd231sd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind 463 ret void 464 } 465 466 define void @test_vfmaddss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize { 467 ; GENERIC-LABEL: test_vfmaddss_128: 468 ; GENERIC: # %bb.0: 469 ; GENERIC-NEXT: #APP 470 ; GENERIC-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] 471 ; GENERIC-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] 472 ; GENERIC-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] 473 ; GENERIC-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] 474 ; GENERIC-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] 475 ; GENERIC-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] 476 ; GENERIC-NEXT: #NO_APP 477 ; GENERIC-NEXT: retq # sched: [1:1.00] 478 ; 479 ; HASWELL-LABEL: test_vfmaddss_128: 480 ; HASWELL: # %bb.0: 481 ; HASWELL-NEXT: #APP 482 ; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] 483 ; HASWELL-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] 484 ; HASWELL-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] 485 ; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] 486 ; HASWELL-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] 487 ; HASWELL-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] 488 ; HASWELL-NEXT: #NO_APP 489 ; HASWELL-NEXT: retq # sched: [7:1.00] 490 ; 491 ; BROADWELL-LABEL: test_vfmaddss_128: 492 ; BROADWELL: # %bb.0: 493 ; BROADWELL-NEXT: #APP 494 ; BROADWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] 495 ; BROADWELL-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] 496 ; BROADWELL-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] 497 ; BROADWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] 498 ; BROADWELL-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] 499 ; BROADWELL-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] 500 ; BROADWELL-NEXT: #NO_APP 501 ; BROADWELL-NEXT: retq # sched: [7:1.00] 502 ; 503 ; SKYLAKE-LABEL: test_vfmaddss_128: 504 ; SKYLAKE: # %bb.0: 505 ; SKYLAKE-NEXT: #APP 506 ; SKYLAKE-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50] 507 ; SKYLAKE-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50] 508 ; SKYLAKE-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50] 509 ; SKYLAKE-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50] 510 ; SKYLAKE-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50] 511 ; SKYLAKE-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50] 512 ; SKYLAKE-NEXT: #NO_APP 513 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 514 ; 515 ; KNL-LABEL: test_vfmaddss_128: 516 ; KNL: # %bb.0: 517 ; KNL-NEXT: #APP 518 ; KNL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] 519 ; KNL-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] 520 ; KNL-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] 521 ; KNL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] 522 ; KNL-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] 523 ; KNL-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] 524 ; KNL-NEXT: #NO_APP 525 ; KNL-NEXT: retq # sched: [7:1.00] 526 ; 527 ; SKX-LABEL: test_vfmaddss_128: 528 ; SKX: # %bb.0: 529 ; SKX-NEXT: #APP 530 ; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50] 531 ; SKX-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50] 532 ; SKX-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50] 533 ; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50] 534 ; SKX-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50] 535 ; SKX-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50] 536 ; SKX-NEXT: #NO_APP 537 ; SKX-NEXT: retq # sched: [7:1.00] 538 ; 539 ; ZNVER1-LABEL: test_vfmaddss_128: 540 ; ZNVER1: # %bb.0: 541 ; ZNVER1-NEXT: #APP 542 ; ZNVER1-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] 543 ; ZNVER1-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] 544 ; ZNVER1-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] 545 ; ZNVER1-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [12:0.50] 546 ; ZNVER1-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [12:0.50] 547 ; ZNVER1-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [12:0.50] 548 ; ZNVER1-NEXT: #NO_APP 549 ; ZNVER1-NEXT: retq # sched: [1:0.50] 550 tail call void asm "vfmadd132ss $2, $1, $0 \0A\09 vfmadd213ss $2, $1, $0 \0A\09 vfmadd231ss $2, $1, $0 \0A\09 vfmadd132ss $3, $1, $0 \0A\09 vfmadd213ss $3, $1, $0 \0A\09 vfmadd231ss $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind 551 ret void 552 } 553 554 ; 555 ; VFMADDSUB 556 ; 557 558 define void @test_vfmaddsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize { 559 ; GENERIC-LABEL: test_vfmaddsubpd_128: 560 ; GENERIC: # %bb.0: 561 ; GENERIC-NEXT: #APP 562 ; GENERIC-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50] 563 ; GENERIC-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50] 564 ; GENERIC-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50] 565 ; GENERIC-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50] 566 ; GENERIC-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50] 567 ; GENERIC-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50] 568 ; GENERIC-NEXT: #NO_APP 569 ; GENERIC-NEXT: retq # sched: [1:1.00] 570 ; 571 ; HASWELL-LABEL: test_vfmaddsubpd_128: 572 ; HASWELL: # %bb.0: 573 ; HASWELL-NEXT: #APP 574 ; HASWELL-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50] 575 ; HASWELL-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50] 576 ; HASWELL-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50] 577 ; HASWELL-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [11:0.50] 578 ; HASWELL-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [11:0.50] 579 ; HASWELL-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [11:0.50] 580 ; HASWELL-NEXT: #NO_APP 581 ; HASWELL-NEXT: retq # sched: [7:1.00] 582 ; 583 ; BROADWELL-LABEL: test_vfmaddsubpd_128: 584 ; BROADWELL: # %bb.0: 585 ; BROADWELL-NEXT: #APP 586 ; BROADWELL-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50] 587 ; BROADWELL-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50] 588 ; BROADWELL-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50] 589 ; BROADWELL-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50] 590 ; BROADWELL-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50] 591 ; BROADWELL-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50] 592 ; BROADWELL-NEXT: #NO_APP 593 ; BROADWELL-NEXT: retq # sched: [7:1.00] 594 ; 595 ; SKYLAKE-LABEL: test_vfmaddsubpd_128: 596 ; SKYLAKE: # %bb.0: 597 ; SKYLAKE-NEXT: #APP 598 ; SKYLAKE-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [4:0.50] 599 ; SKYLAKE-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [4:0.50] 600 ; SKYLAKE-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [4:0.50] 601 ; SKYLAKE-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50] 602 ; SKYLAKE-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50] 603 ; SKYLAKE-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50] 604 ; SKYLAKE-NEXT: #NO_APP 605 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 606 ; 607 ; KNL-LABEL: test_vfmaddsubpd_128: 608 ; KNL: # %bb.0: 609 ; KNL-NEXT: #APP 610 ; KNL-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50] 611 ; KNL-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50] 612 ; KNL-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50] 613 ; KNL-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [11:0.50] 614 ; KNL-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [11:0.50] 615 ; KNL-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [11:0.50] 616 ; KNL-NEXT: #NO_APP 617 ; KNL-NEXT: retq # sched: [7:1.00] 618 ; 619 ; SKX-LABEL: test_vfmaddsubpd_128: 620 ; SKX: # %bb.0: 621 ; SKX-NEXT: #APP 622 ; SKX-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [4:0.50] 623 ; SKX-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [4:0.50] 624 ; SKX-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [4:0.50] 625 ; SKX-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50] 626 ; SKX-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50] 627 ; SKX-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50] 628 ; SKX-NEXT: #NO_APP 629 ; SKX-NEXT: retq # sched: [7:1.00] 630 ; 631 ; ZNVER1-LABEL: test_vfmaddsubpd_128: 632 ; ZNVER1: # %bb.0: 633 ; ZNVER1-NEXT: #APP 634 ; ZNVER1-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50] 635 ; ZNVER1-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50] 636 ; ZNVER1-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50] 637 ; ZNVER1-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [12:0.50] 638 ; ZNVER1-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [12:0.50] 639 ; ZNVER1-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [12:0.50] 640 ; ZNVER1-NEXT: #NO_APP 641 ; ZNVER1-NEXT: retq # sched: [1:0.50] 642 tail call void asm "vfmaddsub132pd $2, $1, $0 \0A\09 vfmaddsub213pd $2, $1, $0 \0A\09 vfmaddsub231pd $2, $1, $0 \0A\09 vfmaddsub132pd $3, $1, $0 \0A\09 vfmaddsub213pd $3, $1, $0 \0A\09 vfmaddsub231pd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind 643 ret void 644 } 645 646 define void @test_vfmaddsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize { 647 ; GENERIC-LABEL: test_vfmaddsubpd_256: 648 ; GENERIC: # %bb.0: 649 ; GENERIC-NEXT: #APP 650 ; GENERIC-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50] 651 ; GENERIC-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50] 652 ; GENERIC-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50] 653 ; GENERIC-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [10:0.50] 654 ; GENERIC-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [10:0.50] 655 ; GENERIC-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [10:0.50] 656 ; GENERIC-NEXT: #NO_APP 657 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 658 ; GENERIC-NEXT: retq # sched: [1:1.00] 659 ; 660 ; HASWELL-LABEL: test_vfmaddsubpd_256: 661 ; HASWELL: # %bb.0: 662 ; HASWELL-NEXT: #APP 663 ; HASWELL-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50] 664 ; HASWELL-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50] 665 ; HASWELL-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50] 666 ; HASWELL-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [12:0.50] 667 ; HASWELL-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [12:0.50] 668 ; HASWELL-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [12:0.50] 669 ; HASWELL-NEXT: #NO_APP 670 ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] 671 ; HASWELL-NEXT: retq # sched: [7:1.00] 672 ; 673 ; BROADWELL-LABEL: test_vfmaddsubpd_256: 674 ; BROADWELL: # %bb.0: 675 ; BROADWELL-NEXT: #APP 676 ; BROADWELL-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50] 677 ; BROADWELL-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50] 678 ; BROADWELL-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50] 679 ; BROADWELL-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50] 680 ; BROADWELL-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50] 681 ; BROADWELL-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50] 682 ; BROADWELL-NEXT: #NO_APP 683 ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] 684 ; BROADWELL-NEXT: retq # sched: [7:1.00] 685 ; 686 ; SKYLAKE-LABEL: test_vfmaddsubpd_256: 687 ; SKYLAKE: # %bb.0: 688 ; SKYLAKE-NEXT: #APP 689 ; SKYLAKE-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [4:0.50] 690 ; SKYLAKE-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [4:0.50] 691 ; SKYLAKE-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [4:0.50] 692 ; SKYLAKE-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50] 693 ; SKYLAKE-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50] 694 ; SKYLAKE-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50] 695 ; SKYLAKE-NEXT: #NO_APP 696 ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] 697 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 698 ; 699 ; KNL-LABEL: test_vfmaddsubpd_256: 700 ; KNL: # %bb.0: 701 ; KNL-NEXT: #APP 702 ; KNL-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50] 703 ; KNL-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50] 704 ; KNL-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50] 705 ; KNL-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [12:0.50] 706 ; KNL-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [12:0.50] 707 ; KNL-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [12:0.50] 708 ; KNL-NEXT: #NO_APP 709 ; KNL-NEXT: retq # sched: [7:1.00] 710 ; 711 ; SKX-LABEL: test_vfmaddsubpd_256: 712 ; SKX: # %bb.0: 713 ; SKX-NEXT: #APP 714 ; SKX-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [4:0.50] 715 ; SKX-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [4:0.50] 716 ; SKX-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [4:0.50] 717 ; SKX-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50] 718 ; SKX-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50] 719 ; SKX-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50] 720 ; SKX-NEXT: #NO_APP 721 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 722 ; SKX-NEXT: retq # sched: [7:1.00] 723 ; 724 ; ZNVER1-LABEL: test_vfmaddsubpd_256: 725 ; ZNVER1: # %bb.0: 726 ; ZNVER1-NEXT: #APP 727 ; ZNVER1-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50] 728 ; ZNVER1-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50] 729 ; ZNVER1-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50] 730 ; ZNVER1-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [12:0.50] 731 ; ZNVER1-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [12:0.50] 732 ; ZNVER1-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [12:0.50] 733 ; ZNVER1-NEXT: #NO_APP 734 ; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] 735 ; ZNVER1-NEXT: retq # sched: [1:0.50] 736 tail call void asm "vfmaddsub132pd $2, $1, $0 \0A\09 vfmaddsub213pd $2, $1, $0 \0A\09 vfmaddsub231pd $2, $1, $0 \0A\09 vfmaddsub132pd $3, $1, $0 \0A\09 vfmaddsub213pd $3, $1, $0 \0A\09 vfmaddsub231pd $3, $1, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind 737 ret void 738 } 739 740 define void @test_vfmaddsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize { 741 ; GENERIC-LABEL: test_vfmaddsubps_128: 742 ; GENERIC: # %bb.0: 743 ; GENERIC-NEXT: #APP 744 ; GENERIC-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50] 745 ; GENERIC-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50] 746 ; GENERIC-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50] 747 ; GENERIC-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50] 748 ; GENERIC-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50] 749 ; GENERIC-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50] 750 ; GENERIC-NEXT: #NO_APP 751 ; GENERIC-NEXT: retq # sched: [1:1.00] 752 ; 753 ; HASWELL-LABEL: test_vfmaddsubps_128: 754 ; HASWELL: # %bb.0: 755 ; HASWELL-NEXT: #APP 756 ; HASWELL-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50] 757 ; HASWELL-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50] 758 ; HASWELL-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50] 759 ; HASWELL-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [11:0.50] 760 ; HASWELL-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [11:0.50] 761 ; HASWELL-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [11:0.50] 762 ; HASWELL-NEXT: #NO_APP 763 ; HASWELL-NEXT: retq # sched: [7:1.00] 764 ; 765 ; BROADWELL-LABEL: test_vfmaddsubps_128: 766 ; BROADWELL: # %bb.0: 767 ; BROADWELL-NEXT: #APP 768 ; BROADWELL-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50] 769 ; BROADWELL-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50] 770 ; BROADWELL-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50] 771 ; BROADWELL-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50] 772 ; BROADWELL-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50] 773 ; BROADWELL-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50] 774 ; BROADWELL-NEXT: #NO_APP 775 ; BROADWELL-NEXT: retq # sched: [7:1.00] 776 ; 777 ; SKYLAKE-LABEL: test_vfmaddsubps_128: 778 ; SKYLAKE: # %bb.0: 779 ; SKYLAKE-NEXT: #APP 780 ; SKYLAKE-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [4:0.50] 781 ; SKYLAKE-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [4:0.50] 782 ; SKYLAKE-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [4:0.50] 783 ; SKYLAKE-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50] 784 ; SKYLAKE-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50] 785 ; SKYLAKE-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50] 786 ; SKYLAKE-NEXT: #NO_APP 787 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 788 ; 789 ; KNL-LABEL: test_vfmaddsubps_128: 790 ; KNL: # %bb.0: 791 ; KNL-NEXT: #APP 792 ; KNL-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50] 793 ; KNL-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50] 794 ; KNL-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50] 795 ; KNL-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [11:0.50] 796 ; KNL-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [11:0.50] 797 ; KNL-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [11:0.50] 798 ; KNL-NEXT: #NO_APP 799 ; KNL-NEXT: retq # sched: [7:1.00] 800 ; 801 ; SKX-LABEL: test_vfmaddsubps_128: 802 ; SKX: # %bb.0: 803 ; SKX-NEXT: #APP 804 ; SKX-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [4:0.50] 805 ; SKX-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [4:0.50] 806 ; SKX-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [4:0.50] 807 ; SKX-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50] 808 ; SKX-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50] 809 ; SKX-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50] 810 ; SKX-NEXT: #NO_APP 811 ; SKX-NEXT: retq # sched: [7:1.00] 812 ; 813 ; ZNVER1-LABEL: test_vfmaddsubps_128: 814 ; ZNVER1: # %bb.0: 815 ; ZNVER1-NEXT: #APP 816 ; ZNVER1-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50] 817 ; ZNVER1-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50] 818 ; ZNVER1-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50] 819 ; ZNVER1-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [12:0.50] 820 ; ZNVER1-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [12:0.50] 821 ; ZNVER1-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [12:0.50] 822 ; ZNVER1-NEXT: #NO_APP 823 ; ZNVER1-NEXT: retq # sched: [1:0.50] 824 tail call void asm "vfmaddsub132ps $2, $1, $0 \0A\09 vfmaddsub213ps $2, $1, $0 \0A\09 vfmaddsub231ps $2, $1, $0 \0A\09 vfmaddsub132ps $3, $1, $0 \0A\09 vfmaddsub213ps $3, $1, $0 \0A\09 vfmaddsub231ps $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind 825 ret void 826 } 827 828 define void @test_vfmaddsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize { 829 ; GENERIC-LABEL: test_vfmaddsubps_256: 830 ; GENERIC: # %bb.0: 831 ; GENERIC-NEXT: #APP 832 ; GENERIC-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50] 833 ; GENERIC-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50] 834 ; GENERIC-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50] 835 ; GENERIC-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [10:0.50] 836 ; GENERIC-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [10:0.50] 837 ; GENERIC-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [10:0.50] 838 ; GENERIC-NEXT: #NO_APP 839 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 840 ; GENERIC-NEXT: retq # sched: [1:1.00] 841 ; 842 ; HASWELL-LABEL: test_vfmaddsubps_256: 843 ; HASWELL: # %bb.0: 844 ; HASWELL-NEXT: #APP 845 ; HASWELL-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50] 846 ; HASWELL-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50] 847 ; HASWELL-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50] 848 ; HASWELL-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [12:0.50] 849 ; HASWELL-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [12:0.50] 850 ; HASWELL-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [12:0.50] 851 ; HASWELL-NEXT: #NO_APP 852 ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] 853 ; HASWELL-NEXT: retq # sched: [7:1.00] 854 ; 855 ; BROADWELL-LABEL: test_vfmaddsubps_256: 856 ; BROADWELL: # %bb.0: 857 ; BROADWELL-NEXT: #APP 858 ; BROADWELL-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50] 859 ; BROADWELL-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50] 860 ; BROADWELL-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50] 861 ; BROADWELL-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50] 862 ; BROADWELL-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50] 863 ; BROADWELL-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50] 864 ; BROADWELL-NEXT: #NO_APP 865 ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] 866 ; BROADWELL-NEXT: retq # sched: [7:1.00] 867 ; 868 ; SKYLAKE-LABEL: test_vfmaddsubps_256: 869 ; SKYLAKE: # %bb.0: 870 ; SKYLAKE-NEXT: #APP 871 ; SKYLAKE-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [4:0.50] 872 ; SKYLAKE-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [4:0.50] 873 ; SKYLAKE-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [4:0.50] 874 ; SKYLAKE-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50] 875 ; SKYLAKE-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50] 876 ; SKYLAKE-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50] 877 ; SKYLAKE-NEXT: #NO_APP 878 ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] 879 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 880 ; 881 ; KNL-LABEL: test_vfmaddsubps_256: 882 ; KNL: # %bb.0: 883 ; KNL-NEXT: #APP 884 ; KNL-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50] 885 ; KNL-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50] 886 ; KNL-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50] 887 ; KNL-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [12:0.50] 888 ; KNL-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [12:0.50] 889 ; KNL-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [12:0.50] 890 ; KNL-NEXT: #NO_APP 891 ; KNL-NEXT: retq # sched: [7:1.00] 892 ; 893 ; SKX-LABEL: test_vfmaddsubps_256: 894 ; SKX: # %bb.0: 895 ; SKX-NEXT: #APP 896 ; SKX-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [4:0.50] 897 ; SKX-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [4:0.50] 898 ; SKX-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [4:0.50] 899 ; SKX-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50] 900 ; SKX-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50] 901 ; SKX-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50] 902 ; SKX-NEXT: #NO_APP 903 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 904 ; SKX-NEXT: retq # sched: [7:1.00] 905 ; 906 ; ZNVER1-LABEL: test_vfmaddsubps_256: 907 ; ZNVER1: # %bb.0: 908 ; ZNVER1-NEXT: #APP 909 ; ZNVER1-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50] 910 ; ZNVER1-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50] 911 ; ZNVER1-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50] 912 ; ZNVER1-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [12:0.50] 913 ; ZNVER1-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [12:0.50] 914 ; ZNVER1-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [12:0.50] 915 ; ZNVER1-NEXT: #NO_APP 916 ; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] 917 ; ZNVER1-NEXT: retq # sched: [1:0.50] 918 tail call void asm "vfmaddsub132ps $2, $1, $0 \0A\09 vfmaddsub213ps $2, $1, $0 \0A\09 vfmaddsub231ps $2, $1, $0 \0A\09 vfmaddsub132ps $3, $1, $0 \0A\09 vfmaddsub213ps $3, $1, $0 \0A\09 vfmaddsub231ps $3, $1, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind 919 ret void 920 } 921 922 ; 923 ; VFMSUBADD 924 ; 925 926 define void @test_vfmsubaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize { 927 ; GENERIC-LABEL: test_vfmsubaddpd_128: 928 ; GENERIC: # %bb.0: 929 ; GENERIC-NEXT: #APP 930 ; GENERIC-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50] 931 ; GENERIC-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50] 932 ; GENERIC-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50] 933 ; GENERIC-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50] 934 ; GENERIC-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50] 935 ; GENERIC-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50] 936 ; GENERIC-NEXT: #NO_APP 937 ; GENERIC-NEXT: retq # sched: [1:1.00] 938 ; 939 ; HASWELL-LABEL: test_vfmsubaddpd_128: 940 ; HASWELL: # %bb.0: 941 ; HASWELL-NEXT: #APP 942 ; HASWELL-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50] 943 ; HASWELL-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50] 944 ; HASWELL-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50] 945 ; HASWELL-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [11:0.50] 946 ; HASWELL-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [11:0.50] 947 ; HASWELL-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [11:0.50] 948 ; HASWELL-NEXT: #NO_APP 949 ; HASWELL-NEXT: retq # sched: [7:1.00] 950 ; 951 ; BROADWELL-LABEL: test_vfmsubaddpd_128: 952 ; BROADWELL: # %bb.0: 953 ; BROADWELL-NEXT: #APP 954 ; BROADWELL-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50] 955 ; BROADWELL-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50] 956 ; BROADWELL-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50] 957 ; BROADWELL-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50] 958 ; BROADWELL-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50] 959 ; BROADWELL-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50] 960 ; BROADWELL-NEXT: #NO_APP 961 ; BROADWELL-NEXT: retq # sched: [7:1.00] 962 ; 963 ; SKYLAKE-LABEL: test_vfmsubaddpd_128: 964 ; SKYLAKE: # %bb.0: 965 ; SKYLAKE-NEXT: #APP 966 ; SKYLAKE-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [4:0.50] 967 ; SKYLAKE-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [4:0.50] 968 ; SKYLAKE-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [4:0.50] 969 ; SKYLAKE-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50] 970 ; SKYLAKE-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50] 971 ; SKYLAKE-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50] 972 ; SKYLAKE-NEXT: #NO_APP 973 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 974 ; 975 ; KNL-LABEL: test_vfmsubaddpd_128: 976 ; KNL: # %bb.0: 977 ; KNL-NEXT: #APP 978 ; KNL-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50] 979 ; KNL-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50] 980 ; KNL-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50] 981 ; KNL-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [11:0.50] 982 ; KNL-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [11:0.50] 983 ; KNL-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [11:0.50] 984 ; KNL-NEXT: #NO_APP 985 ; KNL-NEXT: retq # sched: [7:1.00] 986 ; 987 ; SKX-LABEL: test_vfmsubaddpd_128: 988 ; SKX: # %bb.0: 989 ; SKX-NEXT: #APP 990 ; SKX-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [4:0.50] 991 ; SKX-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [4:0.50] 992 ; SKX-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [4:0.50] 993 ; SKX-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50] 994 ; SKX-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50] 995 ; SKX-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50] 996 ; SKX-NEXT: #NO_APP 997 ; SKX-NEXT: retq # sched: [7:1.00] 998 ; 999 ; ZNVER1-LABEL: test_vfmsubaddpd_128: 1000 ; ZNVER1: # %bb.0: 1001 ; ZNVER1-NEXT: #APP 1002 ; ZNVER1-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50] 1003 ; ZNVER1-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50] 1004 ; ZNVER1-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50] 1005 ; ZNVER1-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [12:0.50] 1006 ; ZNVER1-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [12:0.50] 1007 ; ZNVER1-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [12:0.50] 1008 ; ZNVER1-NEXT: #NO_APP 1009 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1010 tail call void asm "vfmsubadd132pd $2, $1, $0 \0A\09 vfmsubadd213pd $2, $1, $0 \0A\09 vfmsubadd231pd $2, $1, $0 \0A\09 vfmsubadd132pd $3, $1, $0 \0A\09 vfmsubadd213pd $3, $1, $0 \0A\09 vfmsubadd231pd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind 1011 ret void 1012 } 1013 1014 define void @test_vfmsubaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize { 1015 ; GENERIC-LABEL: test_vfmsubaddpd_256: 1016 ; GENERIC: # %bb.0: 1017 ; GENERIC-NEXT: #APP 1018 ; GENERIC-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50] 1019 ; GENERIC-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50] 1020 ; GENERIC-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50] 1021 ; GENERIC-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [10:0.50] 1022 ; GENERIC-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [10:0.50] 1023 ; GENERIC-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [10:0.50] 1024 ; GENERIC-NEXT: #NO_APP 1025 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 1026 ; GENERIC-NEXT: retq # sched: [1:1.00] 1027 ; 1028 ; HASWELL-LABEL: test_vfmsubaddpd_256: 1029 ; HASWELL: # %bb.0: 1030 ; HASWELL-NEXT: #APP 1031 ; HASWELL-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50] 1032 ; HASWELL-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50] 1033 ; HASWELL-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50] 1034 ; HASWELL-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [12:0.50] 1035 ; HASWELL-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [12:0.50] 1036 ; HASWELL-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [12:0.50] 1037 ; HASWELL-NEXT: #NO_APP 1038 ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] 1039 ; HASWELL-NEXT: retq # sched: [7:1.00] 1040 ; 1041 ; BROADWELL-LABEL: test_vfmsubaddpd_256: 1042 ; BROADWELL: # %bb.0: 1043 ; BROADWELL-NEXT: #APP 1044 ; BROADWELL-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50] 1045 ; BROADWELL-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50] 1046 ; BROADWELL-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50] 1047 ; BROADWELL-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50] 1048 ; BROADWELL-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50] 1049 ; BROADWELL-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50] 1050 ; BROADWELL-NEXT: #NO_APP 1051 ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] 1052 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1053 ; 1054 ; SKYLAKE-LABEL: test_vfmsubaddpd_256: 1055 ; SKYLAKE: # %bb.0: 1056 ; SKYLAKE-NEXT: #APP 1057 ; SKYLAKE-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [4:0.50] 1058 ; SKYLAKE-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [4:0.50] 1059 ; SKYLAKE-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [4:0.50] 1060 ; SKYLAKE-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50] 1061 ; SKYLAKE-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50] 1062 ; SKYLAKE-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50] 1063 ; SKYLAKE-NEXT: #NO_APP 1064 ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] 1065 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1066 ; 1067 ; KNL-LABEL: test_vfmsubaddpd_256: 1068 ; KNL: # %bb.0: 1069 ; KNL-NEXT: #APP 1070 ; KNL-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50] 1071 ; KNL-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50] 1072 ; KNL-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50] 1073 ; KNL-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [12:0.50] 1074 ; KNL-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [12:0.50] 1075 ; KNL-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [12:0.50] 1076 ; KNL-NEXT: #NO_APP 1077 ; KNL-NEXT: retq # sched: [7:1.00] 1078 ; 1079 ; SKX-LABEL: test_vfmsubaddpd_256: 1080 ; SKX: # %bb.0: 1081 ; SKX-NEXT: #APP 1082 ; SKX-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [4:0.50] 1083 ; SKX-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [4:0.50] 1084 ; SKX-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [4:0.50] 1085 ; SKX-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50] 1086 ; SKX-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50] 1087 ; SKX-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50] 1088 ; SKX-NEXT: #NO_APP 1089 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 1090 ; SKX-NEXT: retq # sched: [7:1.00] 1091 ; 1092 ; ZNVER1-LABEL: test_vfmsubaddpd_256: 1093 ; ZNVER1: # %bb.0: 1094 ; ZNVER1-NEXT: #APP 1095 ; ZNVER1-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50] 1096 ; ZNVER1-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50] 1097 ; ZNVER1-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50] 1098 ; ZNVER1-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [12:0.50] 1099 ; ZNVER1-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [12:0.50] 1100 ; ZNVER1-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [12:0.50] 1101 ; ZNVER1-NEXT: #NO_APP 1102 ; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] 1103 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1104 tail call void asm "vfmsubadd132pd $2, $1, $0 \0A\09 vfmsubadd213pd $2, $1, $0 \0A\09 vfmsubadd231pd $2, $1, $0 \0A\09 vfmsubadd132pd $3, $1, $0 \0A\09 vfmsubadd213pd $3, $1, $0 \0A\09 vfmsubadd231pd $3, $1, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind 1105 ret void 1106 } 1107 1108 define void @test_vfmsubaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize { 1109 ; GENERIC-LABEL: test_vfmsubaddps_128: 1110 ; GENERIC: # %bb.0: 1111 ; GENERIC-NEXT: #APP 1112 ; GENERIC-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50] 1113 ; GENERIC-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50] 1114 ; GENERIC-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50] 1115 ; GENERIC-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50] 1116 ; GENERIC-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50] 1117 ; GENERIC-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50] 1118 ; GENERIC-NEXT: #NO_APP 1119 ; GENERIC-NEXT: retq # sched: [1:1.00] 1120 ; 1121 ; HASWELL-LABEL: test_vfmsubaddps_128: 1122 ; HASWELL: # %bb.0: 1123 ; HASWELL-NEXT: #APP 1124 ; HASWELL-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50] 1125 ; HASWELL-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50] 1126 ; HASWELL-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50] 1127 ; HASWELL-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [11:0.50] 1128 ; HASWELL-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [11:0.50] 1129 ; HASWELL-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [11:0.50] 1130 ; HASWELL-NEXT: #NO_APP 1131 ; HASWELL-NEXT: retq # sched: [7:1.00] 1132 ; 1133 ; BROADWELL-LABEL: test_vfmsubaddps_128: 1134 ; BROADWELL: # %bb.0: 1135 ; BROADWELL-NEXT: #APP 1136 ; BROADWELL-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50] 1137 ; BROADWELL-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50] 1138 ; BROADWELL-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50] 1139 ; BROADWELL-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50] 1140 ; BROADWELL-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50] 1141 ; BROADWELL-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50] 1142 ; BROADWELL-NEXT: #NO_APP 1143 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1144 ; 1145 ; SKYLAKE-LABEL: test_vfmsubaddps_128: 1146 ; SKYLAKE: # %bb.0: 1147 ; SKYLAKE-NEXT: #APP 1148 ; SKYLAKE-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [4:0.50] 1149 ; SKYLAKE-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [4:0.50] 1150 ; SKYLAKE-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [4:0.50] 1151 ; SKYLAKE-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50] 1152 ; SKYLAKE-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50] 1153 ; SKYLAKE-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50] 1154 ; SKYLAKE-NEXT: #NO_APP 1155 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1156 ; 1157 ; KNL-LABEL: test_vfmsubaddps_128: 1158 ; KNL: # %bb.0: 1159 ; KNL-NEXT: #APP 1160 ; KNL-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50] 1161 ; KNL-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50] 1162 ; KNL-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50] 1163 ; KNL-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [11:0.50] 1164 ; KNL-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [11:0.50] 1165 ; KNL-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [11:0.50] 1166 ; KNL-NEXT: #NO_APP 1167 ; KNL-NEXT: retq # sched: [7:1.00] 1168 ; 1169 ; SKX-LABEL: test_vfmsubaddps_128: 1170 ; SKX: # %bb.0: 1171 ; SKX-NEXT: #APP 1172 ; SKX-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [4:0.50] 1173 ; SKX-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [4:0.50] 1174 ; SKX-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [4:0.50] 1175 ; SKX-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50] 1176 ; SKX-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50] 1177 ; SKX-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50] 1178 ; SKX-NEXT: #NO_APP 1179 ; SKX-NEXT: retq # sched: [7:1.00] 1180 ; 1181 ; ZNVER1-LABEL: test_vfmsubaddps_128: 1182 ; ZNVER1: # %bb.0: 1183 ; ZNVER1-NEXT: #APP 1184 ; ZNVER1-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50] 1185 ; ZNVER1-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50] 1186 ; ZNVER1-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50] 1187 ; ZNVER1-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [12:0.50] 1188 ; ZNVER1-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [12:0.50] 1189 ; ZNVER1-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [12:0.50] 1190 ; ZNVER1-NEXT: #NO_APP 1191 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1192 tail call void asm "vfmsubadd132ps $2, $1, $0 \0A\09 vfmsubadd213ps $2, $1, $0 \0A\09 vfmsubadd231ps $2, $1, $0 \0A\09 vfmsubadd132ps $3, $1, $0 \0A\09 vfmsubadd213ps $3, $1, $0 \0A\09 vfmsubadd231ps $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind 1193 ret void 1194 } 1195 1196 define void @test_vfmsubaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize { 1197 ; GENERIC-LABEL: test_vfmsubaddps_256: 1198 ; GENERIC: # %bb.0: 1199 ; GENERIC-NEXT: #APP 1200 ; GENERIC-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50] 1201 ; GENERIC-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50] 1202 ; GENERIC-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50] 1203 ; GENERIC-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [10:0.50] 1204 ; GENERIC-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [10:0.50] 1205 ; GENERIC-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [10:0.50] 1206 ; GENERIC-NEXT: #NO_APP 1207 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 1208 ; GENERIC-NEXT: retq # sched: [1:1.00] 1209 ; 1210 ; HASWELL-LABEL: test_vfmsubaddps_256: 1211 ; HASWELL: # %bb.0: 1212 ; HASWELL-NEXT: #APP 1213 ; HASWELL-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50] 1214 ; HASWELL-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50] 1215 ; HASWELL-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50] 1216 ; HASWELL-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [12:0.50] 1217 ; HASWELL-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [12:0.50] 1218 ; HASWELL-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [12:0.50] 1219 ; HASWELL-NEXT: #NO_APP 1220 ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] 1221 ; HASWELL-NEXT: retq # sched: [7:1.00] 1222 ; 1223 ; BROADWELL-LABEL: test_vfmsubaddps_256: 1224 ; BROADWELL: # %bb.0: 1225 ; BROADWELL-NEXT: #APP 1226 ; BROADWELL-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50] 1227 ; BROADWELL-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50] 1228 ; BROADWELL-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50] 1229 ; BROADWELL-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50] 1230 ; BROADWELL-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50] 1231 ; BROADWELL-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50] 1232 ; BROADWELL-NEXT: #NO_APP 1233 ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] 1234 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1235 ; 1236 ; SKYLAKE-LABEL: test_vfmsubaddps_256: 1237 ; SKYLAKE: # %bb.0: 1238 ; SKYLAKE-NEXT: #APP 1239 ; SKYLAKE-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [4:0.50] 1240 ; SKYLAKE-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [4:0.50] 1241 ; SKYLAKE-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [4:0.50] 1242 ; SKYLAKE-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50] 1243 ; SKYLAKE-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50] 1244 ; SKYLAKE-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50] 1245 ; SKYLAKE-NEXT: #NO_APP 1246 ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] 1247 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1248 ; 1249 ; KNL-LABEL: test_vfmsubaddps_256: 1250 ; KNL: # %bb.0: 1251 ; KNL-NEXT: #APP 1252 ; KNL-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50] 1253 ; KNL-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50] 1254 ; KNL-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50] 1255 ; KNL-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [12:0.50] 1256 ; KNL-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [12:0.50] 1257 ; KNL-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [12:0.50] 1258 ; KNL-NEXT: #NO_APP 1259 ; KNL-NEXT: retq # sched: [7:1.00] 1260 ; 1261 ; SKX-LABEL: test_vfmsubaddps_256: 1262 ; SKX: # %bb.0: 1263 ; SKX-NEXT: #APP 1264 ; SKX-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [4:0.50] 1265 ; SKX-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [4:0.50] 1266 ; SKX-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [4:0.50] 1267 ; SKX-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50] 1268 ; SKX-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50] 1269 ; SKX-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50] 1270 ; SKX-NEXT: #NO_APP 1271 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 1272 ; SKX-NEXT: retq # sched: [7:1.00] 1273 ; 1274 ; ZNVER1-LABEL: test_vfmsubaddps_256: 1275 ; ZNVER1: # %bb.0: 1276 ; ZNVER1-NEXT: #APP 1277 ; ZNVER1-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50] 1278 ; ZNVER1-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50] 1279 ; ZNVER1-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50] 1280 ; ZNVER1-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [12:0.50] 1281 ; ZNVER1-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [12:0.50] 1282 ; ZNVER1-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [12:0.50] 1283 ; ZNVER1-NEXT: #NO_APP 1284 ; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] 1285 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1286 tail call void asm "vfmsubadd132ps $2, $1, $0 \0A\09 vfmsubadd213ps $2, $1, $0 \0A\09 vfmsubadd231ps $2, $1, $0 \0A\09 vfmsubadd132ps $3, $1, $0 \0A\09 vfmsubadd213ps $3, $1, $0 \0A\09 vfmsubadd231ps $3, $1, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind 1287 ret void 1288 } 1289 1290 ; 1291 ; VFMSUB 1292 ; 1293 1294 define void @test_vfmsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize { 1295 ; GENERIC-LABEL: test_vfmsubpd_128: 1296 ; GENERIC: # %bb.0: 1297 ; GENERIC-NEXT: #APP 1298 ; GENERIC-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] 1299 ; GENERIC-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] 1300 ; GENERIC-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] 1301 ; GENERIC-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] 1302 ; GENERIC-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] 1303 ; GENERIC-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] 1304 ; GENERIC-NEXT: #NO_APP 1305 ; GENERIC-NEXT: retq # sched: [1:1.00] 1306 ; 1307 ; HASWELL-LABEL: test_vfmsubpd_128: 1308 ; HASWELL: # %bb.0: 1309 ; HASWELL-NEXT: #APP 1310 ; HASWELL-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] 1311 ; HASWELL-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] 1312 ; HASWELL-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] 1313 ; HASWELL-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [11:0.50] 1314 ; HASWELL-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [11:0.50] 1315 ; HASWELL-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [11:0.50] 1316 ; HASWELL-NEXT: #NO_APP 1317 ; HASWELL-NEXT: retq # sched: [7:1.00] 1318 ; 1319 ; BROADWELL-LABEL: test_vfmsubpd_128: 1320 ; BROADWELL: # %bb.0: 1321 ; BROADWELL-NEXT: #APP 1322 ; BROADWELL-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] 1323 ; BROADWELL-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] 1324 ; BROADWELL-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] 1325 ; BROADWELL-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] 1326 ; BROADWELL-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] 1327 ; BROADWELL-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] 1328 ; BROADWELL-NEXT: #NO_APP 1329 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1330 ; 1331 ; SKYLAKE-LABEL: test_vfmsubpd_128: 1332 ; SKYLAKE: # %bb.0: 1333 ; SKYLAKE-NEXT: #APP 1334 ; SKYLAKE-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50] 1335 ; SKYLAKE-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50] 1336 ; SKYLAKE-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50] 1337 ; SKYLAKE-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] 1338 ; SKYLAKE-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] 1339 ; SKYLAKE-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] 1340 ; SKYLAKE-NEXT: #NO_APP 1341 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1342 ; 1343 ; KNL-LABEL: test_vfmsubpd_128: 1344 ; KNL: # %bb.0: 1345 ; KNL-NEXT: #APP 1346 ; KNL-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] 1347 ; KNL-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] 1348 ; KNL-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] 1349 ; KNL-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [11:0.50] 1350 ; KNL-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [11:0.50] 1351 ; KNL-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [11:0.50] 1352 ; KNL-NEXT: #NO_APP 1353 ; KNL-NEXT: retq # sched: [7:1.00] 1354 ; 1355 ; SKX-LABEL: test_vfmsubpd_128: 1356 ; SKX: # %bb.0: 1357 ; SKX-NEXT: #APP 1358 ; SKX-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50] 1359 ; SKX-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50] 1360 ; SKX-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50] 1361 ; SKX-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] 1362 ; SKX-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] 1363 ; SKX-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] 1364 ; SKX-NEXT: #NO_APP 1365 ; SKX-NEXT: retq # sched: [7:1.00] 1366 ; 1367 ; ZNVER1-LABEL: test_vfmsubpd_128: 1368 ; ZNVER1: # %bb.0: 1369 ; ZNVER1-NEXT: #APP 1370 ; ZNVER1-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] 1371 ; ZNVER1-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] 1372 ; ZNVER1-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] 1373 ; ZNVER1-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [12:0.50] 1374 ; ZNVER1-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [12:0.50] 1375 ; ZNVER1-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [12:0.50] 1376 ; ZNVER1-NEXT: #NO_APP 1377 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1378 tail call void asm "vfmsub132pd $2, $1, $0 \0A\09 vfmsub213pd $2, $1, $0 \0A\09 vfmsub231pd $2, $1, $0 \0A\09 vfmsub132pd $3, $1, $0 \0A\09 vfmsub213pd $3, $1, $0 \0A\09 vfmsub231pd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind 1379 ret void 1380 } 1381 1382 define void @test_vfmsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize { 1383 ; GENERIC-LABEL: test_vfmsubpd_256: 1384 ; GENERIC: # %bb.0: 1385 ; GENERIC-NEXT: #APP 1386 ; GENERIC-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50] 1387 ; GENERIC-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50] 1388 ; GENERIC-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50] 1389 ; GENERIC-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [10:0.50] 1390 ; GENERIC-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [10:0.50] 1391 ; GENERIC-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [10:0.50] 1392 ; GENERIC-NEXT: #NO_APP 1393 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 1394 ; GENERIC-NEXT: retq # sched: [1:1.00] 1395 ; 1396 ; HASWELL-LABEL: test_vfmsubpd_256: 1397 ; HASWELL: # %bb.0: 1398 ; HASWELL-NEXT: #APP 1399 ; HASWELL-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50] 1400 ; HASWELL-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50] 1401 ; HASWELL-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50] 1402 ; HASWELL-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [12:0.50] 1403 ; HASWELL-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [12:0.50] 1404 ; HASWELL-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [12:0.50] 1405 ; HASWELL-NEXT: #NO_APP 1406 ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] 1407 ; HASWELL-NEXT: retq # sched: [7:1.00] 1408 ; 1409 ; BROADWELL-LABEL: test_vfmsubpd_256: 1410 ; BROADWELL: # %bb.0: 1411 ; BROADWELL-NEXT: #APP 1412 ; BROADWELL-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50] 1413 ; BROADWELL-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50] 1414 ; BROADWELL-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50] 1415 ; BROADWELL-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50] 1416 ; BROADWELL-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50] 1417 ; BROADWELL-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50] 1418 ; BROADWELL-NEXT: #NO_APP 1419 ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] 1420 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1421 ; 1422 ; SKYLAKE-LABEL: test_vfmsubpd_256: 1423 ; SKYLAKE: # %bb.0: 1424 ; SKYLAKE-NEXT: #APP 1425 ; SKYLAKE-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [4:0.50] 1426 ; SKYLAKE-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [4:0.50] 1427 ; SKYLAKE-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [4:0.50] 1428 ; SKYLAKE-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50] 1429 ; SKYLAKE-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50] 1430 ; SKYLAKE-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50] 1431 ; SKYLAKE-NEXT: #NO_APP 1432 ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] 1433 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1434 ; 1435 ; KNL-LABEL: test_vfmsubpd_256: 1436 ; KNL: # %bb.0: 1437 ; KNL-NEXT: #APP 1438 ; KNL-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50] 1439 ; KNL-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50] 1440 ; KNL-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50] 1441 ; KNL-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [12:0.50] 1442 ; KNL-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [12:0.50] 1443 ; KNL-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [12:0.50] 1444 ; KNL-NEXT: #NO_APP 1445 ; KNL-NEXT: retq # sched: [7:1.00] 1446 ; 1447 ; SKX-LABEL: test_vfmsubpd_256: 1448 ; SKX: # %bb.0: 1449 ; SKX-NEXT: #APP 1450 ; SKX-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [4:0.50] 1451 ; SKX-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [4:0.50] 1452 ; SKX-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [4:0.50] 1453 ; SKX-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50] 1454 ; SKX-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50] 1455 ; SKX-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50] 1456 ; SKX-NEXT: #NO_APP 1457 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 1458 ; SKX-NEXT: retq # sched: [7:1.00] 1459 ; 1460 ; ZNVER1-LABEL: test_vfmsubpd_256: 1461 ; ZNVER1: # %bb.0: 1462 ; ZNVER1-NEXT: #APP 1463 ; ZNVER1-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50] 1464 ; ZNVER1-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50] 1465 ; ZNVER1-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50] 1466 ; ZNVER1-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [12:0.50] 1467 ; ZNVER1-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [12:0.50] 1468 ; ZNVER1-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [12:0.50] 1469 ; ZNVER1-NEXT: #NO_APP 1470 ; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] 1471 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1472 tail call void asm "vfmsub132pd $2, $1, $0 \0A\09 vfmsub213pd $2, $1, $0 \0A\09 vfmsub231pd $2, $1, $0 \0A\09 vfmsub132pd $3, $1, $0 \0A\09 vfmsub213pd $3, $1, $0 \0A\09 vfmsub231pd $3, $1, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind 1473 ret void 1474 } 1475 1476 define void @test_vfmsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize { 1477 ; GENERIC-LABEL: test_vfmsubps_128: 1478 ; GENERIC: # %bb.0: 1479 ; GENERIC-NEXT: #APP 1480 ; GENERIC-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] 1481 ; GENERIC-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] 1482 ; GENERIC-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] 1483 ; GENERIC-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] 1484 ; GENERIC-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] 1485 ; GENERIC-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] 1486 ; GENERIC-NEXT: #NO_APP 1487 ; GENERIC-NEXT: retq # sched: [1:1.00] 1488 ; 1489 ; HASWELL-LABEL: test_vfmsubps_128: 1490 ; HASWELL: # %bb.0: 1491 ; HASWELL-NEXT: #APP 1492 ; HASWELL-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] 1493 ; HASWELL-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] 1494 ; HASWELL-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] 1495 ; HASWELL-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [11:0.50] 1496 ; HASWELL-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [11:0.50] 1497 ; HASWELL-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [11:0.50] 1498 ; HASWELL-NEXT: #NO_APP 1499 ; HASWELL-NEXT: retq # sched: [7:1.00] 1500 ; 1501 ; BROADWELL-LABEL: test_vfmsubps_128: 1502 ; BROADWELL: # %bb.0: 1503 ; BROADWELL-NEXT: #APP 1504 ; BROADWELL-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] 1505 ; BROADWELL-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] 1506 ; BROADWELL-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] 1507 ; BROADWELL-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] 1508 ; BROADWELL-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] 1509 ; BROADWELL-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] 1510 ; BROADWELL-NEXT: #NO_APP 1511 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1512 ; 1513 ; SKYLAKE-LABEL: test_vfmsubps_128: 1514 ; SKYLAKE: # %bb.0: 1515 ; SKYLAKE-NEXT: #APP 1516 ; SKYLAKE-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50] 1517 ; SKYLAKE-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50] 1518 ; SKYLAKE-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50] 1519 ; SKYLAKE-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] 1520 ; SKYLAKE-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] 1521 ; SKYLAKE-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] 1522 ; SKYLAKE-NEXT: #NO_APP 1523 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1524 ; 1525 ; KNL-LABEL: test_vfmsubps_128: 1526 ; KNL: # %bb.0: 1527 ; KNL-NEXT: #APP 1528 ; KNL-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] 1529 ; KNL-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] 1530 ; KNL-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] 1531 ; KNL-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [11:0.50] 1532 ; KNL-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [11:0.50] 1533 ; KNL-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [11:0.50] 1534 ; KNL-NEXT: #NO_APP 1535 ; KNL-NEXT: retq # sched: [7:1.00] 1536 ; 1537 ; SKX-LABEL: test_vfmsubps_128: 1538 ; SKX: # %bb.0: 1539 ; SKX-NEXT: #APP 1540 ; SKX-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50] 1541 ; SKX-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50] 1542 ; SKX-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50] 1543 ; SKX-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] 1544 ; SKX-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] 1545 ; SKX-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] 1546 ; SKX-NEXT: #NO_APP 1547 ; SKX-NEXT: retq # sched: [7:1.00] 1548 ; 1549 ; ZNVER1-LABEL: test_vfmsubps_128: 1550 ; ZNVER1: # %bb.0: 1551 ; ZNVER1-NEXT: #APP 1552 ; ZNVER1-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] 1553 ; ZNVER1-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] 1554 ; ZNVER1-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] 1555 ; ZNVER1-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [12:0.50] 1556 ; ZNVER1-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [12:0.50] 1557 ; ZNVER1-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [12:0.50] 1558 ; ZNVER1-NEXT: #NO_APP 1559 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1560 tail call void asm "vfmsub132ps $2, $1, $0 \0A\09 vfmsub213ps $2, $1, $0 \0A\09 vfmsub231ps $2, $1, $0 \0A\09 vfmsub132ps $3, $1, $0 \0A\09 vfmsub213ps $3, $1, $0 \0A\09 vfmsub231ps $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind 1561 ret void 1562 } 1563 1564 define void @test_vfmsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize { 1565 ; GENERIC-LABEL: test_vfmsubps_256: 1566 ; GENERIC: # %bb.0: 1567 ; GENERIC-NEXT: #APP 1568 ; GENERIC-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50] 1569 ; GENERIC-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50] 1570 ; GENERIC-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50] 1571 ; GENERIC-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [10:0.50] 1572 ; GENERIC-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [10:0.50] 1573 ; GENERIC-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [10:0.50] 1574 ; GENERIC-NEXT: #NO_APP 1575 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 1576 ; GENERIC-NEXT: retq # sched: [1:1.00] 1577 ; 1578 ; HASWELL-LABEL: test_vfmsubps_256: 1579 ; HASWELL: # %bb.0: 1580 ; HASWELL-NEXT: #APP 1581 ; HASWELL-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50] 1582 ; HASWELL-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50] 1583 ; HASWELL-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50] 1584 ; HASWELL-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [12:0.50] 1585 ; HASWELL-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [12:0.50] 1586 ; HASWELL-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [12:0.50] 1587 ; HASWELL-NEXT: #NO_APP 1588 ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] 1589 ; HASWELL-NEXT: retq # sched: [7:1.00] 1590 ; 1591 ; BROADWELL-LABEL: test_vfmsubps_256: 1592 ; BROADWELL: # %bb.0: 1593 ; BROADWELL-NEXT: #APP 1594 ; BROADWELL-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50] 1595 ; BROADWELL-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50] 1596 ; BROADWELL-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50] 1597 ; BROADWELL-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50] 1598 ; BROADWELL-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50] 1599 ; BROADWELL-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50] 1600 ; BROADWELL-NEXT: #NO_APP 1601 ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] 1602 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1603 ; 1604 ; SKYLAKE-LABEL: test_vfmsubps_256: 1605 ; SKYLAKE: # %bb.0: 1606 ; SKYLAKE-NEXT: #APP 1607 ; SKYLAKE-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [4:0.50] 1608 ; SKYLAKE-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [4:0.50] 1609 ; SKYLAKE-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [4:0.50] 1610 ; SKYLAKE-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50] 1611 ; SKYLAKE-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50] 1612 ; SKYLAKE-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50] 1613 ; SKYLAKE-NEXT: #NO_APP 1614 ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] 1615 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1616 ; 1617 ; KNL-LABEL: test_vfmsubps_256: 1618 ; KNL: # %bb.0: 1619 ; KNL-NEXT: #APP 1620 ; KNL-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50] 1621 ; KNL-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50] 1622 ; KNL-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50] 1623 ; KNL-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [12:0.50] 1624 ; KNL-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [12:0.50] 1625 ; KNL-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [12:0.50] 1626 ; KNL-NEXT: #NO_APP 1627 ; KNL-NEXT: retq # sched: [7:1.00] 1628 ; 1629 ; SKX-LABEL: test_vfmsubps_256: 1630 ; SKX: # %bb.0: 1631 ; SKX-NEXT: #APP 1632 ; SKX-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [4:0.50] 1633 ; SKX-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [4:0.50] 1634 ; SKX-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [4:0.50] 1635 ; SKX-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50] 1636 ; SKX-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50] 1637 ; SKX-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50] 1638 ; SKX-NEXT: #NO_APP 1639 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 1640 ; SKX-NEXT: retq # sched: [7:1.00] 1641 ; 1642 ; ZNVER1-LABEL: test_vfmsubps_256: 1643 ; ZNVER1: # %bb.0: 1644 ; ZNVER1-NEXT: #APP 1645 ; ZNVER1-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50] 1646 ; ZNVER1-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50] 1647 ; ZNVER1-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50] 1648 ; ZNVER1-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [12:0.50] 1649 ; ZNVER1-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [12:0.50] 1650 ; ZNVER1-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [12:0.50] 1651 ; ZNVER1-NEXT: #NO_APP 1652 ; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] 1653 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1654 tail call void asm "vfmsub132ps $2, $1, $0 \0A\09 vfmsub213ps $2, $1, $0 \0A\09 vfmsub231ps $2, $1, $0 \0A\09 vfmsub132ps $3, $1, $0 \0A\09 vfmsub213ps $3, $1, $0 \0A\09 vfmsub231ps $3, $1, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind 1655 ret void 1656 } 1657 1658 define void @test_vfmsubsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize { 1659 ; GENERIC-LABEL: test_vfmsubsd_128: 1660 ; GENERIC: # %bb.0: 1661 ; GENERIC-NEXT: #APP 1662 ; GENERIC-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] 1663 ; GENERIC-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] 1664 ; GENERIC-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] 1665 ; GENERIC-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] 1666 ; GENERIC-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] 1667 ; GENERIC-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] 1668 ; GENERIC-NEXT: #NO_APP 1669 ; GENERIC-NEXT: retq # sched: [1:1.00] 1670 ; 1671 ; HASWELL-LABEL: test_vfmsubsd_128: 1672 ; HASWELL: # %bb.0: 1673 ; HASWELL-NEXT: #APP 1674 ; HASWELL-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] 1675 ; HASWELL-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] 1676 ; HASWELL-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] 1677 ; HASWELL-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] 1678 ; HASWELL-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] 1679 ; HASWELL-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] 1680 ; HASWELL-NEXT: #NO_APP 1681 ; HASWELL-NEXT: retq # sched: [7:1.00] 1682 ; 1683 ; BROADWELL-LABEL: test_vfmsubsd_128: 1684 ; BROADWELL: # %bb.0: 1685 ; BROADWELL-NEXT: #APP 1686 ; BROADWELL-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] 1687 ; BROADWELL-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] 1688 ; BROADWELL-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] 1689 ; BROADWELL-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] 1690 ; BROADWELL-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] 1691 ; BROADWELL-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] 1692 ; BROADWELL-NEXT: #NO_APP 1693 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1694 ; 1695 ; SKYLAKE-LABEL: test_vfmsubsd_128: 1696 ; SKYLAKE: # %bb.0: 1697 ; SKYLAKE-NEXT: #APP 1698 ; SKYLAKE-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50] 1699 ; SKYLAKE-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50] 1700 ; SKYLAKE-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50] 1701 ; SKYLAKE-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [9:0.50] 1702 ; SKYLAKE-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [9:0.50] 1703 ; SKYLAKE-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [9:0.50] 1704 ; SKYLAKE-NEXT: #NO_APP 1705 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1706 ; 1707 ; KNL-LABEL: test_vfmsubsd_128: 1708 ; KNL: # %bb.0: 1709 ; KNL-NEXT: #APP 1710 ; KNL-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] 1711 ; KNL-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] 1712 ; KNL-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] 1713 ; KNL-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] 1714 ; KNL-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] 1715 ; KNL-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] 1716 ; KNL-NEXT: #NO_APP 1717 ; KNL-NEXT: retq # sched: [7:1.00] 1718 ; 1719 ; SKX-LABEL: test_vfmsubsd_128: 1720 ; SKX: # %bb.0: 1721 ; SKX-NEXT: #APP 1722 ; SKX-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50] 1723 ; SKX-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50] 1724 ; SKX-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50] 1725 ; SKX-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [9:0.50] 1726 ; SKX-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [9:0.50] 1727 ; SKX-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [9:0.50] 1728 ; SKX-NEXT: #NO_APP 1729 ; SKX-NEXT: retq # sched: [7:1.00] 1730 ; 1731 ; ZNVER1-LABEL: test_vfmsubsd_128: 1732 ; ZNVER1: # %bb.0: 1733 ; ZNVER1-NEXT: #APP 1734 ; ZNVER1-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] 1735 ; ZNVER1-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] 1736 ; ZNVER1-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] 1737 ; ZNVER1-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [12:0.50] 1738 ; ZNVER1-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [12:0.50] 1739 ; ZNVER1-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [12:0.50] 1740 ; ZNVER1-NEXT: #NO_APP 1741 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1742 tail call void asm "vfmsub132sd $2, $1, $0 \0A\09 vfmsub213sd $2, $1, $0 \0A\09 vfmsub231sd $2, $1, $0 \0A\09 vfmsub132sd $3, $1, $0 \0A\09 vfmsub213sd $3, $1, $0 \0A\09 vfmsub231sd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind 1743 ret void 1744 } 1745 1746 define void @test_vfmsubss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize { 1747 ; GENERIC-LABEL: test_vfmsubss_128: 1748 ; GENERIC: # %bb.0: 1749 ; GENERIC-NEXT: #APP 1750 ; GENERIC-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] 1751 ; GENERIC-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] 1752 ; GENERIC-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] 1753 ; GENERIC-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] 1754 ; GENERIC-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] 1755 ; GENERIC-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] 1756 ; GENERIC-NEXT: #NO_APP 1757 ; GENERIC-NEXT: retq # sched: [1:1.00] 1758 ; 1759 ; HASWELL-LABEL: test_vfmsubss_128: 1760 ; HASWELL: # %bb.0: 1761 ; HASWELL-NEXT: #APP 1762 ; HASWELL-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] 1763 ; HASWELL-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] 1764 ; HASWELL-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] 1765 ; HASWELL-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] 1766 ; HASWELL-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] 1767 ; HASWELL-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] 1768 ; HASWELL-NEXT: #NO_APP 1769 ; HASWELL-NEXT: retq # sched: [7:1.00] 1770 ; 1771 ; BROADWELL-LABEL: test_vfmsubss_128: 1772 ; BROADWELL: # %bb.0: 1773 ; BROADWELL-NEXT: #APP 1774 ; BROADWELL-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] 1775 ; BROADWELL-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] 1776 ; BROADWELL-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] 1777 ; BROADWELL-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] 1778 ; BROADWELL-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] 1779 ; BROADWELL-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] 1780 ; BROADWELL-NEXT: #NO_APP 1781 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1782 ; 1783 ; SKYLAKE-LABEL: test_vfmsubss_128: 1784 ; SKYLAKE: # %bb.0: 1785 ; SKYLAKE-NEXT: #APP 1786 ; SKYLAKE-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50] 1787 ; SKYLAKE-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50] 1788 ; SKYLAKE-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50] 1789 ; SKYLAKE-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [9:0.50] 1790 ; SKYLAKE-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [9:0.50] 1791 ; SKYLAKE-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [9:0.50] 1792 ; SKYLAKE-NEXT: #NO_APP 1793 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1794 ; 1795 ; KNL-LABEL: test_vfmsubss_128: 1796 ; KNL: # %bb.0: 1797 ; KNL-NEXT: #APP 1798 ; KNL-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] 1799 ; KNL-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] 1800 ; KNL-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] 1801 ; KNL-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] 1802 ; KNL-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] 1803 ; KNL-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] 1804 ; KNL-NEXT: #NO_APP 1805 ; KNL-NEXT: retq # sched: [7:1.00] 1806 ; 1807 ; SKX-LABEL: test_vfmsubss_128: 1808 ; SKX: # %bb.0: 1809 ; SKX-NEXT: #APP 1810 ; SKX-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50] 1811 ; SKX-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50] 1812 ; SKX-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50] 1813 ; SKX-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [9:0.50] 1814 ; SKX-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [9:0.50] 1815 ; SKX-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [9:0.50] 1816 ; SKX-NEXT: #NO_APP 1817 ; SKX-NEXT: retq # sched: [7:1.00] 1818 ; 1819 ; ZNVER1-LABEL: test_vfmsubss_128: 1820 ; ZNVER1: # %bb.0: 1821 ; ZNVER1-NEXT: #APP 1822 ; ZNVER1-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] 1823 ; ZNVER1-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] 1824 ; ZNVER1-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] 1825 ; ZNVER1-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [12:0.50] 1826 ; ZNVER1-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [12:0.50] 1827 ; ZNVER1-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [12:0.50] 1828 ; ZNVER1-NEXT: #NO_APP 1829 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1830 tail call void asm "vfmsub132ss $2, $1, $0 \0A\09 vfmsub213ss $2, $1, $0 \0A\09 vfmsub231ss $2, $1, $0 \0A\09 vfmsub132ss $3, $1, $0 \0A\09 vfmsub213ss $3, $1, $0 \0A\09 vfmsub231ss $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind 1831 ret void 1832 } 1833 1834 ; 1835 ; VFNMADD 1836 ; 1837 1838 define void @test_vfnmaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize { 1839 ; GENERIC-LABEL: test_vfnmaddpd_128: 1840 ; GENERIC: # %bb.0: 1841 ; GENERIC-NEXT: #APP 1842 ; GENERIC-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] 1843 ; GENERIC-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] 1844 ; GENERIC-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] 1845 ; GENERIC-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] 1846 ; GENERIC-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] 1847 ; GENERIC-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] 1848 ; GENERIC-NEXT: #NO_APP 1849 ; GENERIC-NEXT: retq # sched: [1:1.00] 1850 ; 1851 ; HASWELL-LABEL: test_vfnmaddpd_128: 1852 ; HASWELL: # %bb.0: 1853 ; HASWELL-NEXT: #APP 1854 ; HASWELL-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] 1855 ; HASWELL-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] 1856 ; HASWELL-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] 1857 ; HASWELL-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [11:0.50] 1858 ; HASWELL-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [11:0.50] 1859 ; HASWELL-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [11:0.50] 1860 ; HASWELL-NEXT: #NO_APP 1861 ; HASWELL-NEXT: retq # sched: [7:1.00] 1862 ; 1863 ; BROADWELL-LABEL: test_vfnmaddpd_128: 1864 ; BROADWELL: # %bb.0: 1865 ; BROADWELL-NEXT: #APP 1866 ; BROADWELL-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] 1867 ; BROADWELL-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] 1868 ; BROADWELL-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] 1869 ; BROADWELL-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] 1870 ; BROADWELL-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] 1871 ; BROADWELL-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] 1872 ; BROADWELL-NEXT: #NO_APP 1873 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1874 ; 1875 ; SKYLAKE-LABEL: test_vfnmaddpd_128: 1876 ; SKYLAKE: # %bb.0: 1877 ; SKYLAKE-NEXT: #APP 1878 ; SKYLAKE-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50] 1879 ; SKYLAKE-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50] 1880 ; SKYLAKE-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50] 1881 ; SKYLAKE-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] 1882 ; SKYLAKE-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] 1883 ; SKYLAKE-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] 1884 ; SKYLAKE-NEXT: #NO_APP 1885 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1886 ; 1887 ; KNL-LABEL: test_vfnmaddpd_128: 1888 ; KNL: # %bb.0: 1889 ; KNL-NEXT: #APP 1890 ; KNL-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] 1891 ; KNL-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] 1892 ; KNL-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] 1893 ; KNL-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [11:0.50] 1894 ; KNL-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [11:0.50] 1895 ; KNL-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [11:0.50] 1896 ; KNL-NEXT: #NO_APP 1897 ; KNL-NEXT: retq # sched: [7:1.00] 1898 ; 1899 ; SKX-LABEL: test_vfnmaddpd_128: 1900 ; SKX: # %bb.0: 1901 ; SKX-NEXT: #APP 1902 ; SKX-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50] 1903 ; SKX-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50] 1904 ; SKX-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50] 1905 ; SKX-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] 1906 ; SKX-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] 1907 ; SKX-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] 1908 ; SKX-NEXT: #NO_APP 1909 ; SKX-NEXT: retq # sched: [7:1.00] 1910 ; 1911 ; ZNVER1-LABEL: test_vfnmaddpd_128: 1912 ; ZNVER1: # %bb.0: 1913 ; ZNVER1-NEXT: #APP 1914 ; ZNVER1-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] 1915 ; ZNVER1-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] 1916 ; ZNVER1-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] 1917 ; ZNVER1-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [12:0.50] 1918 ; ZNVER1-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [12:0.50] 1919 ; ZNVER1-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [12:0.50] 1920 ; ZNVER1-NEXT: #NO_APP 1921 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1922 tail call void asm "vfnmadd132pd $2, $1, $0 \0A\09 vfnmadd213pd $2, $1, $0 \0A\09 vfnmadd231pd $2, $1, $0 \0A\09 vfnmadd132pd $3, $1, $0 \0A\09 vfnmadd213pd $3, $1, $0 \0A\09 vfnmadd231pd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind 1923 ret void 1924 } 1925 1926 define void @test_vfnmaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize { 1927 ; GENERIC-LABEL: test_vfnmaddpd_256: 1928 ; GENERIC: # %bb.0: 1929 ; GENERIC-NEXT: #APP 1930 ; GENERIC-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50] 1931 ; GENERIC-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50] 1932 ; GENERIC-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50] 1933 ; GENERIC-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [10:0.50] 1934 ; GENERIC-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [10:0.50] 1935 ; GENERIC-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [10:0.50] 1936 ; GENERIC-NEXT: #NO_APP 1937 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 1938 ; GENERIC-NEXT: retq # sched: [1:1.00] 1939 ; 1940 ; HASWELL-LABEL: test_vfnmaddpd_256: 1941 ; HASWELL: # %bb.0: 1942 ; HASWELL-NEXT: #APP 1943 ; HASWELL-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50] 1944 ; HASWELL-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50] 1945 ; HASWELL-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50] 1946 ; HASWELL-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [12:0.50] 1947 ; HASWELL-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [12:0.50] 1948 ; HASWELL-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [12:0.50] 1949 ; HASWELL-NEXT: #NO_APP 1950 ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] 1951 ; HASWELL-NEXT: retq # sched: [7:1.00] 1952 ; 1953 ; BROADWELL-LABEL: test_vfnmaddpd_256: 1954 ; BROADWELL: # %bb.0: 1955 ; BROADWELL-NEXT: #APP 1956 ; BROADWELL-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50] 1957 ; BROADWELL-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50] 1958 ; BROADWELL-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50] 1959 ; BROADWELL-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50] 1960 ; BROADWELL-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50] 1961 ; BROADWELL-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50] 1962 ; BROADWELL-NEXT: #NO_APP 1963 ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] 1964 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1965 ; 1966 ; SKYLAKE-LABEL: test_vfnmaddpd_256: 1967 ; SKYLAKE: # %bb.0: 1968 ; SKYLAKE-NEXT: #APP 1969 ; SKYLAKE-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [4:0.50] 1970 ; SKYLAKE-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [4:0.50] 1971 ; SKYLAKE-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [4:0.50] 1972 ; SKYLAKE-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50] 1973 ; SKYLAKE-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50] 1974 ; SKYLAKE-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50] 1975 ; SKYLAKE-NEXT: #NO_APP 1976 ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] 1977 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1978 ; 1979 ; KNL-LABEL: test_vfnmaddpd_256: 1980 ; KNL: # %bb.0: 1981 ; KNL-NEXT: #APP 1982 ; KNL-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50] 1983 ; KNL-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50] 1984 ; KNL-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50] 1985 ; KNL-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [12:0.50] 1986 ; KNL-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [12:0.50] 1987 ; KNL-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [12:0.50] 1988 ; KNL-NEXT: #NO_APP 1989 ; KNL-NEXT: retq # sched: [7:1.00] 1990 ; 1991 ; SKX-LABEL: test_vfnmaddpd_256: 1992 ; SKX: # %bb.0: 1993 ; SKX-NEXT: #APP 1994 ; SKX-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [4:0.50] 1995 ; SKX-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [4:0.50] 1996 ; SKX-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [4:0.50] 1997 ; SKX-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50] 1998 ; SKX-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50] 1999 ; SKX-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50] 2000 ; SKX-NEXT: #NO_APP 2001 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 2002 ; SKX-NEXT: retq # sched: [7:1.00] 2003 ; 2004 ; ZNVER1-LABEL: test_vfnmaddpd_256: 2005 ; ZNVER1: # %bb.0: 2006 ; ZNVER1-NEXT: #APP 2007 ; ZNVER1-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50] 2008 ; ZNVER1-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50] 2009 ; ZNVER1-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50] 2010 ; ZNVER1-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [12:0.50] 2011 ; ZNVER1-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [12:0.50] 2012 ; ZNVER1-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [12:0.50] 2013 ; ZNVER1-NEXT: #NO_APP 2014 ; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] 2015 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2016 tail call void asm "vfnmadd132pd $2, $1, $0 \0A\09 vfnmadd213pd $2, $1, $0 \0A\09 vfnmadd231pd $2, $1, $0 \0A\09 vfnmadd132pd $3, $1, $0 \0A\09 vfnmadd213pd $3, $1, $0 \0A\09 vfnmadd231pd $3, $1, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind 2017 ret void 2018 } 2019 2020 define void @test_vfnmaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize { 2021 ; GENERIC-LABEL: test_vfnmaddps_128: 2022 ; GENERIC: # %bb.0: 2023 ; GENERIC-NEXT: #APP 2024 ; GENERIC-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] 2025 ; GENERIC-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] 2026 ; GENERIC-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] 2027 ; GENERIC-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] 2028 ; GENERIC-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] 2029 ; GENERIC-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] 2030 ; GENERIC-NEXT: #NO_APP 2031 ; GENERIC-NEXT: retq # sched: [1:1.00] 2032 ; 2033 ; HASWELL-LABEL: test_vfnmaddps_128: 2034 ; HASWELL: # %bb.0: 2035 ; HASWELL-NEXT: #APP 2036 ; HASWELL-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] 2037 ; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] 2038 ; HASWELL-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] 2039 ; HASWELL-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [11:0.50] 2040 ; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [11:0.50] 2041 ; HASWELL-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [11:0.50] 2042 ; HASWELL-NEXT: #NO_APP 2043 ; HASWELL-NEXT: retq # sched: [7:1.00] 2044 ; 2045 ; BROADWELL-LABEL: test_vfnmaddps_128: 2046 ; BROADWELL: # %bb.0: 2047 ; BROADWELL-NEXT: #APP 2048 ; BROADWELL-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] 2049 ; BROADWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] 2050 ; BROADWELL-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] 2051 ; BROADWELL-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] 2052 ; BROADWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] 2053 ; BROADWELL-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] 2054 ; BROADWELL-NEXT: #NO_APP 2055 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2056 ; 2057 ; SKYLAKE-LABEL: test_vfnmaddps_128: 2058 ; SKYLAKE: # %bb.0: 2059 ; SKYLAKE-NEXT: #APP 2060 ; SKYLAKE-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50] 2061 ; SKYLAKE-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50] 2062 ; SKYLAKE-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50] 2063 ; SKYLAKE-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] 2064 ; SKYLAKE-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] 2065 ; SKYLAKE-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] 2066 ; SKYLAKE-NEXT: #NO_APP 2067 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2068 ; 2069 ; KNL-LABEL: test_vfnmaddps_128: 2070 ; KNL: # %bb.0: 2071 ; KNL-NEXT: #APP 2072 ; KNL-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] 2073 ; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] 2074 ; KNL-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] 2075 ; KNL-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [11:0.50] 2076 ; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [11:0.50] 2077 ; KNL-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [11:0.50] 2078 ; KNL-NEXT: #NO_APP 2079 ; KNL-NEXT: retq # sched: [7:1.00] 2080 ; 2081 ; SKX-LABEL: test_vfnmaddps_128: 2082 ; SKX: # %bb.0: 2083 ; SKX-NEXT: #APP 2084 ; SKX-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50] 2085 ; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50] 2086 ; SKX-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50] 2087 ; SKX-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] 2088 ; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] 2089 ; SKX-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] 2090 ; SKX-NEXT: #NO_APP 2091 ; SKX-NEXT: retq # sched: [7:1.00] 2092 ; 2093 ; ZNVER1-LABEL: test_vfnmaddps_128: 2094 ; ZNVER1: # %bb.0: 2095 ; ZNVER1-NEXT: #APP 2096 ; ZNVER1-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] 2097 ; ZNVER1-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] 2098 ; ZNVER1-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] 2099 ; ZNVER1-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [12:0.50] 2100 ; ZNVER1-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [12:0.50] 2101 ; ZNVER1-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [12:0.50] 2102 ; ZNVER1-NEXT: #NO_APP 2103 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2104 tail call void asm "vfnmadd132ps $2, $1, $0 \0A\09 vfnmadd213ps $2, $1, $0 \0A\09 vfnmadd231ps $2, $1, $0 \0A\09 vfnmadd132ps $3, $1, $0 \0A\09 vfnmadd213ps $3, $1, $0 \0A\09 vfnmadd231ps $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind 2105 ret void 2106 } 2107 2108 define void @test_vfnmaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize { 2109 ; GENERIC-LABEL: test_vfnmaddps_256: 2110 ; GENERIC: # %bb.0: 2111 ; GENERIC-NEXT: #APP 2112 ; GENERIC-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50] 2113 ; GENERIC-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50] 2114 ; GENERIC-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50] 2115 ; GENERIC-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [10:0.50] 2116 ; GENERIC-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [10:0.50] 2117 ; GENERIC-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [10:0.50] 2118 ; GENERIC-NEXT: #NO_APP 2119 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 2120 ; GENERIC-NEXT: retq # sched: [1:1.00] 2121 ; 2122 ; HASWELL-LABEL: test_vfnmaddps_256: 2123 ; HASWELL: # %bb.0: 2124 ; HASWELL-NEXT: #APP 2125 ; HASWELL-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50] 2126 ; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50] 2127 ; HASWELL-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50] 2128 ; HASWELL-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [12:0.50] 2129 ; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [12:0.50] 2130 ; HASWELL-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [12:0.50] 2131 ; HASWELL-NEXT: #NO_APP 2132 ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] 2133 ; HASWELL-NEXT: retq # sched: [7:1.00] 2134 ; 2135 ; BROADWELL-LABEL: test_vfnmaddps_256: 2136 ; BROADWELL: # %bb.0: 2137 ; BROADWELL-NEXT: #APP 2138 ; BROADWELL-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50] 2139 ; BROADWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50] 2140 ; BROADWELL-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50] 2141 ; BROADWELL-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50] 2142 ; BROADWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50] 2143 ; BROADWELL-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50] 2144 ; BROADWELL-NEXT: #NO_APP 2145 ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] 2146 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2147 ; 2148 ; SKYLAKE-LABEL: test_vfnmaddps_256: 2149 ; SKYLAKE: # %bb.0: 2150 ; SKYLAKE-NEXT: #APP 2151 ; SKYLAKE-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [4:0.50] 2152 ; SKYLAKE-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [4:0.50] 2153 ; SKYLAKE-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [4:0.50] 2154 ; SKYLAKE-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50] 2155 ; SKYLAKE-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50] 2156 ; SKYLAKE-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50] 2157 ; SKYLAKE-NEXT: #NO_APP 2158 ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] 2159 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2160 ; 2161 ; KNL-LABEL: test_vfnmaddps_256: 2162 ; KNL: # %bb.0: 2163 ; KNL-NEXT: #APP 2164 ; KNL-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50] 2165 ; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50] 2166 ; KNL-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50] 2167 ; KNL-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [12:0.50] 2168 ; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [12:0.50] 2169 ; KNL-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [12:0.50] 2170 ; KNL-NEXT: #NO_APP 2171 ; KNL-NEXT: retq # sched: [7:1.00] 2172 ; 2173 ; SKX-LABEL: test_vfnmaddps_256: 2174 ; SKX: # %bb.0: 2175 ; SKX-NEXT: #APP 2176 ; SKX-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [4:0.50] 2177 ; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [4:0.50] 2178 ; SKX-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [4:0.50] 2179 ; SKX-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50] 2180 ; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50] 2181 ; SKX-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50] 2182 ; SKX-NEXT: #NO_APP 2183 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 2184 ; SKX-NEXT: retq # sched: [7:1.00] 2185 ; 2186 ; ZNVER1-LABEL: test_vfnmaddps_256: 2187 ; ZNVER1: # %bb.0: 2188 ; ZNVER1-NEXT: #APP 2189 ; ZNVER1-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50] 2190 ; ZNVER1-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50] 2191 ; ZNVER1-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50] 2192 ; ZNVER1-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [12:0.50] 2193 ; ZNVER1-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [12:0.50] 2194 ; ZNVER1-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [12:0.50] 2195 ; ZNVER1-NEXT: #NO_APP 2196 ; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] 2197 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2198 tail call void asm "vfnmadd132ps $2, $1, $0 \0A\09 vfnmadd213ps $2, $1, $0 \0A\09 vfnmadd231ps $2, $1, $0 \0A\09 vfnmadd132ps $3, $1, $0 \0A\09 vfnmadd213ps $3, $1, $0 \0A\09 vfnmadd231ps $3, $1, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind 2199 ret void 2200 } 2201 2202 define void @test_vfnmaddsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize { 2203 ; GENERIC-LABEL: test_vfnmaddsd_128: 2204 ; GENERIC: # %bb.0: 2205 ; GENERIC-NEXT: #APP 2206 ; GENERIC-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] 2207 ; GENERIC-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] 2208 ; GENERIC-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] 2209 ; GENERIC-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] 2210 ; GENERIC-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] 2211 ; GENERIC-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] 2212 ; GENERIC-NEXT: #NO_APP 2213 ; GENERIC-NEXT: retq # sched: [1:1.00] 2214 ; 2215 ; HASWELL-LABEL: test_vfnmaddsd_128: 2216 ; HASWELL: # %bb.0: 2217 ; HASWELL-NEXT: #APP 2218 ; HASWELL-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] 2219 ; HASWELL-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] 2220 ; HASWELL-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] 2221 ; HASWELL-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] 2222 ; HASWELL-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] 2223 ; HASWELL-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] 2224 ; HASWELL-NEXT: #NO_APP 2225 ; HASWELL-NEXT: retq # sched: [7:1.00] 2226 ; 2227 ; BROADWELL-LABEL: test_vfnmaddsd_128: 2228 ; BROADWELL: # %bb.0: 2229 ; BROADWELL-NEXT: #APP 2230 ; BROADWELL-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] 2231 ; BROADWELL-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] 2232 ; BROADWELL-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] 2233 ; BROADWELL-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] 2234 ; BROADWELL-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] 2235 ; BROADWELL-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] 2236 ; BROADWELL-NEXT: #NO_APP 2237 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2238 ; 2239 ; SKYLAKE-LABEL: test_vfnmaddsd_128: 2240 ; SKYLAKE: # %bb.0: 2241 ; SKYLAKE-NEXT: #APP 2242 ; SKYLAKE-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50] 2243 ; SKYLAKE-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50] 2244 ; SKYLAKE-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50] 2245 ; SKYLAKE-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [9:0.50] 2246 ; SKYLAKE-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50] 2247 ; SKYLAKE-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [9:0.50] 2248 ; SKYLAKE-NEXT: #NO_APP 2249 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2250 ; 2251 ; KNL-LABEL: test_vfnmaddsd_128: 2252 ; KNL: # %bb.0: 2253 ; KNL-NEXT: #APP 2254 ; KNL-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] 2255 ; KNL-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] 2256 ; KNL-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] 2257 ; KNL-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] 2258 ; KNL-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] 2259 ; KNL-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] 2260 ; KNL-NEXT: #NO_APP 2261 ; KNL-NEXT: retq # sched: [7:1.00] 2262 ; 2263 ; SKX-LABEL: test_vfnmaddsd_128: 2264 ; SKX: # %bb.0: 2265 ; SKX-NEXT: #APP 2266 ; SKX-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50] 2267 ; SKX-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50] 2268 ; SKX-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50] 2269 ; SKX-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [9:0.50] 2270 ; SKX-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50] 2271 ; SKX-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [9:0.50] 2272 ; SKX-NEXT: #NO_APP 2273 ; SKX-NEXT: retq # sched: [7:1.00] 2274 ; 2275 ; ZNVER1-LABEL: test_vfnmaddsd_128: 2276 ; ZNVER1: # %bb.0: 2277 ; ZNVER1-NEXT: #APP 2278 ; ZNVER1-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] 2279 ; ZNVER1-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] 2280 ; ZNVER1-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] 2281 ; ZNVER1-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [12:0.50] 2282 ; ZNVER1-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [12:0.50] 2283 ; ZNVER1-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [12:0.50] 2284 ; ZNVER1-NEXT: #NO_APP 2285 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2286 tail call void asm "vfnmadd132sd $2, $1, $0 \0A\09 vfnmadd213sd $2, $1, $0 \0A\09 vfnmadd231sd $2, $1, $0 \0A\09 vfnmadd132sd $3, $1, $0 \0A\09 vfnmadd213sd $3, $1, $0 \0A\09 vfnmadd231sd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind 2287 ret void 2288 } 2289 2290 define void @test_vfnmaddss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize { 2291 ; GENERIC-LABEL: test_vfnmaddss_128: 2292 ; GENERIC: # %bb.0: 2293 ; GENERIC-NEXT: #APP 2294 ; GENERIC-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] 2295 ; GENERIC-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] 2296 ; GENERIC-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] 2297 ; GENERIC-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] 2298 ; GENERIC-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] 2299 ; GENERIC-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] 2300 ; GENERIC-NEXT: #NO_APP 2301 ; GENERIC-NEXT: retq # sched: [1:1.00] 2302 ; 2303 ; HASWELL-LABEL: test_vfnmaddss_128: 2304 ; HASWELL: # %bb.0: 2305 ; HASWELL-NEXT: #APP 2306 ; HASWELL-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] 2307 ; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] 2308 ; HASWELL-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] 2309 ; HASWELL-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] 2310 ; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] 2311 ; HASWELL-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] 2312 ; HASWELL-NEXT: #NO_APP 2313 ; HASWELL-NEXT: retq # sched: [7:1.00] 2314 ; 2315 ; BROADWELL-LABEL: test_vfnmaddss_128: 2316 ; BROADWELL: # %bb.0: 2317 ; BROADWELL-NEXT: #APP 2318 ; BROADWELL-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] 2319 ; BROADWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] 2320 ; BROADWELL-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] 2321 ; BROADWELL-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] 2322 ; BROADWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] 2323 ; BROADWELL-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] 2324 ; BROADWELL-NEXT: #NO_APP 2325 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2326 ; 2327 ; SKYLAKE-LABEL: test_vfnmaddss_128: 2328 ; SKYLAKE: # %bb.0: 2329 ; SKYLAKE-NEXT: #APP 2330 ; SKYLAKE-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50] 2331 ; SKYLAKE-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50] 2332 ; SKYLAKE-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50] 2333 ; SKYLAKE-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [9:0.50] 2334 ; SKYLAKE-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50] 2335 ; SKYLAKE-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [9:0.50] 2336 ; SKYLAKE-NEXT: #NO_APP 2337 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2338 ; 2339 ; KNL-LABEL: test_vfnmaddss_128: 2340 ; KNL: # %bb.0: 2341 ; KNL-NEXT: #APP 2342 ; KNL-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] 2343 ; KNL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] 2344 ; KNL-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] 2345 ; KNL-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] 2346 ; KNL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] 2347 ; KNL-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] 2348 ; KNL-NEXT: #NO_APP 2349 ; KNL-NEXT: retq # sched: [7:1.00] 2350 ; 2351 ; SKX-LABEL: test_vfnmaddss_128: 2352 ; SKX: # %bb.0: 2353 ; SKX-NEXT: #APP 2354 ; SKX-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50] 2355 ; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50] 2356 ; SKX-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50] 2357 ; SKX-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [9:0.50] 2358 ; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50] 2359 ; SKX-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [9:0.50] 2360 ; SKX-NEXT: #NO_APP 2361 ; SKX-NEXT: retq # sched: [7:1.00] 2362 ; 2363 ; ZNVER1-LABEL: test_vfnmaddss_128: 2364 ; ZNVER1: # %bb.0: 2365 ; ZNVER1-NEXT: #APP 2366 ; ZNVER1-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] 2367 ; ZNVER1-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] 2368 ; ZNVER1-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] 2369 ; ZNVER1-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [12:0.50] 2370 ; ZNVER1-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [12:0.50] 2371 ; ZNVER1-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [12:0.50] 2372 ; ZNVER1-NEXT: #NO_APP 2373 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2374 tail call void asm "vfnmadd132ss $2, $1, $0 \0A\09 vfnmadd213ss $2, $1, $0 \0A\09 vfnmadd231ss $2, $1, $0 \0A\09 vfnmadd132ss $3, $1, $0 \0A\09 vfnmadd213ss $3, $1, $0 \0A\09 vfnmadd231ss $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind 2375 ret void 2376 } 2377 2378 ; 2379 ; VFNMSUB 2380 ; 2381 2382 define void @test_vfnmsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize { 2383 ; GENERIC-LABEL: test_vfnmsubpd_128: 2384 ; GENERIC: # %bb.0: 2385 ; GENERIC-NEXT: #APP 2386 ; GENERIC-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] 2387 ; GENERIC-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] 2388 ; GENERIC-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] 2389 ; GENERIC-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] 2390 ; GENERIC-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] 2391 ; GENERIC-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] 2392 ; GENERIC-NEXT: #NO_APP 2393 ; GENERIC-NEXT: retq # sched: [1:1.00] 2394 ; 2395 ; HASWELL-LABEL: test_vfnmsubpd_128: 2396 ; HASWELL: # %bb.0: 2397 ; HASWELL-NEXT: #APP 2398 ; HASWELL-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] 2399 ; HASWELL-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] 2400 ; HASWELL-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] 2401 ; HASWELL-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [11:0.50] 2402 ; HASWELL-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [11:0.50] 2403 ; HASWELL-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [11:0.50] 2404 ; HASWELL-NEXT: #NO_APP 2405 ; HASWELL-NEXT: retq # sched: [7:1.00] 2406 ; 2407 ; BROADWELL-LABEL: test_vfnmsubpd_128: 2408 ; BROADWELL: # %bb.0: 2409 ; BROADWELL-NEXT: #APP 2410 ; BROADWELL-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] 2411 ; BROADWELL-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] 2412 ; BROADWELL-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] 2413 ; BROADWELL-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] 2414 ; BROADWELL-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] 2415 ; BROADWELL-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] 2416 ; BROADWELL-NEXT: #NO_APP 2417 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2418 ; 2419 ; SKYLAKE-LABEL: test_vfnmsubpd_128: 2420 ; SKYLAKE: # %bb.0: 2421 ; SKYLAKE-NEXT: #APP 2422 ; SKYLAKE-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50] 2423 ; SKYLAKE-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50] 2424 ; SKYLAKE-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50] 2425 ; SKYLAKE-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] 2426 ; SKYLAKE-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] 2427 ; SKYLAKE-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] 2428 ; SKYLAKE-NEXT: #NO_APP 2429 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2430 ; 2431 ; KNL-LABEL: test_vfnmsubpd_128: 2432 ; KNL: # %bb.0: 2433 ; KNL-NEXT: #APP 2434 ; KNL-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] 2435 ; KNL-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] 2436 ; KNL-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] 2437 ; KNL-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [11:0.50] 2438 ; KNL-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [11:0.50] 2439 ; KNL-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [11:0.50] 2440 ; KNL-NEXT: #NO_APP 2441 ; KNL-NEXT: retq # sched: [7:1.00] 2442 ; 2443 ; SKX-LABEL: test_vfnmsubpd_128: 2444 ; SKX: # %bb.0: 2445 ; SKX-NEXT: #APP 2446 ; SKX-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50] 2447 ; SKX-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50] 2448 ; SKX-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50] 2449 ; SKX-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] 2450 ; SKX-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] 2451 ; SKX-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] 2452 ; SKX-NEXT: #NO_APP 2453 ; SKX-NEXT: retq # sched: [7:1.00] 2454 ; 2455 ; ZNVER1-LABEL: test_vfnmsubpd_128: 2456 ; ZNVER1: # %bb.0: 2457 ; ZNVER1-NEXT: #APP 2458 ; ZNVER1-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] 2459 ; ZNVER1-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] 2460 ; ZNVER1-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] 2461 ; ZNVER1-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [12:0.50] 2462 ; ZNVER1-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [12:0.50] 2463 ; ZNVER1-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [12:0.50] 2464 ; ZNVER1-NEXT: #NO_APP 2465 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2466 tail call void asm "vfnmsub132pd $2, $1, $0 \0A\09 vfnmsub213pd $2, $1, $0 \0A\09 vfnmsub231pd $2, $1, $0 \0A\09 vfnmsub132pd $3, $1, $0 \0A\09 vfnmsub213pd $3, $1, $0 \0A\09 vfnmsub231pd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind 2467 ret void 2468 } 2469 2470 define void @test_vfnmsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize { 2471 ; GENERIC-LABEL: test_vfnmsubpd_256: 2472 ; GENERIC: # %bb.0: 2473 ; GENERIC-NEXT: #APP 2474 ; GENERIC-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50] 2475 ; GENERIC-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50] 2476 ; GENERIC-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50] 2477 ; GENERIC-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [10:0.50] 2478 ; GENERIC-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [10:0.50] 2479 ; GENERIC-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [10:0.50] 2480 ; GENERIC-NEXT: #NO_APP 2481 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 2482 ; GENERIC-NEXT: retq # sched: [1:1.00] 2483 ; 2484 ; HASWELL-LABEL: test_vfnmsubpd_256: 2485 ; HASWELL: # %bb.0: 2486 ; HASWELL-NEXT: #APP 2487 ; HASWELL-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50] 2488 ; HASWELL-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50] 2489 ; HASWELL-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50] 2490 ; HASWELL-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [12:0.50] 2491 ; HASWELL-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [12:0.50] 2492 ; HASWELL-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [12:0.50] 2493 ; HASWELL-NEXT: #NO_APP 2494 ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] 2495 ; HASWELL-NEXT: retq # sched: [7:1.00] 2496 ; 2497 ; BROADWELL-LABEL: test_vfnmsubpd_256: 2498 ; BROADWELL: # %bb.0: 2499 ; BROADWELL-NEXT: #APP 2500 ; BROADWELL-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50] 2501 ; BROADWELL-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50] 2502 ; BROADWELL-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50] 2503 ; BROADWELL-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50] 2504 ; BROADWELL-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50] 2505 ; BROADWELL-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50] 2506 ; BROADWELL-NEXT: #NO_APP 2507 ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] 2508 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2509 ; 2510 ; SKYLAKE-LABEL: test_vfnmsubpd_256: 2511 ; SKYLAKE: # %bb.0: 2512 ; SKYLAKE-NEXT: #APP 2513 ; SKYLAKE-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [4:0.50] 2514 ; SKYLAKE-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [4:0.50] 2515 ; SKYLAKE-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [4:0.50] 2516 ; SKYLAKE-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50] 2517 ; SKYLAKE-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50] 2518 ; SKYLAKE-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50] 2519 ; SKYLAKE-NEXT: #NO_APP 2520 ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] 2521 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2522 ; 2523 ; KNL-LABEL: test_vfnmsubpd_256: 2524 ; KNL: # %bb.0: 2525 ; KNL-NEXT: #APP 2526 ; KNL-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50] 2527 ; KNL-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50] 2528 ; KNL-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50] 2529 ; KNL-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [12:0.50] 2530 ; KNL-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [12:0.50] 2531 ; KNL-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [12:0.50] 2532 ; KNL-NEXT: #NO_APP 2533 ; KNL-NEXT: retq # sched: [7:1.00] 2534 ; 2535 ; SKX-LABEL: test_vfnmsubpd_256: 2536 ; SKX: # %bb.0: 2537 ; SKX-NEXT: #APP 2538 ; SKX-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [4:0.50] 2539 ; SKX-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [4:0.50] 2540 ; SKX-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [4:0.50] 2541 ; SKX-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50] 2542 ; SKX-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50] 2543 ; SKX-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50] 2544 ; SKX-NEXT: #NO_APP 2545 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 2546 ; SKX-NEXT: retq # sched: [7:1.00] 2547 ; 2548 ; ZNVER1-LABEL: test_vfnmsubpd_256: 2549 ; ZNVER1: # %bb.0: 2550 ; ZNVER1-NEXT: #APP 2551 ; ZNVER1-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50] 2552 ; ZNVER1-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50] 2553 ; ZNVER1-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50] 2554 ; ZNVER1-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [12:0.50] 2555 ; ZNVER1-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [12:0.50] 2556 ; ZNVER1-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [12:0.50] 2557 ; ZNVER1-NEXT: #NO_APP 2558 ; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] 2559 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2560 tail call void asm "vfnmsub132pd $2, $1, $0 \0A\09 vfnmsub213pd $2, $1, $0 \0A\09 vfnmsub231pd $2, $1, $0 \0A\09 vfnmsub132pd $3, $1, $0 \0A\09 vfnmsub213pd $3, $1, $0 \0A\09 vfnmsub231pd $3, $1, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind 2561 ret void 2562 } 2563 2564 define void @test_vfnmsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize { 2565 ; GENERIC-LABEL: test_vfnmsubps_128: 2566 ; GENERIC: # %bb.0: 2567 ; GENERIC-NEXT: #APP 2568 ; GENERIC-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] 2569 ; GENERIC-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] 2570 ; GENERIC-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] 2571 ; GENERIC-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] 2572 ; GENERIC-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] 2573 ; GENERIC-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] 2574 ; GENERIC-NEXT: #NO_APP 2575 ; GENERIC-NEXT: retq # sched: [1:1.00] 2576 ; 2577 ; HASWELL-LABEL: test_vfnmsubps_128: 2578 ; HASWELL: # %bb.0: 2579 ; HASWELL-NEXT: #APP 2580 ; HASWELL-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] 2581 ; HASWELL-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] 2582 ; HASWELL-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] 2583 ; HASWELL-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [11:0.50] 2584 ; HASWELL-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [11:0.50] 2585 ; HASWELL-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [11:0.50] 2586 ; HASWELL-NEXT: #NO_APP 2587 ; HASWELL-NEXT: retq # sched: [7:1.00] 2588 ; 2589 ; BROADWELL-LABEL: test_vfnmsubps_128: 2590 ; BROADWELL: # %bb.0: 2591 ; BROADWELL-NEXT: #APP 2592 ; BROADWELL-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] 2593 ; BROADWELL-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] 2594 ; BROADWELL-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] 2595 ; BROADWELL-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] 2596 ; BROADWELL-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] 2597 ; BROADWELL-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] 2598 ; BROADWELL-NEXT: #NO_APP 2599 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2600 ; 2601 ; SKYLAKE-LABEL: test_vfnmsubps_128: 2602 ; SKYLAKE: # %bb.0: 2603 ; SKYLAKE-NEXT: #APP 2604 ; SKYLAKE-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50] 2605 ; SKYLAKE-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50] 2606 ; SKYLAKE-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50] 2607 ; SKYLAKE-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] 2608 ; SKYLAKE-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] 2609 ; SKYLAKE-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] 2610 ; SKYLAKE-NEXT: #NO_APP 2611 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2612 ; 2613 ; KNL-LABEL: test_vfnmsubps_128: 2614 ; KNL: # %bb.0: 2615 ; KNL-NEXT: #APP 2616 ; KNL-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] 2617 ; KNL-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] 2618 ; KNL-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] 2619 ; KNL-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [11:0.50] 2620 ; KNL-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [11:0.50] 2621 ; KNL-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [11:0.50] 2622 ; KNL-NEXT: #NO_APP 2623 ; KNL-NEXT: retq # sched: [7:1.00] 2624 ; 2625 ; SKX-LABEL: test_vfnmsubps_128: 2626 ; SKX: # %bb.0: 2627 ; SKX-NEXT: #APP 2628 ; SKX-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50] 2629 ; SKX-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50] 2630 ; SKX-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50] 2631 ; SKX-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] 2632 ; SKX-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] 2633 ; SKX-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] 2634 ; SKX-NEXT: #NO_APP 2635 ; SKX-NEXT: retq # sched: [7:1.00] 2636 ; 2637 ; ZNVER1-LABEL: test_vfnmsubps_128: 2638 ; ZNVER1: # %bb.0: 2639 ; ZNVER1-NEXT: #APP 2640 ; ZNVER1-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] 2641 ; ZNVER1-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] 2642 ; ZNVER1-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] 2643 ; ZNVER1-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [12:0.50] 2644 ; ZNVER1-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [12:0.50] 2645 ; ZNVER1-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [12:0.50] 2646 ; ZNVER1-NEXT: #NO_APP 2647 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2648 tail call void asm "vfnmsub132ps $2, $1, $0 \0A\09 vfnmsub213ps $2, $1, $0 \0A\09 vfnmsub231ps $2, $1, $0 \0A\09 vfnmsub132ps $3, $1, $0 \0A\09 vfnmsub213ps $3, $1, $0 \0A\09 vfnmsub231ps $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind 2649 ret void 2650 } 2651 2652 define void @test_vfnmsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize { 2653 ; GENERIC-LABEL: test_vfnmsubps_256: 2654 ; GENERIC: # %bb.0: 2655 ; GENERIC-NEXT: #APP 2656 ; GENERIC-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50] 2657 ; GENERIC-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50] 2658 ; GENERIC-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50] 2659 ; GENERIC-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [10:0.50] 2660 ; GENERIC-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [10:0.50] 2661 ; GENERIC-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [10:0.50] 2662 ; GENERIC-NEXT: #NO_APP 2663 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 2664 ; GENERIC-NEXT: retq # sched: [1:1.00] 2665 ; 2666 ; HASWELL-LABEL: test_vfnmsubps_256: 2667 ; HASWELL: # %bb.0: 2668 ; HASWELL-NEXT: #APP 2669 ; HASWELL-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50] 2670 ; HASWELL-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50] 2671 ; HASWELL-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50] 2672 ; HASWELL-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [12:0.50] 2673 ; HASWELL-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [12:0.50] 2674 ; HASWELL-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [12:0.50] 2675 ; HASWELL-NEXT: #NO_APP 2676 ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] 2677 ; HASWELL-NEXT: retq # sched: [7:1.00] 2678 ; 2679 ; BROADWELL-LABEL: test_vfnmsubps_256: 2680 ; BROADWELL: # %bb.0: 2681 ; BROADWELL-NEXT: #APP 2682 ; BROADWELL-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50] 2683 ; BROADWELL-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50] 2684 ; BROADWELL-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50] 2685 ; BROADWELL-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50] 2686 ; BROADWELL-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50] 2687 ; BROADWELL-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50] 2688 ; BROADWELL-NEXT: #NO_APP 2689 ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] 2690 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2691 ; 2692 ; SKYLAKE-LABEL: test_vfnmsubps_256: 2693 ; SKYLAKE: # %bb.0: 2694 ; SKYLAKE-NEXT: #APP 2695 ; SKYLAKE-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [4:0.50] 2696 ; SKYLAKE-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [4:0.50] 2697 ; SKYLAKE-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [4:0.50] 2698 ; SKYLAKE-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50] 2699 ; SKYLAKE-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50] 2700 ; SKYLAKE-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50] 2701 ; SKYLAKE-NEXT: #NO_APP 2702 ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] 2703 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2704 ; 2705 ; KNL-LABEL: test_vfnmsubps_256: 2706 ; KNL: # %bb.0: 2707 ; KNL-NEXT: #APP 2708 ; KNL-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50] 2709 ; KNL-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50] 2710 ; KNL-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50] 2711 ; KNL-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [12:0.50] 2712 ; KNL-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [12:0.50] 2713 ; KNL-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [12:0.50] 2714 ; KNL-NEXT: #NO_APP 2715 ; KNL-NEXT: retq # sched: [7:1.00] 2716 ; 2717 ; SKX-LABEL: test_vfnmsubps_256: 2718 ; SKX: # %bb.0: 2719 ; SKX-NEXT: #APP 2720 ; SKX-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [4:0.50] 2721 ; SKX-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [4:0.50] 2722 ; SKX-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [4:0.50] 2723 ; SKX-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50] 2724 ; SKX-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50] 2725 ; SKX-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50] 2726 ; SKX-NEXT: #NO_APP 2727 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 2728 ; SKX-NEXT: retq # sched: [7:1.00] 2729 ; 2730 ; ZNVER1-LABEL: test_vfnmsubps_256: 2731 ; ZNVER1: # %bb.0: 2732 ; ZNVER1-NEXT: #APP 2733 ; ZNVER1-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50] 2734 ; ZNVER1-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50] 2735 ; ZNVER1-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50] 2736 ; ZNVER1-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [12:0.50] 2737 ; ZNVER1-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [12:0.50] 2738 ; ZNVER1-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [12:0.50] 2739 ; ZNVER1-NEXT: #NO_APP 2740 ; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] 2741 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2742 tail call void asm "vfnmsub132ps $2, $1, $0 \0A\09 vfnmsub213ps $2, $1, $0 \0A\09 vfnmsub231ps $2, $1, $0 \0A\09 vfnmsub132ps $3, $1, $0 \0A\09 vfnmsub213ps $3, $1, $0 \0A\09 vfnmsub231ps $3, $1, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind 2743 ret void 2744 } 2745 2746 define void @test_vfnmsubsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize { 2747 ; GENERIC-LABEL: test_vfnmsubsd_128: 2748 ; GENERIC: # %bb.0: 2749 ; GENERIC-NEXT: #APP 2750 ; GENERIC-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] 2751 ; GENERIC-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] 2752 ; GENERIC-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] 2753 ; GENERIC-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] 2754 ; GENERIC-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] 2755 ; GENERIC-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] 2756 ; GENERIC-NEXT: #NO_APP 2757 ; GENERIC-NEXT: retq # sched: [1:1.00] 2758 ; 2759 ; HASWELL-LABEL: test_vfnmsubsd_128: 2760 ; HASWELL: # %bb.0: 2761 ; HASWELL-NEXT: #APP 2762 ; HASWELL-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] 2763 ; HASWELL-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] 2764 ; HASWELL-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] 2765 ; HASWELL-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] 2766 ; HASWELL-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] 2767 ; HASWELL-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] 2768 ; HASWELL-NEXT: #NO_APP 2769 ; HASWELL-NEXT: retq # sched: [7:1.00] 2770 ; 2771 ; BROADWELL-LABEL: test_vfnmsubsd_128: 2772 ; BROADWELL: # %bb.0: 2773 ; BROADWELL-NEXT: #APP 2774 ; BROADWELL-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] 2775 ; BROADWELL-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] 2776 ; BROADWELL-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] 2777 ; BROADWELL-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] 2778 ; BROADWELL-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] 2779 ; BROADWELL-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] 2780 ; BROADWELL-NEXT: #NO_APP 2781 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2782 ; 2783 ; SKYLAKE-LABEL: test_vfnmsubsd_128: 2784 ; SKYLAKE: # %bb.0: 2785 ; SKYLAKE-NEXT: #APP 2786 ; SKYLAKE-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50] 2787 ; SKYLAKE-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50] 2788 ; SKYLAKE-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50] 2789 ; SKYLAKE-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [9:0.50] 2790 ; SKYLAKE-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [9:0.50] 2791 ; SKYLAKE-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [9:0.50] 2792 ; SKYLAKE-NEXT: #NO_APP 2793 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2794 ; 2795 ; KNL-LABEL: test_vfnmsubsd_128: 2796 ; KNL: # %bb.0: 2797 ; KNL-NEXT: #APP 2798 ; KNL-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] 2799 ; KNL-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] 2800 ; KNL-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] 2801 ; KNL-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] 2802 ; KNL-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] 2803 ; KNL-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] 2804 ; KNL-NEXT: #NO_APP 2805 ; KNL-NEXT: retq # sched: [7:1.00] 2806 ; 2807 ; SKX-LABEL: test_vfnmsubsd_128: 2808 ; SKX: # %bb.0: 2809 ; SKX-NEXT: #APP 2810 ; SKX-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50] 2811 ; SKX-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50] 2812 ; SKX-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50] 2813 ; SKX-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [9:0.50] 2814 ; SKX-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [9:0.50] 2815 ; SKX-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [9:0.50] 2816 ; SKX-NEXT: #NO_APP 2817 ; SKX-NEXT: retq # sched: [7:1.00] 2818 ; 2819 ; ZNVER1-LABEL: test_vfnmsubsd_128: 2820 ; ZNVER1: # %bb.0: 2821 ; ZNVER1-NEXT: #APP 2822 ; ZNVER1-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] 2823 ; ZNVER1-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] 2824 ; ZNVER1-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] 2825 ; ZNVER1-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [12:0.50] 2826 ; ZNVER1-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [12:0.50] 2827 ; ZNVER1-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [12:0.50] 2828 ; ZNVER1-NEXT: #NO_APP 2829 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2830 tail call void asm "vfnmsub132sd $2, $1, $0 \0A\09 vfnmsub213sd $2, $1, $0 \0A\09 vfnmsub231sd $2, $1, $0 \0A\09 vfnmsub132sd $3, $1, $0 \0A\09 vfnmsub213sd $3, $1, $0 \0A\09 vfnmsub231sd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind 2831 ret void 2832 } 2833 2834 define void @test_vfnmsubss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize { 2835 ; GENERIC-LABEL: test_vfnmsubss_128: 2836 ; GENERIC: # %bb.0: 2837 ; GENERIC-NEXT: #APP 2838 ; GENERIC-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] 2839 ; GENERIC-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] 2840 ; GENERIC-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] 2841 ; GENERIC-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] 2842 ; GENERIC-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] 2843 ; GENERIC-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] 2844 ; GENERIC-NEXT: #NO_APP 2845 ; GENERIC-NEXT: retq # sched: [1:1.00] 2846 ; 2847 ; HASWELL-LABEL: test_vfnmsubss_128: 2848 ; HASWELL: # %bb.0: 2849 ; HASWELL-NEXT: #APP 2850 ; HASWELL-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] 2851 ; HASWELL-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] 2852 ; HASWELL-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] 2853 ; HASWELL-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] 2854 ; HASWELL-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] 2855 ; HASWELL-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] 2856 ; HASWELL-NEXT: #NO_APP 2857 ; HASWELL-NEXT: retq # sched: [7:1.00] 2858 ; 2859 ; BROADWELL-LABEL: test_vfnmsubss_128: 2860 ; BROADWELL: # %bb.0: 2861 ; BROADWELL-NEXT: #APP 2862 ; BROADWELL-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] 2863 ; BROADWELL-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] 2864 ; BROADWELL-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] 2865 ; BROADWELL-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] 2866 ; BROADWELL-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] 2867 ; BROADWELL-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] 2868 ; BROADWELL-NEXT: #NO_APP 2869 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2870 ; 2871 ; SKYLAKE-LABEL: test_vfnmsubss_128: 2872 ; SKYLAKE: # %bb.0: 2873 ; SKYLAKE-NEXT: #APP 2874 ; SKYLAKE-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50] 2875 ; SKYLAKE-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50] 2876 ; SKYLAKE-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50] 2877 ; SKYLAKE-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [9:0.50] 2878 ; SKYLAKE-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [9:0.50] 2879 ; SKYLAKE-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [9:0.50] 2880 ; SKYLAKE-NEXT: #NO_APP 2881 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2882 ; 2883 ; KNL-LABEL: test_vfnmsubss_128: 2884 ; KNL: # %bb.0: 2885 ; KNL-NEXT: #APP 2886 ; KNL-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] 2887 ; KNL-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] 2888 ; KNL-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] 2889 ; KNL-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] 2890 ; KNL-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] 2891 ; KNL-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] 2892 ; KNL-NEXT: #NO_APP 2893 ; KNL-NEXT: retq # sched: [7:1.00] 2894 ; 2895 ; SKX-LABEL: test_vfnmsubss_128: 2896 ; SKX: # %bb.0: 2897 ; SKX-NEXT: #APP 2898 ; SKX-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50] 2899 ; SKX-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50] 2900 ; SKX-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50] 2901 ; SKX-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [9:0.50] 2902 ; SKX-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [9:0.50] 2903 ; SKX-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [9:0.50] 2904 ; SKX-NEXT: #NO_APP 2905 ; SKX-NEXT: retq # sched: [7:1.00] 2906 ; 2907 ; ZNVER1-LABEL: test_vfnmsubss_128: 2908 ; ZNVER1: # %bb.0: 2909 ; ZNVER1-NEXT: #APP 2910 ; ZNVER1-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] 2911 ; ZNVER1-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] 2912 ; ZNVER1-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] 2913 ; ZNVER1-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [12:0.50] 2914 ; ZNVER1-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [12:0.50] 2915 ; ZNVER1-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [12:0.50] 2916 ; ZNVER1-NEXT: #NO_APP 2917 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2918 tail call void asm "vfnmsub132ss $2, $1, $0 \0A\09 vfnmsub213ss $2, $1, $0 \0A\09 vfnmsub231ss $2, $1, $0 \0A\09 vfnmsub132ss $3, $1, $0 \0A\09 vfnmsub213ss $3, $1, $0 \0A\09 vfnmsub231ss $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind 2919 ret void 2920 } 2921