; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fma4,-fma -show-mc-encoding | FileCheck %s --check-prefix=CHECK
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fma4,+fma -show-mc-encoding | FileCheck %s --check-prefix=CHECK

; Codegen test for the four-operand FMA4 instruction forms on x86-64.
; Both llc invocations above enable FMA4 (once with FMA disabled, once with it
; enabled) and share one FileCheck prefix, so the same instruction selection
; and the same machine-code encoding are expected from both configurations.

; VFMADD

; Scalar single-precision multiply-add through the FMA4-specific intrinsic.
define <4 x float> @test_x86_fma4_vfmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma4_vfmadd_ss:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddss %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6a,0xc2,0x10]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}

; Same as above, but the call passes %a1 before %a0; the expected instruction
; has %xmm0 and %xmm1 exchanged in the two multiplicand positions.
define <4 x float> @test_x86_fma4_vfmadd_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma4_vfmadd_bac_ss:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddss %xmm2, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0xf1,0x6a,0xc2,0x00]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>)

; Scalar double-precision multiply-add through the FMA4-specific intrinsic.
define <2 x double> @test_x86_fma4_vfmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma4_vfmadd_sd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6b,0xc2,0x10]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}

; Double-precision variant of the swapped-multiplicand test (%a1 before %a0).
define <2 x double> @test_x86_fma4_vfmadd_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma4_vfmadd_bac_sd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddsd %xmm2, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0xf1,0x6b,0xc2,0x00]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>)

; Packed multiply-add tests: the generic llvm.fma intrinsic is expected to be
; selected as the FMA4 vfmaddps/vfmaddpd forms for 128-bit and 256-bit vectors.
define <4 x float> @test_x86_fma_vfmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x68,0xc2,0x10]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %1
}

define <2 x double> @test_x86_fma_vfmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x69,0xc2,0x10]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %1
}

define <8 x float> @test_x86_fma_vfmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x68,0xc2,0x10]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %1
}

define <4 x double> @test_x86_fma_vfmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x69,0xc2,0x10]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %1
}

; VFMSUB
; The addend %a2 is negated (written as a subtraction from -0.0) before the
; llvm.fma call; the negation is expected to fold into a single vfmsub.
define <4 x float> @test_x86_fma_vfmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6c,0xc2,0x10]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
  %2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %1)
  ret <4 x float> %2
}

define <2 x double> @test_x86_fma_vfmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6d,0xc2,0x10]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
  %2 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %1)
  ret <2 x double> %2
}

define <8 x float> @test_x86_fma_vfmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x6c,0xc2,0x10]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
  %2 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %1)
  ret <8 x float> %2
}

define <4 x double> @test_x86_fma_vfmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x6d,0xc2,0x10]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
  %2 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %1)
  ret <4 x double> %2
}

; VFNMADD
; The first multiplicand %a0 is negated before the llvm.fma call; the negation
; is expected to fold into a single vfnmadd.
define <4 x float> @test_x86_fma_vfnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x78,0xc2,0x10]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
  %2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %2
}

define <2 x double> @test_x86_fma_vfnmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x79,0xc2,0x10]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a0
  %2 = call <2 x double> @llvm.fma.v2f64(<2 x double> %1, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %2
}

define <8 x float> @test_x86_fma_vfnmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x78,0xc2,0x10]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
  %2 = call <8 x float> @llvm.fma.v8f32(<8 x float> %1, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %2
}

define <4 x double> @test_x86_fma_vfnmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x79,0xc2,0x10]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a0
  %2 = call <4 x double> @llvm.fma.v4f64(<4 x double> %1, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %2
}

; VFNMSUB
; Both the first multiplicand %a0 and the addend %a2 are negated before the
; llvm.fma call; both negations are expected to fold into a single vfnmsub.
define <4 x float> @test_x86_fma_vfnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7c,0xc2,0x10]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
  %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
  %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1, <4 x float> %a1, <4 x float> %2)
  ret <4 x float> %3
}

define <2 x double> @test_x86_fma_vfnmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7d,0xc2,0x10]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a0
  %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
  %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %1, <2 x double> %a1, <2 x double> %2)
  ret <2 x double> %3
}

define <8 x float> @test_x86_fma_vfnmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x7c,0xc2,0x10]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
  %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
  %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %1, <8 x float> %a1, <8 x float> %2)
  ret <8 x float> %3
}

define <4 x double> @test_x86_fma_vfnmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x7d,0xc2,0x10]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a0
  %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
  %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %1, <4 x double> %a1, <4 x double> %2)
  ret <4 x double> %3
}

; VFMADDSUB
; Two llvm.fma results are computed, %1 with the addend %a2 and %3 with the
; negated addend, then interleaved by a shufflevector whose first operand is
; %3: even result lanes come from %3 and odd result lanes from %1. The whole
; pattern is expected to become a single vfmaddsub.
define <4 x float> @test_x86_fma_vfmaddsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmaddsub_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5c,0xc2,0x10]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
  %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %2)
  %4 = shufflevector <4 x float> %3, <4 x float> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %4
}

define <2 x double> @test_x86_fma_vfmaddsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmaddsub_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5d,0xc2,0x10]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
  %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %2)
  %4 = shufflevector <2 x double> %3, <2 x double> %1, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %4
}

define <8 x float> @test_x86_fma_vfmaddsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmaddsub_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5c,0xc2,0x10]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
  %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %2)
  %4 = shufflevector <8 x float> %3, <8 x float> %1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x float> %4
}

define <4 x double> @test_x86_fma_vfmaddsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmaddsub_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5d,0xc2,0x10]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
  %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %2)
  %4 = shufflevector <4 x double> %3, <4 x double> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %4
}

; VFMSUBADD
; Same two llvm.fma results as in the VFMADDSUB tests, but the shufflevector
; operands are exchanged: even result lanes come from %1 (addend %a2) and odd
; result lanes from %3 (negated addend). Expected to become a single vfmsubadd.
define <4 x float> @test_x86_fma_vfmsubadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsubadd_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5e,0xc2,0x10]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
  %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %2)
  %4 = shufflevector <4 x float> %1, <4 x float> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %4
}

define <2 x double> @test_x86_fma_vfmsubadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsubadd_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5f,0xc2,0x10]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
  %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %2)
  %4 = shufflevector <2 x double> %1, <2 x double> %3, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %4
}

define <8 x float> @test_x86_fma_vfmsubadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsubadd_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5e,0xc2,0x10]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
  %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %2)
  %4 = shufflevector <8 x float> %1, <8 x float> %3, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x float> %4
}

define <4 x double> @test_x86_fma_vfmsubadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsubadd_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5f,0xc2,0x10]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
  %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %2)
  %4 = shufflevector <4 x double> %1, <4 x double> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %4
}

; Generic FMA intrinsics used by the packed tests above. No attribute group is
; attached to these declarations: #0 (applied to the test functions) is the
; only group this file defines.
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>)
declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>)

attributes #0 = { nounwind }