; RUN: llc -mtriple=x86_64-pc-linux -mcpu=corei7 < %s | FileCheck -check-prefix=CHECK -check-prefix=SSE2 %s
; RUN: llc -mtriple=x86_64-pc-linux -mattr=-sse4.1 -mcpu=corei7 < %s | FileCheck -check-prefix=CHECK -check-prefix=SSE2 %s
; RUN: llc -mtriple=x86_64-pc-linux -mcpu=corei7-avx < %s | FileCheck -check-prefix=CHECK -check-prefix=AVX %s

; Ensure that the backend no longer emits unnecessary vector insert
; instructions immediately after SSE scalar fp instructions
; like addss or mulss.
;
; Every function below extracts element 0 of both vector arguments, applies
; one (or two) scalar fp operations, and inserts the result back into
; element 0 of one of the argument vectors. The checks that follow each
; function expect the scalar instruction and then forbid a scalar move
; between that instruction and the return.
;
; Function labels are matched with their trailing colon so that each label
; check anchors on the label definition rather than on any other mention of
; the symbol name in the assembly output.


; --- Group 1: single-precision, result inserted into %a -------------------

define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %add = fadd float %2, %1
  %3 = insertelement <4 x float> %a, float %add, i32 0
  ret <4 x float> %3
}

; CHECK-LABEL: test_add_ss:
; SSE2: addss   %xmm1, %xmm0
; AVX: vaddss   %xmm1, %xmm0, %xmm0
; CHECK-NOT: movss
; CHECK: ret


define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %sub = fsub float %2, %1
  %3 = insertelement <4 x float> %a, float %sub, i32 0
  ret <4 x float> %3
}

; CHECK-LABEL: test_sub_ss:
; SSE2: subss   %xmm1, %xmm0
; AVX: vsubss   %xmm1, %xmm0, %xmm0
; CHECK-NOT: movss
; CHECK: ret


define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %mul = fmul float %2, %1
  %3 = insertelement <4 x float> %a, float %mul, i32 0
  ret <4 x float> %3
}

; CHECK-LABEL: test_mul_ss:
; SSE2: mulss   %xmm1, %xmm0
; AVX: vmulss   %xmm1, %xmm0, %xmm0
; CHECK-NOT: movss
; CHECK: ret


define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %div = fdiv float %2, %1
  %3 = insertelement <4 x float> %a, float %div, i32 0
  ret <4 x float> %3
}

; CHECK-LABEL: test_div_ss:
; SSE2: divss   %xmm1, %xmm0
; AVX: vdivss   %xmm1, %xmm0, %xmm0
; CHECK-NOT: movss
; CHECK: ret


; --- Group 2: double-precision, result inserted into %a -------------------

define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %add = fadd double %2, %1
  %3 = insertelement <2 x double> %a, double %add, i32 0
  ret <2 x double> %3
}

; CHECK-LABEL: test_add_sd:
; SSE2: addsd   %xmm1, %xmm0
; AVX: vaddsd   %xmm1, %xmm0, %xmm0
; CHECK-NOT: movsd
; CHECK: ret


define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) {
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %sub = fsub double %2, %1
  %3 = insertelement <2 x double> %a, double %sub, i32 0
  ret <2 x double> %3
}

; CHECK-LABEL: test_sub_sd:
; SSE2: subsd   %xmm1, %xmm0
; AVX: vsubsd   %xmm1, %xmm0, %xmm0
; CHECK-NOT: movsd
; CHECK: ret


define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) {
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %mul = fmul double %2, %1
  %3 = insertelement <2 x double> %a, double %mul, i32 0
  ret <2 x double> %3
}

; CHECK-LABEL: test_mul_sd:
; SSE2: mulsd   %xmm1, %xmm0
; AVX: vmulsd   %xmm1, %xmm0, %xmm0
; CHECK-NOT: movsd
; CHECK: ret


define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %div = fdiv double %2, %1
  %3 = insertelement <2 x double> %a, double %div, i32 0
  ret <2 x double> %3
}

; CHECK-LABEL: test_div_sd:
; SSE2: divsd   %xmm1, %xmm0
; AVX: vdivsd   %xmm1, %xmm0, %xmm0
; CHECK-NOT: movsd
; CHECK: ret


; --- Group 3: single-precision, result inserted into %b -------------------
; Same shape as group 1, but the scalar result replaces element 0 of the
; second argument, so the expected instruction operands are swapped.

define <4 x float> @test2_add_ss(<4 x float> %a, <4 x float> %b) {
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %add = fadd float %1, %2
  %3 = insertelement <4 x float> %b, float %add, i32 0
  ret <4 x float> %3
}

; CHECK-LABEL: test2_add_ss:
; SSE2: addss   %xmm0, %xmm1
; AVX: vaddss   %xmm0, %xmm1, %xmm0
; CHECK-NOT: movss
; CHECK: ret


define <4 x float> @test2_sub_ss(<4 x float> %a, <4 x float> %b) {
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %sub = fsub float %2, %1
  %3 = insertelement <4 x float> %b, float %sub, i32 0
  ret <4 x float> %3
}

; CHECK-LABEL: test2_sub_ss:
; SSE2: subss   %xmm0, %xmm1
; AVX: vsubss   %xmm0, %xmm1, %xmm0
; CHECK-NOT: movss
; CHECK: ret


define <4 x float> @test2_mul_ss(<4 x float> %a, <4 x float> %b) {
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %mul = fmul float %1, %2
  %3 = insertelement <4 x float> %b, float %mul, i32 0
  ret <4 x float> %3
}

; CHECK-LABEL: test2_mul_ss:
; SSE2: mulss   %xmm0, %xmm1
; AVX: vmulss   %xmm0, %xmm1, %xmm0
; CHECK-NOT: movss
; CHECK: ret


define <4 x float> @test2_div_ss(<4 x float> %a, <4 x float> %b) {
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %div = fdiv float %2, %1
  %3 = insertelement <4 x float> %b, float %div, i32 0
  ret <4 x float> %3
}

; CHECK-LABEL: test2_div_ss:
; SSE2: divss   %xmm0, %xmm1
; AVX: vdivss   %xmm0, %xmm1, %xmm0
; CHECK-NOT: movss
; CHECK: ret


; --- Group 4: double-precision, result inserted into %b -------------------

define <2 x double> @test2_add_sd(<2 x double> %a, <2 x double> %b) {
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %add = fadd double %1, %2
  %3 = insertelement <2 x double> %b, double %add, i32 0
  ret <2 x double> %3
}

; CHECK-LABEL: test2_add_sd:
; SSE2: addsd   %xmm0, %xmm1
; AVX: vaddsd   %xmm0, %xmm1, %xmm0
; CHECK-NOT: movsd
; CHECK: ret


define <2 x double> @test2_sub_sd(<2 x double> %a, <2 x double> %b) {
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %sub = fsub double %2, %1
  %3 = insertelement <2 x double> %b, double %sub, i32 0
  ret <2 x double> %3
}

; CHECK-LABEL: test2_sub_sd:
; SSE2: subsd   %xmm0, %xmm1
; AVX: vsubsd   %xmm0, %xmm1, %xmm0
; CHECK-NOT: movsd
; CHECK: ret


define <2 x double> @test2_mul_sd(<2 x double> %a, <2 x double> %b) {
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %mul = fmul double %1, %2
  %3 = insertelement <2 x double> %b, double %mul, i32 0
  ret <2 x double> %3
}

; CHECK-LABEL: test2_mul_sd:
; SSE2: mulsd   %xmm0, %xmm1
; AVX: vmulsd   %xmm0, %xmm1, %xmm0
; CHECK-NOT: movsd
; CHECK: ret


define <2 x double> @test2_div_sd(<2 x double> %a, <2 x double> %b) {
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %div = fdiv double %2, %1
  %3 = insertelement <2 x double> %b, double %div, i32 0
  ret <2 x double> %3
}

; CHECK-LABEL: test2_div_sd:
; SSE2: divsd   %xmm0, %xmm1
; AVX: vdivsd   %xmm0, %xmm1, %xmm0
; CHECK-NOT: movsd
; CHECK: ret


; --- Group 5: two chained single-precision ops, result inserted into %a ---
; The second operation consumes the result of the first; only the final
; value is inserted. The unprefixed mnemonic patterns below are substrings
; of the v-prefixed AVX mnemonics, so one set of checks serves all run lines.

define <4 x float> @test_multiple_add_ss(<4 x float> %a, <4 x float> %b) {
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %add = fadd float %2, %1
  %add2 = fadd float %2, %add
  %3 = insertelement <4 x float> %a, float %add2, i32 0
  ret <4 x float> %3
}

; CHECK-LABEL: test_multiple_add_ss:
; CHECK: addss
; CHECK: addss
; CHECK-NOT: movss
; CHECK: ret


define <4 x float> @test_multiple_sub_ss(<4 x float> %a, <4 x float> %b) {
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %sub = fsub float %2, %1
  %sub2 = fsub float %2, %sub
  %3 = insertelement <4 x float> %a, float %sub2, i32 0
  ret <4 x float> %3
}

; CHECK-LABEL: test_multiple_sub_ss:
; CHECK: subss
; CHECK: subss
; CHECK-NOT: movss
; CHECK: ret


define <4 x float> @test_multiple_mul_ss(<4 x float> %a, <4 x float> %b) {
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %mul = fmul float %2, %1
  %mul2 = fmul float %2, %mul
  %3 = insertelement <4 x float> %a, float %mul2, i32 0
  ret <4 x float> %3
}

; CHECK-LABEL: test_multiple_mul_ss:
; CHECK: mulss
; CHECK: mulss
; CHECK-NOT: movss
; CHECK: ret


define <4 x float> @test_multiple_div_ss(<4 x float> %a, <4 x float> %b) {
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %div = fdiv float %2, %1
  %div2 = fdiv float %2, %div
  %3 = insertelement <4 x float> %a, float %div2, i32 0
  ret <4 x float> %3
}

; CHECK-LABEL: test_multiple_div_ss:
; CHECK: divss
; CHECK: divss
; CHECK-NOT: movss
; CHECK: ret
