; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse3 | FileCheck %s --check-prefix=SSE3
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX

; Each function below builds two shuffles that select the paired lanes of its
; input(s) and combines them with fadd/fsub. The expected output is a single
; horizontal add/sub instruction per 128-bit (SSE3) or full-width (AVX) vector.

; Canonical form: lhs = [x0, y0], rhs = [x1, y1].
define <2 x double> @haddpd1(<2 x double> %x, <2 x double> %y) {
; SSE3-LABEL: haddpd1:
; SSE3:       # %bb.0:
; SSE3-NEXT:    haddpd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: haddpd1:
; AVX:       # %bb.0:
; AVX-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %lhs = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 0, i32 2>
  %rhs = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 1, i32 3>
  %hadd = fadd <2 x double> %lhs, %rhs
  ret <2 x double> %hadd
}

; Commuted form: lhs = [x1, y0]; rhs takes (y, x) as inputs and yields [x0, y1].
define <2 x double> @haddpd2(<2 x double> %x, <2 x double> %y) {
; SSE3-LABEL: haddpd2:
; SSE3:       # %bb.0:
; SSE3-NEXT:    haddpd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: haddpd2:
; AVX:       # %bb.0:
; AVX-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %lhs = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 1, i32 2>
  %rhs = shufflevector <2 x double> %y, <2 x double> %x, <2 x i32> <i32 2, i32 1>
  %hadd = fadd <2 x double> %lhs, %rhs
  ret <2 x double> %hadd
}

; Single input; only lane 0 of the result is defined.
define <2 x double> @haddpd3(<2 x double> %x) {
; SSE3-LABEL: haddpd3:
; SSE3:       # %bb.0:
; SSE3-NEXT:    haddpd %xmm0, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: haddpd3:
; AVX:       # %bb.0:
; AVX-NEXT:    vhaddpd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %lhs = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 0, i32 undef>
  %rhs = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
  %hadd = fadd <2 x double> %lhs, %rhs
  ret <2 x double> %hadd
}

; Canonical form: even elements of (x, y) on the left, odd elements on the right.
define <4 x float> @haddps1(<4 x float> %x, <4 x float> %y) {
; SSE3-LABEL: haddps1:
; SSE3:       # %bb.0:
; SSE3-NEXT:    haddps %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: haddps1:
; AVX:       # %bb.0:
; AVX-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %lhs = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %rhs = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %hadd = fadd <4 x float> %lhs, %rhs
  ret <4 x float> %hadd
}

; Commuted form: lhs = [x1, x2, y1, y2]; rhs takes (y, x) and yields [x0, x3, y0, y3].
define <4 x float> @haddps2(<4 x float> %x, <4 x float> %y) {
; SSE3-LABEL: haddps2:
; SSE3:       # %bb.0:
; SSE3-NEXT:    haddps %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: haddps2:
; AVX:       # %bb.0:
; AVX-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %lhs = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
  %rhs = shufflevector <4 x float> %y, <4 x float> %x, <4 x i32> <i32 4, i32 7, i32 0, i32 3>
  %hadd = fadd <4 x float> %lhs, %rhs
  ret <4 x float> %hadd
}

; Single input: lane 0 is undef and mask indices 4-7 select from the undef operand.
define <4 x float> @haddps3(<4 x float> %x) {
; SSE3-LABEL: haddps3:
; SSE3:       # %bb.0:
; SSE3-NEXT:    haddps %xmm0, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: haddps3:
; AVX:       # %bb.0:
; AVX-NEXT:    vhaddps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %lhs = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 4, i32 6>
  %rhs = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 3, i32 5, i32 7>
  %hadd = fadd <4 x float> %lhs, %rhs
  ret <4 x float> %hadd
}

; Single input: lanes 0 and 1 are defined, lanes 2 and 3 are undef.
define <4 x float> @haddps4(<4 x float> %x) {
; SSE3-LABEL: haddps4:
; SSE3:       # %bb.0:
; SSE3-NEXT:    haddps %xmm0, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: haddps4:
; AVX:       # %bb.0:
; AVX-NEXT:    vhaddps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %lhs = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
  %rhs = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  %hadd = fadd <4 x float> %lhs, %rhs
  ret <4 x float> %hadd
}

; Like haddps4, but lane 1 is commuted (x3 + x2 instead of x2 + x3).
define <4 x float> @haddps5(<4 x float> %x) {
; SSE3-LABEL: haddps5:
; SSE3:       # %bb.0:
; SSE3-NEXT:    haddps %xmm0, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: haddps5:
; AVX:       # %bb.0:
; AVX-NEXT:    vhaddps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %lhs = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 3, i32 undef, i32 undef>
  %rhs = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 undef, i32 undef>
  %hadd = fadd <4 x float> %lhs, %rhs
  ret <4 x float> %hadd
}

; Single input; only lane 0 (x0 + x1) is defined.
define <4 x float> @haddps6(<4 x float> %x) {
; SSE3-LABEL: haddps6:
; SSE3:       # %bb.0:
; SSE3-NEXT:    haddps %xmm0, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: haddps6:
; AVX:       # %bb.0:
; AVX-NEXT:    vhaddps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %lhs = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %rhs = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %hadd = fadd <4 x float> %lhs, %rhs
  ret <4 x float> %hadd
}

; Single input; only lane 1 is defined, in commuted order (x3 + x2).
define <4 x float> @haddps7(<4 x float> %x) {
; SSE3-LABEL: haddps7:
; SSE3:       # %bb.0:
; SSE3-NEXT:    haddps %xmm0, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: haddps7:
; AVX:       # %bb.0:
; AVX-NEXT:    vhaddps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %lhs = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 undef>
  %rhs = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 undef, i32 undef>
  %hadd = fadd <4 x float> %lhs, %rhs
  ret <4 x float> %hadd
}

; Canonical horizontal subtract: [x0, y0] - [x1, y1].
define <2 x double> @hsubpd1(<2 x double> %x, <2 x double> %y) {
; SSE3-LABEL: hsubpd1:
; SSE3:       # %bb.0:
; SSE3-NEXT:    hsubpd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: hsubpd1:
; AVX:       # %bb.0:
; AVX-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %lhs = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 0, i32 2>
  %rhs = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 1, i32 3>
  %hsub = fsub <2 x double> %lhs, %rhs
  ret <2 x double> %hsub
}

; Single input; only lane 0 (x0 - x1) is defined.
define <2 x double> @hsubpd2(<2 x double> %x) {
; SSE3-LABEL: hsubpd2:
; SSE3:       # %bb.0:
; SSE3-NEXT:    hsubpd %xmm0, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: hsubpd2:
; AVX:       # %bb.0:
; AVX-NEXT:    vhsubpd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %lhs = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 0, i32 undef>
  %rhs = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
  %hsub = fsub <2 x double> %lhs, %rhs
  ret <2 x double> %hsub
}

; Canonical form: even elements of (x, y) minus odd elements of (x, y).
define <4 x float> @hsubps1(<4 x float> %x, <4 x float> %y) {
; SSE3-LABEL: hsubps1:
; SSE3:       # %bb.0:
; SSE3-NEXT:    hsubps %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: hsubps1:
; AVX:       # %bb.0:
; AVX-NEXT:    vhsubps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %lhs = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %rhs = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %hsub = fsub <4 x float> %lhs, %rhs
  ret <4 x float> %hsub
}

; Single input: lane 0 is undef and mask indices 4-7 select from the undef operand.
define <4 x float> @hsubps2(<4 x float> %x) {
; SSE3-LABEL: hsubps2:
; SSE3:       # %bb.0:
; SSE3-NEXT:    hsubps %xmm0, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: hsubps2:
; AVX:       # %bb.0:
; AVX-NEXT:    vhsubps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %lhs = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 4, i32 6>
  %rhs = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 3, i32 5, i32 7>
  %hsub = fsub <4 x float> %lhs, %rhs
  ret <4 x float> %hsub
}

; Single input: lanes 0 and 1 are defined, lanes 2 and 3 are undef.
define <4 x float> @hsubps3(<4 x float> %x) {
; SSE3-LABEL: hsubps3:
; SSE3:       # %bb.0:
; SSE3-NEXT:    hsubps %xmm0, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: hsubps3:
; AVX:       # %bb.0:
; AVX-NEXT:    vhsubps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %lhs = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
  %rhs = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  %hsub = fsub <4 x float> %lhs, %rhs
  ret <4 x float> %hsub
}

; Single input; only lane 0 (x0 - x1) is defined.
define <4 x float> @hsubps4(<4 x float> %x) {
; SSE3-LABEL: hsubps4:
; SSE3:       # %bb.0:
; SSE3-NEXT:    hsubps %xmm0, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: hsubps4:
; AVX:       # %bb.0:
; AVX-NEXT:    vhsubps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %lhs = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %rhs = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %hsub = fsub <4 x float> %lhs, %rhs
  ret <4 x float> %hsub
}

; 256-bit form: the masks pair elements within each 128-bit half of x and y.
; With only SSE3 the expected code is one haddps per 128-bit half.
define <8 x float> @vhaddps1(<8 x float> %x, <8 x float> %y) {
; SSE3-LABEL: vhaddps1:
; SSE3:       # %bb.0:
; SSE3-NEXT:    haddps %xmm2, %xmm0
; SSE3-NEXT:    haddps %xmm3, %xmm1
; SSE3-NEXT:    retq
;
; AVX-LABEL: vhaddps1:
; AVX:       # %bb.0:
; AVX-NEXT:    vhaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %lhs = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
  %rhs = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
  %hadd = fadd <8 x float> %lhs, %rhs
  ret <8 x float> %hadd
}

; 256-bit commuted form: rhs takes (y, x) as inputs and some pairs are swapped.
define <8 x float> @vhaddps2(<8 x float> %x, <8 x float> %y) {
; SSE3-LABEL: vhaddps2:
; SSE3:       # %bb.0:
; SSE3-NEXT:    haddps %xmm2, %xmm0
; SSE3-NEXT:    haddps %xmm3, %xmm1
; SSE3-NEXT:    retq
;
; AVX-LABEL: vhaddps2:
; AVX:       # %bb.0:
; AVX-NEXT:    vhaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %lhs = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 1, i32 2, i32 9, i32 10, i32 5, i32 6, i32 13, i32 14>
  %rhs = shufflevector <8 x float> %y, <8 x float> %x, <8 x i32> <i32 8, i32 11, i32 0, i32 3, i32 12, i32 15, i32 4, i32 7>
  %hadd = fadd <8 x float> %lhs, %rhs
  ret <8 x float> %hadd
}

; 256-bit single-input form with a mix of undef mask elements and indices
; that select from the undef second operand.
define <8 x float> @vhaddps3(<8 x float> %x) {
; SSE3-LABEL: vhaddps3:
; SSE3:       # %bb.0:
; SSE3-NEXT:    haddps %xmm0, %xmm0
; SSE3-NEXT:    haddps %xmm1, %xmm1
; SSE3-NEXT:    retq
;
; AVX-LABEL: vhaddps3:
; AVX:       # %bb.0:
; AVX-NEXT:    vhaddps %ymm0, %ymm0, %ymm0
; AVX-NEXT:    retq
  %lhs = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 undef, i32 2, i32 8, i32 10, i32 4, i32 6, i32 undef, i32 14>
  %rhs = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 9, i32 undef, i32 5, i32 7, i32 13, i32 15>
  %hadd = fadd <8 x float> %lhs, %rhs
  ret <8 x float> %hadd
}

; 256-bit horizontal subtract using the same masks as vhaddps1.
define <8 x float> @vhsubps1(<8 x float> %x, <8 x float> %y) {
; SSE3-LABEL: vhsubps1:
; SSE3:       # %bb.0:
; SSE3-NEXT:    hsubps %xmm2, %xmm0
; SSE3-NEXT:    hsubps %xmm3, %xmm1
; SSE3-NEXT:    retq
;
; AVX-LABEL: vhsubps1:
; AVX:       # %bb.0:
; AVX-NEXT:    vhsubps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %lhs = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
  %rhs = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
  %hsub = fsub <8 x float> %lhs, %rhs
  ret <8 x float> %hsub
}

; 256-bit single-input horizontal subtract using the same masks as vhaddps3.
define <8 x float> @vhsubps3(<8 x float> %x) {
; SSE3-LABEL: vhsubps3:
; SSE3:       # %bb.0:
; SSE3-NEXT:    hsubps %xmm0, %xmm0
; SSE3-NEXT:    hsubps %xmm1, %xmm1
; SSE3-NEXT:    retq
;
; AVX-LABEL: vhsubps3:
; AVX:       # %bb.0:
; AVX-NEXT:    vhsubps %ymm0, %ymm0, %ymm0
; AVX-NEXT:    retq
  %lhs = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 undef, i32 2, i32 8, i32 10, i32 4, i32 6, i32 undef, i32 14>
  %rhs = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 9, i32 undef, i32 5, i32 7, i32 13, i32 15>
  %hsub = fsub <8 x float> %lhs, %rhs
  ret <8 x float> %hsub
}

; 256-bit double form: lhs = [x0, y0, x2, y2], rhs = [x1, y1, x3, y3].
define <4 x double> @vhaddpd1(<4 x double> %x, <4 x double> %y) {
; SSE3-LABEL: vhaddpd1:
; SSE3:       # %bb.0:
; SSE3-NEXT:    haddpd %xmm2, %xmm0
; SSE3-NEXT:    haddpd %xmm3, %xmm1
; SSE3-NEXT:    retq
;
; AVX-LABEL: vhaddpd1:
; AVX:       # %bb.0:
; AVX-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %lhs = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  %rhs = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %hadd = fadd <4 x double> %lhs, %rhs
  ret <4 x double> %hadd
}

; 256-bit double horizontal subtract using the same masks as vhaddpd1.
define <4 x double> @vhsubpd1(<4 x double> %x, <4 x double> %y) {
; SSE3-LABEL: vhsubpd1:
; SSE3:       # %bb.0:
; SSE3-NEXT:    hsubpd %xmm2, %xmm0
; SSE3-NEXT:    hsubpd %xmm3, %xmm1
; SSE3-NEXT:    retq
;
; AVX-LABEL: vhsubpd1:
; AVX:       # %bb.0:
; AVX-NEXT:    vhsubpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %lhs = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  %rhs = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %hsub = fsub <4 x double> %lhs, %rhs
  ret <4 x double> %hsub
}

; Scalarized form: the four elements are extracted, added pairwise as scalars,
; and the two sums are inserted into a <2 x float> result.
define <2 x float> @haddps_v2f32(<4 x float> %v0) {
; SSE3-LABEL: haddps_v2f32:
; SSE3:       # %bb.0:
; SSE3-NEXT:    haddps %xmm0, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: haddps_v2f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vhaddps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %e0 = extractelement <4 x float> %v0, i32 0
  %e1 = extractelement <4 x float> %v0, i32 1
  %e2 = extractelement <4 x float> %v0, i32 2
  %e3 = extractelement <4 x float> %v0, i32 3
  %sum01 = fadd float %e0, %e1
  %sum23 = fadd float %e2, %e3
  %ins0 = insertelement <2 x float> undef, float %sum01, i32 0
  %ins1 = insertelement <2 x float> %ins0, float %sum23, i32 1
  ret <2 x float> %ins1
}