; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx | FileCheck %s --check-prefixes=X86,X86-MMX
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefixes=X86,X86-SSE,X86-SSE2
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx,+ssse3 | FileCheck %s --check-prefixes=X86,X86-SSE,X86-SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefixes=X64,X64-SSE,X64-SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+ssse3 | FileCheck %s --check-prefixes=X64,X64-SSE,X64-SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+avx | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+avx2 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+avx512f | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512

declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)

;
; v2i32
;
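; Each test builds an MMX value from scalar arguments, doubles it with the
; llvm.x86.mmx.padd.d intrinsic so the build_vector is consumed by an MMX
; operation, and stores the result. For v2i32, dword elements are inserted
; with movd and merged with punpckldq; a known-zero upper lane folds into
; the zero-extension performed by movd, and a splat reuses one register via
; punpckldq with itself (MMX-only) or pshufw (SSE).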

define void @build_v2i32_01(x86_mmx *%p0, i32 %a0, i32 %a1) nounwind {
; X86-LABEL: build_v2i32_01:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    paddd %mm1, %mm1
; X86-NEXT:    movq %mm1, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v2i32_01:
; X64:       # %bb.0:
; X64-NEXT:    movd %edx, %mm0
; X64-NEXT:    movd %esi, %mm1
; X64-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X64-NEXT:    paddd %mm1, %mm1
; X64-NEXT:    movq %mm1, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x i32> undef, i32 %a0, i32 0
  %2 = insertelement <2 x i32> %1, i32 %a1, i32 1
  %3 = bitcast <2 x i32> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

define void @build_v2i32_0z(x86_mmx *%p0, i32 %a0, i32 %a1) nounwind {
; X86-LABEL: build_v2i32_0z:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v2i32_0z:
; X64:       # %bb.0:
; X64-NEXT:    movd %esi, %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x i32> undef, i32 %a0, i32 0
  %2 = insertelement <2 x i32> %1, i32 0, i32 1
  %3 = bitcast <2 x i32> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

define void @build_v2i32_u1(x86_mmx *%p0, i32 %a0, i32 %a1) nounwind {
; X86-MMX-LABEL: build_v2i32_u1:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT:    punpckldq %mm0, %mm0 # mm0 = mm0[0,0]
; X86-MMX-NEXT:    paddd %mm0, %mm0
; X86-MMX-NEXT:    movq %mm0, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v2i32_u1:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-SSE-NEXT:    pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X86-SSE-NEXT:    paddd %mm0, %mm0
; X86-SSE-NEXT:    movq %mm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v2i32_u1:
; X64:       # %bb.0:
; X64-NEXT:    movd %edx, %mm0
; X64-NEXT:    pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x i32> undef, i32 undef, i32 0
  %2 = insertelement <2 x i32> %1, i32 %a1, i32 1
  %3 = bitcast <2 x i32> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

define void @build_v2i32_z1(x86_mmx *%p0, i32 %a0, i32 %a1) nounwind {
; X86-LABEL: build_v2i32_z1:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pxor %mm1, %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    paddd %mm1, %mm1
; X86-NEXT:    movq %mm1, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v2i32_z1:
; X64:       # %bb.0:
; X64-NEXT:    movd %edx, %mm0
; X64-NEXT:    pxor %mm1, %mm1
; X64-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X64-NEXT:    paddd %mm1, %mm1
; X64-NEXT:    movq %mm1, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x i32> undef, i32 0, i32 0
  %2 = insertelement <2 x i32> %1, i32 %a1, i32 1
  %3 = bitcast <2 x i32> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

define void @build_v2i32_00(x86_mmx *%p0, i32 %a0, i32 %a1) nounwind {
; X86-MMX-LABEL: build_v2i32_00:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT:    punpckldq %mm0, %mm0 # mm0 = mm0[0,0]
; X86-MMX-NEXT:    paddd %mm0, %mm0
; X86-MMX-NEXT:    movq %mm0, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v2i32_00:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-SSE-NEXT:    pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X86-SSE-NEXT:    paddd %mm0, %mm0
; X86-SSE-NEXT:    movq %mm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v2i32_00:
; X64:       # %bb.0:
; X64-NEXT:    movd %esi, %mm0
; X64-NEXT:    pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x i32> undef, i32 %a0, i32 0
  %2 = insertelement <2 x i32> %1, i32 %a0, i32 1
  %3 = bitcast <2 x i32> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

;
; v4i16
;
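; v4i16 builds need two stages: punpcklwd to pair the words, then punpckldq
; to join the halves. Trailing undef/zero lanes let the build collapse to a
; single movd, with movzwl supplying the zero-extension when the adjacent
; word lane must be zero.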

define void @build_v4i16_0123(x86_mmx *%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwind {
; X86-LABEL: build_v4i16_0123:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT:    punpcklwd %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm2
; X86-NEXT:    punpcklwd %mm0, %mm2 # mm2 = mm2[0],mm0[0],mm2[1],mm0[1]
; X86-NEXT:    punpckldq %mm1, %mm2 # mm2 = mm2[0],mm1[0]
; X86-NEXT:    paddd %mm2, %mm2
; X86-NEXT:    movq %mm2, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v4i16_0123:
; X64:       # %bb.0:
; X64-NEXT:    movd %r8d, %mm0
; X64-NEXT:    movd %ecx, %mm1
; X64-NEXT:    punpcklwd %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1]
; X64-NEXT:    movd %edx, %mm0
; X64-NEXT:    movd %esi, %mm2
; X64-NEXT:    punpcklwd %mm0, %mm2 # mm2 = mm2[0],mm0[0],mm2[1],mm0[1]
; X64-NEXT:    punpckldq %mm1, %mm2 # mm2 = mm2[0],mm1[0]
; X64-NEXT:    paddd %mm2, %mm2
; X64-NEXT:    movq %mm2, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <4 x i16> undef, i16 %a0, i32 0
  %2 = insertelement <4 x i16> %1, i16 %a1, i32 1
  %3 = insertelement <4 x i16> %2, i16 %a2, i32 2
  %4 = insertelement <4 x i16> %3, i16 %a3, i32 3
  %5 = bitcast <4 x i16> %4 to x86_mmx
  %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5)
  store x86_mmx %6, x86_mmx *%p0
  ret void
}

define void @build_v4i16_01zz(x86_mmx *%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwind {
; X86-LABEL: build_v4i16_01zz:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT:    punpcklwd %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1]
; X86-NEXT:    pxor %mm0, %mm0
; X86-NEXT:    punpcklwd %mm0, %mm0 # mm0 = mm0[0,0,1,1]
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    paddd %mm1, %mm1
; X86-NEXT:    movq %mm1, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v4i16_01zz:
; X64:       # %bb.0:
; X64-NEXT:    movd %edx, %mm0
; X64-NEXT:    movd %esi, %mm1
; X64-NEXT:    punpcklwd %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1]
; X64-NEXT:    pxor %mm0, %mm0
; X64-NEXT:    punpcklwd %mm0, %mm0 # mm0 = mm0[0,0,1,1]
; X64-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X64-NEXT:    paddd %mm1, %mm1
; X64-NEXT:    movq %mm1, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <4 x i16> undef, i16 %a0, i32 0
  %2 = insertelement <4 x i16> %1, i16 %a1, i32 1
  %3 = insertelement <4 x i16> %2, i16 0, i32 2
  %4 = insertelement <4 x i16> %3, i16 0, i32 3
  %5 = bitcast <4 x i16> %4 to x86_mmx
  %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5)
  store x86_mmx %6, x86_mmx *%p0
  ret void
}

define void @build_v4i16_0uuz(x86_mmx *%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwind {
; X86-LABEL: build_v4i16_0uuz:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v4i16_0uuz:
; X64:       # %bb.0:
; X64-NEXT:    movd %esi, %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <4 x i16> undef, i16 %a0, i32 0
  %2 = insertelement <4 x i16> %1, i16 undef, i32 1
  %3 = insertelement <4 x i16> %2, i16 undef, i32 2
  %4 = insertelement <4 x i16> %3, i16 0, i32 3
  %5 = bitcast <4 x i16> %4 to x86_mmx
  %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5)
  store x86_mmx %6, x86_mmx *%p0
  ret void
}

define void @build_v4i16_0zuz(x86_mmx *%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwind {
; X86-LABEL: build_v4i16_0zuz:
; X86:       # %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd %eax, %mm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v4i16_0zuz:
; X64:       # %bb.0:
; X64-NEXT:    movzwl %si, %eax
; X64-NEXT:    movd %eax, %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <4 x i16> undef, i16 %a0, i32 0
  %2 = insertelement <4 x i16> %1, i16 0, i32 1
  %3 = insertelement <4 x i16> %2, i16 undef, i32 2
  %4 = insertelement <4 x i16> %3, i16 0, i32 3
  %5 = bitcast <4 x i16> %4 to x86_mmx
  %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5)
  store x86_mmx %6, x86_mmx *%p0
  ret void
}

define void @build_v4i16_012u(x86_mmx *%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwind {
; X86-LABEL: build_v4i16_012u:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    punpcklwd %mm0, %mm0 # mm0 = mm0[0,0,1,1]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm2
; X86-NEXT:    punpcklwd %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1]
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    paddd %mm2, %mm2
; X86-NEXT:    movq %mm2, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v4i16_012u:
; X64:       # %bb.0:
; X64-NEXT:    movd %ecx, %mm0
; X64-NEXT:    punpcklwd %mm0, %mm0 # mm0 = mm0[0,0,1,1]
; X64-NEXT:    movd %edx, %mm1
; X64-NEXT:    movd %esi, %mm2
; X64-NEXT:    punpcklwd %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1]
; X64-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X64-NEXT:    paddd %mm2, %mm2
; X64-NEXT:    movq %mm2, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <4 x i16> undef, i16 %a0, i32 0
  %2 = insertelement <4 x i16> %1, i16 %a1, i32 1
  %3 = insertelement <4 x i16> %2, i16 %a2, i32 2
  %4 = insertelement <4 x i16> %3, i16 undef, i32 3
  %5 = bitcast <4 x i16> %4 to x86_mmx
  %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5)
  store x86_mmx %6, x86_mmx *%p0
  ret void
}

define void @build_v4i16_0u00(x86_mmx *%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwind {
; X86-MMX-LABEL: build_v4i16_0u00:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT:    punpcklwd %mm0, %mm0 # mm0 = mm0[0,0,1,1]
; X86-MMX-NEXT:    punpckldq %mm0, %mm0 # mm0 = mm0[0,0]
; X86-MMX-NEXT:    paddd %mm0, %mm0
; X86-MMX-NEXT:    movq %mm0, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v4i16_0u00:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-SSE-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0]
; X86-SSE-NEXT:    paddd %mm0, %mm0
; X86-SSE-NEXT:    movq %mm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v4i16_0u00:
; X64:       # %bb.0:
; X64-NEXT:    movd %esi, %mm0
; X64-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0]
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <4 x i16> undef, i16 %a0, i32 0
  %2 = insertelement <4 x i16> %1, i16 undef, i32 1
  %3 = insertelement <4 x i16> %2, i16 %a0, i32 2
  %4 = insertelement <4 x i16> %3, i16 %a0, i32 3
  %5 = bitcast <4 x i16> %4 to x86_mmx
  %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5)
  store x86_mmx %6, x86_mmx *%p0
  ret void
}

;
; v8i8
;
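; v8i8 builds need three stages: punpcklbw, punpcklwd, then punpckldq.
; Zero lanes are materialized with pxor, an undef/zero tail again folds to
; a single movd (with movzbl when the surrounding bytes must be zero), and
; the splat unpacks the byte against itself before broadcasting with
; pshufw (SSE) or two more self-unpacks (MMX-only).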

define void @build_v8i8_01234567(x86_mmx *%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7) nounwind {
; X86-LABEL: build_v8i8_01234567:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm2
; X86-NEXT:    punpcklbw %mm0, %mm2 # mm2 = mm2[0],mm0[0],mm2[1],mm0[1],mm2[2],mm0[2],mm2[3],mm0[3]
; X86-NEXT:    punpcklwd %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm3
; X86-NEXT:    punpcklbw %mm0, %mm3 # mm3 = mm3[0],mm0[0],mm3[1],mm0[1],mm3[2],mm0[2],mm3[3],mm0[3]
; X86-NEXT:    punpcklwd %mm1, %mm3 # mm3 = mm3[0],mm1[0],mm3[1],mm1[1]
; X86-NEXT:    punpckldq %mm2, %mm3 # mm3 = mm3[0],mm2[0]
; X86-NEXT:    paddd %mm3, %mm3
; X86-NEXT:    movq %mm3, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v8i8_01234567:
; X64:       # %bb.0:
; X64-NEXT:    movd {{[0-9]+}}(%rsp), %mm0
; X64-NEXT:    movd {{[0-9]+}}(%rsp), %mm1
; X64-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X64-NEXT:    movd %r9d, %mm0
; X64-NEXT:    movd {{[0-9]+}}(%rsp), %mm2
; X64-NEXT:    punpcklbw %mm2, %mm0 # mm0 = mm0[0],mm2[0],mm0[1],mm2[1],mm0[2],mm2[2],mm0[3],mm2[3]
; X64-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
; X64-NEXT:    movd %r8d, %mm1
; X64-NEXT:    movd %ecx, %mm2
; X64-NEXT:    punpcklbw %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1],mm2[2],mm1[2],mm2[3],mm1[3]
; X64-NEXT:    movd %edx, %mm1
; X64-NEXT:    movd %esi, %mm3
; X64-NEXT:    punpcklbw %mm1, %mm3 # mm3 = mm3[0],mm1[0],mm3[1],mm1[1],mm3[2],mm1[2],mm3[3],mm1[3]
; X64-NEXT:    punpcklwd %mm2, %mm3 # mm3 = mm3[0],mm2[0],mm3[1],mm2[1]
; X64-NEXT:    punpckldq %mm0, %mm3 # mm3 = mm3[0],mm0[0]
; X64-NEXT:    paddd %mm3, %mm3
; X64-NEXT:    movq %mm3, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <8 x i8> undef, i8 %a0, i32 0
  %2 = insertelement <8 x i8> %1, i8 %a1, i32 1
  %3 = insertelement <8 x i8> %2, i8 %a2, i32 2
  %4 = insertelement <8 x i8> %3, i8 %a3, i32 3
  %5 = insertelement <8 x i8> %4, i8 %a4, i32 4
  %6 = insertelement <8 x i8> %5, i8 %a5, i32 5
  %7 = insertelement <8 x i8> %6, i8 %a6, i32 6
  %8 = insertelement <8 x i8> %7, i8 %a7, i32 7
  %9 = bitcast <8 x i8> %8 to x86_mmx
  %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %9, x86_mmx %9)
  store x86_mmx %10, x86_mmx *%p0
  ret void
}

define void @build_v8i8_0u2345z7(x86_mmx *%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7) nounwind {
; X86-LABEL: build_v8i8_0u2345z7:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pxor %mm1, %mm1
; X86-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm2
; X86-NEXT:    punpcklbw %mm0, %mm2 # mm2 = mm2[0],mm0[0],mm2[1],mm0[1],mm2[2],mm0[2],mm2[3],mm0[3]
; X86-NEXT:    punpcklwd %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    punpcklbw %mm0, %mm0 # mm0 = mm0[0,0,1,1,2,2,3,3]
; X86-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
; X86-NEXT:    punpckldq %mm2, %mm0 # mm0 = mm0[0],mm2[0]
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v8i8_0u2345z7:
; X64:       # %bb.0:
; X64-NEXT:    movd {{[0-9]+}}(%rsp), %mm0
; X64-NEXT:    pxor %mm1, %mm1
; X64-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X64-NEXT:    movd %r9d, %mm0
; X64-NEXT:    movd {{[0-9]+}}(%rsp), %mm2
; X64-NEXT:    punpcklbw %mm2, %mm0 # mm0 = mm0[0],mm2[0],mm0[1],mm2[1],mm0[2],mm2[2],mm0[3],mm2[3]
; X64-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
; X64-NEXT:    movd %r8d, %mm1
; X64-NEXT:    movd %ecx, %mm2
; X64-NEXT:    punpcklbw %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1],mm2[2],mm1[2],mm2[3],mm1[3]
; X64-NEXT:    movd %esi, %mm1
; X64-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X64-NEXT:    punpcklwd %mm2, %mm1 # mm1 = mm1[0],mm2[0],mm1[1],mm2[1]
; X64-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X64-NEXT:    paddd %mm1, %mm1
; X64-NEXT:    movq %mm1, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <8 x i8> undef, i8 %a0, i32 0
  %2 = insertelement <8 x i8> %1, i8 undef, i32 1
  %3 = insertelement <8 x i8> %2, i8 %a2, i32 2
  %4 = insertelement <8 x i8> %3, i8 %a3, i32 3
  %5 = insertelement <8 x i8> %4, i8 %a4, i32 4
  %6 = insertelement <8 x i8> %5, i8 %a5, i32 5
  %7 = insertelement <8 x i8> %6, i8 0, i32 6
  %8 = insertelement <8 x i8> %7, i8 %a7, i32 7
  %9 = bitcast <8 x i8> %8 to x86_mmx
  %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %9, x86_mmx %9)
  store x86_mmx %10, x86_mmx *%p0
  ret void
}

define void @build_v8i8_0123zzzu(x86_mmx *%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7) nounwind {
; X86-LABEL: build_v8i8_0123zzzu:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm2
; X86-NEXT:    punpcklbw %mm0, %mm2 # mm2 = mm2[0],mm0[0],mm2[1],mm0[1],mm2[2],mm0[2],mm2[3],mm0[3]
; X86-NEXT:    punpcklwd %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1]
; X86-NEXT:    pxor %mm0, %mm0
; X86-NEXT:    pxor %mm1, %mm1
; X86-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X86-NEXT:    punpcklbw %mm0, %mm0 # mm0 = mm0[0,0,1,1,2,2,3,3]
; X86-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    paddd %mm2, %mm2
; X86-NEXT:    movq %mm2, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v8i8_0123zzzu:
; X64:       # %bb.0:
; X64-NEXT:    movd %r8d, %mm0
; X64-NEXT:    movd %ecx, %mm1
; X64-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X64-NEXT:    movd %edx, %mm0
; X64-NEXT:    movd %esi, %mm2
; X64-NEXT:    punpcklbw %mm0, %mm2 # mm2 = mm2[0],mm0[0],mm2[1],mm0[1],mm2[2],mm0[2],mm2[3],mm0[3]
; X64-NEXT:    punpcklwd %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1]
; X64-NEXT:    pxor %mm0, %mm0
; X64-NEXT:    pxor %mm1, %mm1
; X64-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X64-NEXT:    punpcklbw %mm0, %mm0 # mm0 = mm0[0,0,1,1,2,2,3,3]
; X64-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
; X64-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X64-NEXT:    paddd %mm2, %mm2
; X64-NEXT:    movq %mm2, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <8 x i8> undef, i8 %a0, i32 0
  %2 = insertelement <8 x i8> %1, i8 %a1, i32 1
  %3 = insertelement <8 x i8> %2, i8 %a2, i32 2
  %4 = insertelement <8 x i8> %3, i8 %a3, i32 3
  %5 = insertelement <8 x i8> %4, i8 0, i32 4
  %6 = insertelement <8 x i8> %5, i8 0, i32 5
  %7 = insertelement <8 x i8> %6, i8 0, i32 6
  %8 = insertelement <8 x i8> %7, i8 undef, i32 7
  %9 = bitcast <8 x i8> %8 to x86_mmx
  %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %9, x86_mmx %9)
  store x86_mmx %10, x86_mmx *%p0
  ret void
}

define void @build_v8i8_0uuuuzzz(x86_mmx *%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7) nounwind {
; X86-LABEL: build_v8i8_0uuuuzzz:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v8i8_0uuuuzzz:
; X64:       # %bb.0:
; X64-NEXT:    movd %esi, %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <8 x i8> undef, i8 %a0, i32 0
  %2 = insertelement <8 x i8> %1, i8 undef, i32 1
  %3 = insertelement <8 x i8> %2, i8 undef, i32 2
  %4 = insertelement <8 x i8> %3, i8 undef, i32 3
  %5 = insertelement <8 x i8> %4, i8 undef, i32 4
  %6 = insertelement <8 x i8> %5, i8 0, i32 5
  %7 = insertelement <8 x i8> %6, i8 0, i32 6
  %8 = insertelement <8 x i8> %7, i8 0, i32 7
  %9 = bitcast <8 x i8> %8 to x86_mmx
  %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %9, x86_mmx %9)
  store x86_mmx %10, x86_mmx *%p0
  ret void
}

define void @build_v8i8_0zzzzzzu(x86_mmx *%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7) nounwind {
; X86-LABEL: build_v8i8_0zzzzzzu:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd %eax, %mm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v8i8_0zzzzzzu:
; X64:       # %bb.0:
; X64-NEXT:    movzbl %sil, %eax
; X64-NEXT:    movd %eax, %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <8 x i8> undef, i8 %a0, i32 0
  %2 = insertelement <8 x i8> %1, i8 0, i32 1
  %3 = insertelement <8 x i8> %2, i8 0, i32 2
  %4 = insertelement <8 x i8> %3, i8 0, i32 3
  %5 = insertelement <8 x i8> %4, i8 0, i32 4
  %6 = insertelement <8 x i8> %5, i8 0, i32 5
  %7 = insertelement <8 x i8> %6, i8 0, i32 6
  %8 = insertelement <8 x i8> %7, i8 undef, i32 7
  %9 = bitcast <8 x i8> %8 to x86_mmx
  %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %9, x86_mmx %9)
  store x86_mmx %10, x86_mmx *%p0
  ret void
}

define void @build_v8i8_00000000(x86_mmx *%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7) nounwind {
; X86-MMX-LABEL: build_v8i8_00000000:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT:    punpcklbw %mm0, %mm0 # mm0 = mm0[0,0,1,1,2,2,3,3]
; X86-MMX-NEXT:    punpcklwd %mm0, %mm0 # mm0 = mm0[0,0,1,1]
; X86-MMX-NEXT:    punpckldq %mm0, %mm0 # mm0 = mm0[0,0]
; X86-MMX-NEXT:    paddd %mm0, %mm0
; X86-MMX-NEXT:    movq %mm0, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v8i8_00000000:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-SSE-NEXT:    punpcklbw %mm0, %mm0 # mm0 = mm0[0,0,1,1,2,2,3,3]
; X86-SSE-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0]
; X86-SSE-NEXT:    paddd %mm0, %mm0
; X86-SSE-NEXT:    movq %mm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v8i8_00000000:
; X64:       # %bb.0:
; X64-NEXT:    movd %esi, %mm0
; X64-NEXT:    punpcklbw %mm0, %mm0 # mm0 = mm0[0,0,1,1,2,2,3,3]
; X64-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0]
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <8 x i8> undef, i8 %a0, i32 0
  %2 = insertelement <8 x i8> %1, i8 %a0, i32 1
  %3 = insertelement <8 x i8> %2, i8 %a0, i32 2
  %4 = insertelement <8 x i8> %3, i8 %a0, i32 3
  %5 = insertelement <8 x i8> %4, i8 %a0, i32 4
  %6 = insertelement <8 x i8> %5, i8 %a0, i32 5
  %7 = insertelement <8 x i8> %6, i8 %a0, i32 6
  %8 = insertelement <8 x i8> %7, i8 %a0, i32 7
  %9 = bitcast <8 x i8> %8 to x86_mmx
  %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %9, x86_mmx %9)
  store x86_mmx %10, x86_mmx *%p0
  ret void
}

;
; v2f32
;
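; v2f32 elements arrive in XMM registers on SSE targets (always on x86-64,
; per the ABI), so they are transferred to MMX registers with movdq2q before
; the same punpckldq/pshufw patterns used for v2i32; the MMX-only x86 target
; instead loads the floats from the stack with movd.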

define void @build_v2f32_01(x86_mmx *%p0, float %a0, float %a1) nounwind {
; X86-MMX-LABEL: build_v2f32_01:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-MMX-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-MMX-NEXT:    paddd %mm1, %mm1
; X86-MMX-NEXT:    movq %mm1, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v2f32_01:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movdq2q %xmm1, %mm0
; X86-SSE-NEXT:    movdq2q %xmm0, %mm1
; X86-SSE-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-SSE-NEXT:    paddd %mm1, %mm1
; X86-SSE-NEXT:    movq %mm1, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v2f32_01:
; X64:       # %bb.0:
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X64-NEXT:    paddd %mm1, %mm1
; X64-NEXT:    movq %mm1, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x float> undef, float %a0, i32 0
  %2 = insertelement <2 x float> %1, float %a1, i32 1
  %3 = bitcast <2 x float> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

define void @build_v2f32_0z(x86_mmx *%p0, float %a0, float %a1) nounwind {
; X86-MMX-LABEL: build_v2f32_0z:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    pxor %mm0, %mm0
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-MMX-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-MMX-NEXT:    paddd %mm1, %mm1
; X86-MMX-NEXT:    movq %mm1, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v2f32_0z:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movdq2q %xmm0, %mm0
; X86-SSE-NEXT:    pxor %mm1, %mm1
; X86-SSE-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0]
; X86-SSE-NEXT:    paddd %mm0, %mm0
; X86-SSE-NEXT:    movq %mm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v2f32_0z:
; X64:       # %bb.0:
; X64-NEXT:    movdq2q %xmm0, %mm0
; X64-NEXT:    pxor %mm1, %mm1
; X64-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0]
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x float> undef, float %a0, i32 0
  %2 = insertelement <2 x float> %1, float 0.0, i32 1
  %3 = bitcast <2 x float> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

define void @build_v2f32_u1(x86_mmx *%p0, float %a0, float %a1) nounwind {
; X86-MMX-LABEL: build_v2f32_u1:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT:    punpckldq %mm0, %mm0 # mm0 = mm0[0,0]
; X86-MMX-NEXT:    paddd %mm0, %mm0
; X86-MMX-NEXT:    movq %mm0, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v2f32_u1:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movdq2q %xmm0, %mm0
; X86-SSE-NEXT:    pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X86-SSE-NEXT:    paddd %mm0, %mm0
; X86-SSE-NEXT:    movq %mm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v2f32_u1:
; X64:       # %bb.0:
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x float> undef, float undef, i32 0
  %2 = insertelement <2 x float> %1, float %a1, i32 1
  %3 = bitcast <2 x float> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

define void @build_v2f32_z1(x86_mmx *%p0, float %a0, float %a1) nounwind {
; X86-MMX-LABEL: build_v2f32_z1:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT:    pxor %mm1, %mm1
; X86-MMX-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-MMX-NEXT:    paddd %mm1, %mm1
; X86-MMX-NEXT:    movq %mm1, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v2f32_z1:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movdq2q %xmm0, %mm0
; X86-SSE-NEXT:    pxor %mm1, %mm1
; X86-SSE-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-SSE-NEXT:    paddd %mm1, %mm1
; X86-SSE-NEXT:    movq %mm1, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v2f32_z1:
; X64:       # %bb.0:
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    pxor %mm1, %mm1
; X64-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X64-NEXT:    paddd %mm1, %mm1
; X64-NEXT:    movq %mm1, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x float> undef, float 0.0, i32 0
  %2 = insertelement <2 x float> %1, float %a1, i32 1
  %3 = bitcast <2 x float> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

define void @build_v2f32_00(x86_mmx *%p0, float %a0, float %a1) nounwind {
; X86-MMX-LABEL: build_v2f32_00:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT:    punpckldq %mm0, %mm0 # mm0 = mm0[0,0]
; X86-MMX-NEXT:    paddd %mm0, %mm0
; X86-MMX-NEXT:    movq %mm0, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v2f32_00:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movdq2q %xmm0, %mm0
; X86-SSE-NEXT:    pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X86-SSE-NEXT:    paddd %mm0, %mm0
; X86-SSE-NEXT:    movq %mm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v2f32_00:
; X64:       # %bb.0:
; X64-NEXT:    movdq2q %xmm0, %mm0
; X64-NEXT:    pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x float> undef, float %a0, i32 0
  %2 = insertelement <2 x float> %1, float %a0, i32 1
  %3 = bitcast <2 x float> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}