; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown   -mattr=+mmx          | FileCheck %s --check-prefixes=X86,X86-MMX
; RUN: llc < %s -mtriple=i686-unknown-unknown   -mattr=+mmx,+sse2    | FileCheck %s --check-prefixes=X86,X86-SSE,X86-SSE2
; RUN: llc < %s -mtriple=i686-unknown-unknown   -mattr=+mmx,+ssse3   | FileCheck %s --check-prefixes=X86,X86-SSE,X86-SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+sse2    | FileCheck %s --check-prefixes=X64,X64-SSE,X64-SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+ssse3   | FileCheck %s --check-prefixes=X64,X64-SSE,X64-SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+avx     | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+avx2    | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+avx512f | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512

declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)

;
; v2i32
;

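; Both elements defined: each i32 is inserted with movd and the halves are merged with punpckldq.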
define void @build_v2i32_01(x86_mmx *%p0, i32 %a0, i32 %a1) nounwind {
; X86-LABEL: build_v2i32_01:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    paddd %mm1, %mm1
; X86-NEXT:    movq %mm1, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v2i32_01:
; X64:       # %bb.0:
; X64-NEXT:    movd %edx, %mm0
; X64-NEXT:    movd %esi, %mm1
; X64-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X64-NEXT:    paddd %mm1, %mm1
; X64-NEXT:    movq %mm1, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x i32> undef, i32 %a0, i32 0
  %2 = insertelement <2 x i32>    %1, i32 %a1, i32 1
  %3 = bitcast <2 x i32> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

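; Upper element is zero: movd already zeroes the upper half of the MMX register, so no unpack is needed.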
define void @build_v2i32_0z(x86_mmx *%p0, i32 %a0, i32 %a1) nounwind {
; X86-LABEL: build_v2i32_0z:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v2i32_0z:
; X64:       # %bb.0:
; X64-NEXT:    movd %esi, %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x i32> undef, i32 %a0, i32 0
  %2 = insertelement <2 x i32>    %1, i32   0, i32 1
  %3 = bitcast <2 x i32> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

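; Lower element is undef, so %a1 is just splatted (punpckldq on plain MMX, a single pshufw once SSE is available).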
define void @build_v2i32_u1(x86_mmx *%p0, i32 %a0, i32 %a1) nounwind {
; X86-MMX-LABEL: build_v2i32_u1:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT:    punpckldq %mm0, %mm0 # mm0 = mm0[0,0]
; X86-MMX-NEXT:    paddd %mm0, %mm0
; X86-MMX-NEXT:    movq %mm0, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v2i32_u1:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-SSE-NEXT:    pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X86-SSE-NEXT:    paddd %mm0, %mm0
; X86-SSE-NEXT:    movq %mm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v2i32_u1:
; X64:       # %bb.0:
; X64-NEXT:    movd %edx, %mm0
; X64-NEXT:    pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x i32> undef, i32 undef, i32 0
  %2 = insertelement <2 x i32>    %1, i32   %a1, i32 1
  %3 = bitcast <2 x i32> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

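; Lower element is zero: %a1 is unpacked above a pxor-zeroed register.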
define void @build_v2i32_z1(x86_mmx *%p0, i32 %a0, i32 %a1) nounwind {
; X86-LABEL: build_v2i32_z1:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pxor %mm1, %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    paddd %mm1, %mm1
; X86-NEXT:    movq %mm1, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v2i32_z1:
; X64:       # %bb.0:
; X64-NEXT:    movd %edx, %mm0
; X64-NEXT:    pxor %mm1, %mm1
; X64-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X64-NEXT:    paddd %mm1, %mm1
; X64-NEXT:    movq %mm1, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x i32> undef, i32   0, i32 0
  %2 = insertelement <2 x i32>    %1, i32 %a1, i32 1
  %3 = bitcast <2 x i32> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

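; Splat of %a0: SSE targets can use a single pshufw instead of punpckldq.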
define void @build_v2i32_00(x86_mmx *%p0, i32 %a0, i32 %a1) nounwind {
; X86-MMX-LABEL: build_v2i32_00:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT:    punpckldq %mm0, %mm0 # mm0 = mm0[0,0]
; X86-MMX-NEXT:    paddd %mm0, %mm0
; X86-MMX-NEXT:    movq %mm0, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v2i32_00:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-SSE-NEXT:    pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X86-SSE-NEXT:    paddd %mm0, %mm0
; X86-SSE-NEXT:    movq %mm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v2i32_00:
; X64:       # %bb.0:
; X64-NEXT:    movd %esi, %mm0
; X64-NEXT:    pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x i32> undef, i32 %a0, i32 0
  %2 = insertelement <2 x i32>    %1, i32 %a0, i32 1
  %3 = bitcast <2 x i32> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

;
; v4i16
;

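; All four i16 elements defined: built with a punpcklwd/punpckldq tree.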
define void @build_v4i16_0123(x86_mmx *%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwind {
; X86-LABEL: build_v4i16_0123:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT:    punpcklwd %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm2
; X86-NEXT:    punpcklwd %mm0, %mm2 # mm2 = mm2[0],mm0[0],mm2[1],mm0[1]
; X86-NEXT:    punpckldq %mm1, %mm2 # mm2 = mm2[0],mm1[0]
; X86-NEXT:    paddd %mm2, %mm2
; X86-NEXT:    movq %mm2, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v4i16_0123:
; X64:       # %bb.0:
; X64-NEXT:    movd %r8d, %mm0
; X64-NEXT:    movd %ecx, %mm1
; X64-NEXT:    punpcklwd %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1]
; X64-NEXT:    movd %edx, %mm0
; X64-NEXT:    movd %esi, %mm2
; X64-NEXT:    punpcklwd %mm0, %mm2 # mm2 = mm2[0],mm0[0],mm2[1],mm0[1]
; X64-NEXT:    punpckldq %mm1, %mm2 # mm2 = mm2[0],mm1[0]
; X64-NEXT:    paddd %mm2, %mm2
; X64-NEXT:    movq %mm2, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <4 x i16> undef, i16 %a0, i32 0
  %2 = insertelement <4 x i16>    %1, i16 %a1, i32 1
  %3 = insertelement <4 x i16>    %2, i16 %a2, i32 2
  %4 = insertelement <4 x i16>    %3, i16 %a3, i32 3
  %5 = bitcast <4 x i16> %4 to x86_mmx
  %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5)
  store x86_mmx %6, x86_mmx *%p0
  ret void
}

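; Upper half is zero, built by unpacking against a pxor-zeroed register.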
define void @build_v4i16_01zz(x86_mmx *%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwind {
; X86-LABEL: build_v4i16_01zz:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT:    punpcklwd %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1]
; X86-NEXT:    pxor %mm0, %mm0
; X86-NEXT:    punpcklwd %mm0, %mm0 # mm0 = mm0[0,0,1,1]
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    paddd %mm1, %mm1
; X86-NEXT:    movq %mm1, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v4i16_01zz:
; X64:       # %bb.0:
; X64-NEXT:    movd %edx, %mm0
; X64-NEXT:    movd %esi, %mm1
; X64-NEXT:    punpcklwd %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1]
; X64-NEXT:    pxor %mm0, %mm0
; X64-NEXT:    punpcklwd %mm0, %mm0 # mm0 = mm0[0,0,1,1]
; X64-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X64-NEXT:    paddd %mm1, %mm1
; X64-NEXT:    movq %mm1, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <4 x i16> undef, i16 %a0, i32 0
  %2 = insertelement <4 x i16>    %1, i16 %a1, i32 1
  %3 = insertelement <4 x i16>    %2, i16   0, i32 2
  %4 = insertelement <4 x i16>    %3, i16   0, i32 3
  %5 = bitcast <4 x i16> %4 to x86_mmx
  %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5)
  store x86_mmx %6, x86_mmx *%p0
  ret void
}

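; Elements 1-2 are undef and movd's implicit zeroing covers element 3, so the whole build is a single movd.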
define void @build_v4i16_0uuz(x86_mmx *%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwind {
; X86-LABEL: build_v4i16_0uuz:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v4i16_0uuz:
; X64:       # %bb.0:
; X64-NEXT:    movd %esi, %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <4 x i16> undef, i16   %a0, i32 0
  %2 = insertelement <4 x i16>    %1, i16 undef, i32 1
  %3 = insertelement <4 x i16>    %2, i16 undef, i32 2
  %4 = insertelement <4 x i16>    %3, i16     0, i32 3
  %5 = bitcast <4 x i16> %4 to x86_mmx
  %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5)
  store x86_mmx %6, x86_mmx *%p0
  ret void
}

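; Element 1 must be zero, so %a0 is zero-extended with movzwl before the movd.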
define void @build_v4i16_0zuz(x86_mmx *%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwind {
; X86-LABEL: build_v4i16_0zuz:
; X86:       # %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd %eax, %mm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v4i16_0zuz:
; X64:       # %bb.0:
; X64-NEXT:    movzwl %si, %eax
; X64-NEXT:    movd %eax, %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <4 x i16> undef, i16   %a0, i32 0
  %2 = insertelement <4 x i16>    %1, i16     0, i32 1
  %3 = insertelement <4 x i16>    %2, i16 undef, i32 2
  %4 = insertelement <4 x i16>    %3, i16     0, i32 3
  %5 = bitcast <4 x i16> %4 to x86_mmx
  %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5)
  store x86_mmx %6, x86_mmx *%p0
  ret void
}

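; Trailing undef lane: element 2 is unpacked with itself instead of a fourth value.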
define void @build_v4i16_012u(x86_mmx *%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwind {
; X86-LABEL: build_v4i16_012u:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    punpcklwd %mm0, %mm0 # mm0 = mm0[0,0,1,1]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm2
; X86-NEXT:    punpcklwd %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1]
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    paddd %mm2, %mm2
; X86-NEXT:    movq %mm2, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v4i16_012u:
; X64:       # %bb.0:
; X64-NEXT:    movd %ecx, %mm0
; X64-NEXT:    punpcklwd %mm0, %mm0 # mm0 = mm0[0,0,1,1]
; X64-NEXT:    movd %edx, %mm1
; X64-NEXT:    movd %esi, %mm2
; X64-NEXT:    punpcklwd %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1]
; X64-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X64-NEXT:    paddd %mm2, %mm2
; X64-NEXT:    movq %mm2, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <4 x i16> undef, i16   %a0, i32 0
  %2 = insertelement <4 x i16>    %1, i16   %a1, i32 1
  %3 = insertelement <4 x i16>    %2, i16   %a2, i32 2
  %4 = insertelement <4 x i16>    %3, i16 undef, i32 3
  %5 = bitcast <4 x i16> %4 to x86_mmx
  %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5)
  store x86_mmx %6, x86_mmx *%p0
  ret void
}

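; Splat of %a0 (lane 1 undef): SSE targets reduce the unpacks to pshufw $0.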
define void @build_v4i16_0u00(x86_mmx *%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwind {
; X86-MMX-LABEL: build_v4i16_0u00:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT:    punpcklwd %mm0, %mm0 # mm0 = mm0[0,0,1,1]
; X86-MMX-NEXT:    punpckldq %mm0, %mm0 # mm0 = mm0[0,0]
; X86-MMX-NEXT:    paddd %mm0, %mm0
; X86-MMX-NEXT:    movq %mm0, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v4i16_0u00:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-SSE-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0]
; X86-SSE-NEXT:    paddd %mm0, %mm0
; X86-SSE-NEXT:    movq %mm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v4i16_0u00:
; X64:       # %bb.0:
; X64-NEXT:    movd %esi, %mm0
; X64-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0]
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <4 x i16> undef, i16   %a0, i32 0
  %2 = insertelement <4 x i16>    %1, i16 undef, i32 1
  %3 = insertelement <4 x i16>    %2, i16   %a0, i32 2
  %4 = insertelement <4 x i16>    %3, i16   %a0, i32 3
  %5 = bitcast <4 x i16> %4 to x86_mmx
  %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5)
  store x86_mmx %6, x86_mmx *%p0
  ret void
}

;
; v8i8
;

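; All eight i8 elements defined: a full punpcklbw/punpcklwd/punpckldq tree.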
define void @build_v8i8_01234567(x86_mmx *%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7) nounwind {
; X86-LABEL: build_v8i8_01234567:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm2
; X86-NEXT:    punpcklbw %mm0, %mm2 # mm2 = mm2[0],mm0[0],mm2[1],mm0[1],mm2[2],mm0[2],mm2[3],mm0[3]
; X86-NEXT:    punpcklwd %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm3
; X86-NEXT:    punpcklbw %mm0, %mm3 # mm3 = mm3[0],mm0[0],mm3[1],mm0[1],mm3[2],mm0[2],mm3[3],mm0[3]
; X86-NEXT:    punpcklwd %mm1, %mm3 # mm3 = mm3[0],mm1[0],mm3[1],mm1[1]
; X86-NEXT:    punpckldq %mm2, %mm3 # mm3 = mm3[0],mm2[0]
; X86-NEXT:    paddd %mm3, %mm3
; X86-NEXT:    movq %mm3, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v8i8_01234567:
; X64:       # %bb.0:
; X64-NEXT:    movd {{[0-9]+}}(%rsp), %mm0
; X64-NEXT:    movd {{[0-9]+}}(%rsp), %mm1
; X64-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X64-NEXT:    movd %r9d, %mm0
; X64-NEXT:    movd {{[0-9]+}}(%rsp), %mm2
; X64-NEXT:    punpcklbw %mm2, %mm0 # mm0 = mm0[0],mm2[0],mm0[1],mm2[1],mm0[2],mm2[2],mm0[3],mm2[3]
; X64-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
; X64-NEXT:    movd %r8d, %mm1
; X64-NEXT:    movd %ecx, %mm2
; X64-NEXT:    punpcklbw %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1],mm2[2],mm1[2],mm2[3],mm1[3]
; X64-NEXT:    movd %edx, %mm1
; X64-NEXT:    movd %esi, %mm3
; X64-NEXT:    punpcklbw %mm1, %mm3 # mm3 = mm3[0],mm1[0],mm3[1],mm1[1],mm3[2],mm1[2],mm3[3],mm1[3]
; X64-NEXT:    punpcklwd %mm2, %mm3 # mm3 = mm3[0],mm2[0],mm3[1],mm2[1]
; X64-NEXT:    punpckldq %mm0, %mm3 # mm3 = mm3[0],mm0[0]
; X64-NEXT:    paddd %mm3, %mm3
; X64-NEXT:    movq %mm3, (%rdi)
; X64-NEXT:    retq
  %1  = insertelement <8 x i8> undef, i8 %a0, i32 0
  %2  = insertelement <8 x i8>    %1, i8 %a1, i32 1
  %3  = insertelement <8 x i8>    %2, i8 %a2, i32 2
  %4  = insertelement <8 x i8>    %3, i8 %a3, i32 3
  %5  = insertelement <8 x i8>    %4, i8 %a4, i32 4
  %6  = insertelement <8 x i8>    %5, i8 %a5, i32 5
  %7  = insertelement <8 x i8>    %6, i8 %a6, i32 6
  %8  = insertelement <8 x i8>    %7, i8 %a7, i32 7
  %9  = bitcast <8 x i8> %8 to x86_mmx
  %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %9, x86_mmx %9)
  store x86_mmx %10, x86_mmx *%p0
  ret void
}

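; Mix of undef (lane 1) and zero (lane 6) inside an otherwise complete build.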
define void @build_v8i8_0u2345z7(x86_mmx *%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7) nounwind {
; X86-LABEL: build_v8i8_0u2345z7:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pxor %mm1, %mm1
; X86-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm2
; X86-NEXT:    punpcklbw %mm0, %mm2 # mm2 = mm2[0],mm0[0],mm2[1],mm0[1],mm2[2],mm0[2],mm2[3],mm0[3]
; X86-NEXT:    punpcklwd %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    punpcklbw %mm0, %mm0 # mm0 = mm0[0,0,1,1,2,2,3,3]
; X86-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
; X86-NEXT:    punpckldq %mm2, %mm0 # mm0 = mm0[0],mm2[0]
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v8i8_0u2345z7:
; X64:       # %bb.0:
; X64-NEXT:    movd {{[0-9]+}}(%rsp), %mm0
; X64-NEXT:    pxor %mm1, %mm1
; X64-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X64-NEXT:    movd %r9d, %mm0
; X64-NEXT:    movd {{[0-9]+}}(%rsp), %mm2
; X64-NEXT:    punpcklbw %mm2, %mm0 # mm0 = mm0[0],mm2[0],mm0[1],mm2[1],mm0[2],mm2[2],mm0[3],mm2[3]
; X64-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
; X64-NEXT:    movd %r8d, %mm1
; X64-NEXT:    movd %ecx, %mm2
; X64-NEXT:    punpcklbw %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1],mm2[2],mm1[2],mm2[3],mm1[3]
; X64-NEXT:    movd %esi, %mm1
; X64-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X64-NEXT:    punpcklwd %mm2, %mm1 # mm1 = mm1[0],mm2[0],mm1[1],mm2[1]
; X64-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X64-NEXT:    paddd %mm1, %mm1
; X64-NEXT:    movq %mm1, (%rdi)
; X64-NEXT:    retq
  %1  = insertelement <8 x i8> undef, i8   %a0, i32 0
  %2  = insertelement <8 x i8>    %1, i8 undef, i32 1
  %3  = insertelement <8 x i8>    %2, i8   %a2, i32 2
  %4  = insertelement <8 x i8>    %3, i8   %a3, i32 3
  %5  = insertelement <8 x i8>    %4, i8   %a4, i32 4
  %6  = insertelement <8 x i8>    %5, i8   %a5, i32 5
  %7  = insertelement <8 x i8>    %6, i8     0, i32 6
  %8  = insertelement <8 x i8>    %7, i8   %a7, i32 7
  %9  = bitcast <8 x i8> %8 to x86_mmx
  %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %9, x86_mmx %9)
  store x86_mmx %10, x86_mmx *%p0
  ret void
}

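; Low half defined, high half zero/undef; the zero half is still materialized with pxor and unpacks.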
define void @build_v8i8_0123zzzu(x86_mmx *%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7) nounwind {
; X86-LABEL: build_v8i8_0123zzzu:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm2
; X86-NEXT:    punpcklbw %mm0, %mm2 # mm2 = mm2[0],mm0[0],mm2[1],mm0[1],mm2[2],mm0[2],mm2[3],mm0[3]
; X86-NEXT:    punpcklwd %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1]
; X86-NEXT:    pxor %mm0, %mm0
; X86-NEXT:    pxor %mm1, %mm1
; X86-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X86-NEXT:    punpcklbw %mm0, %mm0 # mm0 = mm0[0,0,1,1,2,2,3,3]
; X86-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    paddd %mm2, %mm2
; X86-NEXT:    movq %mm2, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v8i8_0123zzzu:
; X64:       # %bb.0:
; X64-NEXT:    movd %r8d, %mm0
; X64-NEXT:    movd %ecx, %mm1
; X64-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X64-NEXT:    movd %edx, %mm0
; X64-NEXT:    movd %esi, %mm2
; X64-NEXT:    punpcklbw %mm0, %mm2 # mm2 = mm2[0],mm0[0],mm2[1],mm0[1],mm2[2],mm0[2],mm2[3],mm0[3]
; X64-NEXT:    punpcklwd %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1]
; X64-NEXT:    pxor %mm0, %mm0
; X64-NEXT:    pxor %mm1, %mm1
; X64-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X64-NEXT:    punpcklbw %mm0, %mm0 # mm0 = mm0[0,0,1,1,2,2,3,3]
; X64-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
; X64-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X64-NEXT:    paddd %mm2, %mm2
; X64-NEXT:    movq %mm2, (%rdi)
; X64-NEXT:    retq
  %1  = insertelement <8 x i8> undef, i8   %a0, i32 0
  %2  = insertelement <8 x i8>    %1, i8   %a1, i32 1
  %3  = insertelement <8 x i8>    %2, i8   %a2, i32 2
  %4  = insertelement <8 x i8>    %3, i8   %a3, i32 3
  %5  = insertelement <8 x i8>    %4, i8     0, i32 4
  %6  = insertelement <8 x i8>    %5, i8     0, i32 5
  %7  = insertelement <8 x i8>    %6, i8     0, i32 6
  %8  = insertelement <8 x i8>    %7, i8 undef, i32 7
  %9  = bitcast <8 x i8> %8 to x86_mmx
  %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %9, x86_mmx %9)
  store x86_mmx %10, x86_mmx *%p0
  ret void
}

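; Only element 0 is defined; the undef lanes plus movd's implicit zeroing cover the rest, so a single movd suffices.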
define void @build_v8i8_0uuuuzzz(x86_mmx *%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7) nounwind {
; X86-LABEL: build_v8i8_0uuuuzzz:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v8i8_0uuuuzzz:
; X64:       # %bb.0:
; X64-NEXT:    movd %esi, %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1  = insertelement <8 x i8> undef, i8   %a0, i32 0
  %2  = insertelement <8 x i8>    %1, i8 undef, i32 1
  %3  = insertelement <8 x i8>    %2, i8 undef, i32 2
  %4  = insertelement <8 x i8>    %3, i8 undef, i32 3
  %5  = insertelement <8 x i8>    %4, i8 undef, i32 4
  %6  = insertelement <8 x i8>    %5, i8     0, i32 5
  %7  = insertelement <8 x i8>    %6, i8     0, i32 6
  %8  = insertelement <8 x i8>    %7, i8     0, i32 7
  %9  = bitcast <8 x i8> %8 to x86_mmx
  %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %9, x86_mmx %9)
  store x86_mmx %10, x86_mmx *%p0
  ret void
}

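; Zero lanes 1-3 force an explicit movzbl of %a0; movd's zeroing handles the upper lanes.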
define void @build_v8i8_0zzzzzzu(x86_mmx *%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7) nounwind {
; X86-LABEL: build_v8i8_0zzzzzzu:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd %eax, %mm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v8i8_0zzzzzzu:
; X64:       # %bb.0:
; X64-NEXT:    movzbl %sil, %eax
; X64-NEXT:    movd %eax, %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1  = insertelement <8 x i8> undef, i8   %a0, i32 0
  %2  = insertelement <8 x i8>    %1, i8     0, i32 1
  %3  = insertelement <8 x i8>    %2, i8     0, i32 2
  %4  = insertelement <8 x i8>    %3, i8     0, i32 3
  %5  = insertelement <8 x i8>    %4, i8     0, i32 4
  %6  = insertelement <8 x i8>    %5, i8     0, i32 5
  %7  = insertelement <8 x i8>    %6, i8     0, i32 6
  %8  = insertelement <8 x i8>    %7, i8 undef, i32 7
  %9  = bitcast <8 x i8> %8 to x86_mmx
  %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %9, x86_mmx %9)
  store x86_mmx %10, x86_mmx *%p0
  ret void
}

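; Byte splat of %a0: SSE targets fold the word/dword unpacks into a single pshufw $0.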
define void @build_v8i8_00000000(x86_mmx *%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7) nounwind {
; X86-MMX-LABEL: build_v8i8_00000000:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT:    punpcklbw %mm0, %mm0 # mm0 = mm0[0,0,1,1,2,2,3,3]
; X86-MMX-NEXT:    punpcklwd %mm0, %mm0 # mm0 = mm0[0,0,1,1]
; X86-MMX-NEXT:    punpckldq %mm0, %mm0 # mm0 = mm0[0,0]
; X86-MMX-NEXT:    paddd %mm0, %mm0
; X86-MMX-NEXT:    movq %mm0, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v8i8_00000000:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-SSE-NEXT:    punpcklbw %mm0, %mm0 # mm0 = mm0[0,0,1,1,2,2,3,3]
; X86-SSE-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0]
; X86-SSE-NEXT:    paddd %mm0, %mm0
; X86-SSE-NEXT:    movq %mm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v8i8_00000000:
; X64:       # %bb.0:
; X64-NEXT:    movd %esi, %mm0
; X64-NEXT:    punpcklbw %mm0, %mm0 # mm0 = mm0[0,0,1,1,2,2,3,3]
; X64-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0]
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1  = insertelement <8 x i8> undef, i8 %a0, i32 0
  %2  = insertelement <8 x i8>    %1, i8 %a0, i32 1
  %3  = insertelement <8 x i8>    %2, i8 %a0, i32 2
  %4  = insertelement <8 x i8>    %3, i8 %a0, i32 3
  %5  = insertelement <8 x i8>    %4, i8 %a0, i32 4
  %6  = insertelement <8 x i8>    %5, i8 %a0, i32 5
  %7  = insertelement <8 x i8>    %6, i8 %a0, i32 6
  %8  = insertelement <8 x i8>    %7, i8 %a0, i32 7
  %9  = bitcast <8 x i8> %8 to x86_mmx
  %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %9, x86_mmx %9)
  store x86_mmx %10, x86_mmx *%p0
  ret void
}

;
; v2f32
;

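; With SSE the float arguments live in xmm registers, so they are transferred to MMX with movdq2q before unpacking.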
define void @build_v2f32_01(x86_mmx *%p0, float %a0, float %a1) nounwind {
; X86-MMX-LABEL: build_v2f32_01:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-MMX-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-MMX-NEXT:    paddd %mm1, %mm1
; X86-MMX-NEXT:    movq %mm1, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v2f32_01:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movdq2q %xmm1, %mm0
; X86-SSE-NEXT:    movdq2q %xmm0, %mm1
; X86-SSE-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-SSE-NEXT:    paddd %mm1, %mm1
; X86-SSE-NEXT:    movq %mm1, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v2f32_01:
; X64:       # %bb.0:
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X64-NEXT:    paddd %mm1, %mm1
; X64-NEXT:    movq %mm1, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x float> undef, float %a0, i32 0
  %2 = insertelement <2 x float>    %1, float %a1, i32 1
  %3 = bitcast <2 x float> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

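; Upper float is +0.0, unpacked from a pxor-zeroed register.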
define void @build_v2f32_0z(x86_mmx *%p0, float %a0, float %a1) nounwind {
; X86-MMX-LABEL: build_v2f32_0z:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    pxor %mm0, %mm0
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-MMX-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-MMX-NEXT:    paddd %mm1, %mm1
; X86-MMX-NEXT:    movq %mm1, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v2f32_0z:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movdq2q %xmm0, %mm0
; X86-SSE-NEXT:    pxor %mm1, %mm1
; X86-SSE-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0]
; X86-SSE-NEXT:    paddd %mm0, %mm0
; X86-SSE-NEXT:    movq %mm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v2f32_0z:
; X64:       # %bb.0:
; X64-NEXT:    movdq2q %xmm0, %mm0
; X64-NEXT:    pxor %mm1, %mm1
; X64-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0]
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x float> undef, float %a0, i32 0
  %2 = insertelement <2 x float>    %1, float 0.0, i32 1
  %3 = bitcast <2 x float> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

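; Lower element is undef, so %a1 is splatted.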
define void @build_v2f32_u1(x86_mmx *%p0, float %a0, float %a1) nounwind {
; X86-MMX-LABEL: build_v2f32_u1:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT:    punpckldq %mm0, %mm0 # mm0 = mm0[0,0]
; X86-MMX-NEXT:    paddd %mm0, %mm0
; X86-MMX-NEXT:    movq %mm0, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v2f32_u1:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movdq2q %xmm0, %mm0
; X86-SSE-NEXT:    pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X86-SSE-NEXT:    paddd %mm0, %mm0
; X86-SSE-NEXT:    movq %mm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v2f32_u1:
; X64:       # %bb.0:
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x float> undef, float undef, i32 0
  %2 = insertelement <2 x float>    %1, float   %a1, i32 1
  %3 = bitcast <2 x float> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

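; Lower element is zero: %a1 ends up in the upper lane via punpckldq.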
define void @build_v2f32_z1(x86_mmx *%p0, float %a0, float %a1) nounwind {
; X86-MMX-LABEL: build_v2f32_z1:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT:    pxor %mm1, %mm1
; X86-MMX-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-MMX-NEXT:    paddd %mm1, %mm1
; X86-MMX-NEXT:    movq %mm1, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v2f32_z1:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movdq2q %xmm0, %mm0
; X86-SSE-NEXT:    pxor %mm1, %mm1
; X86-SSE-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-SSE-NEXT:    paddd %mm1, %mm1
; X86-SSE-NEXT:    movq %mm1, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v2f32_z1:
; X64:       # %bb.0:
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    pxor %mm1, %mm1
; X64-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X64-NEXT:    paddd %mm1, %mm1
; X64-NEXT:    movq %mm1, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x float> undef, float 0.0, i32 0
  %2 = insertelement <2 x float>    %1, float %a1, i32 1
  %3 = bitcast <2 x float> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

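; Splat of %a0: pshufw $68 duplicates the low dword on SSE targets.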
define void @build_v2f32_00(x86_mmx *%p0, float %a0, float %a1) nounwind {
; X86-MMX-LABEL: build_v2f32_00:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT:    punpckldq %mm0, %mm0 # mm0 = mm0[0,0]
; X86-MMX-NEXT:    paddd %mm0, %mm0
; X86-MMX-NEXT:    movq %mm0, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v2f32_00:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movdq2q %xmm0, %mm0
; X86-SSE-NEXT:    pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X86-SSE-NEXT:    paddd %mm0, %mm0
; X86-SSE-NEXT:    movq %mm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v2f32_00:
; X64:       # %bb.0:
; X64-NEXT:    movdq2q %xmm0, %mm0
; X64-NEXT:    pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x float> undef, float %a0, i32 0
  %2 = insertelement <2 x float>    %1, float %a0, i32 1
  %3 = bitcast <2 x float> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}