; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefix=X64

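; t0-t7: the MMX shift-by-immediate intrinsics (pslli/psrli/psrai) with a
; count that is only known at run time. Since the loaded i32 count is not a
; constant, it is expected to be moved into an MMX register with movd and the
; register form of the shift (e.g. psllq) selected instead.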
define i64 @t0(<1 x i64>* %a, i32* %b) nounwind {
; X86-LABEL: t0:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 12(%ebp), %eax
; X86-NEXT:    movl 8(%ebp), %ecx
; X86-NEXT:    movq (%ecx), %mm0
; X86-NEXT:    movd (%eax), %mm1
; X86-NEXT:    psllq %mm1, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: t0:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    movd (%rsi), %mm1
; X64-NEXT:    psllq %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64>* %a to x86_mmx*
  %1 = load x86_mmx, x86_mmx* %0, align 8
  %2 = load i32, i32* %b, align 4
  %3 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %1, i32 %2)
  %4 = bitcast x86_mmx %3 to i64
  ret i64 %4
}
declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32)

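; As t0, but for psrli.q: expect movd of the count plus psrlq.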
define i64 @t1(<1 x i64>* %a, i32* %b) nounwind {
; X86-LABEL: t1:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 12(%ebp), %eax
; X86-NEXT:    movl 8(%ebp), %ecx
; X86-NEXT:    movq (%ecx), %mm0
; X86-NEXT:    movd (%eax), %mm1
; X86-NEXT:    psrlq %mm1, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: t1:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    movd (%rsi), %mm1
; X64-NEXT:    psrlq %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64>* %a to x86_mmx*
  %1 = load x86_mmx, x86_mmx* %0, align 8
  %2 = load i32, i32* %b, align 4
  %3 = tail call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %1, i32 %2)
  %4 = bitcast x86_mmx %3 to i64
  ret i64 %4
}
declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32)

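; As t0, but for pslli.w: expect movd of the count plus psllw.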
define i64 @t2(<1 x i64>* %a, i32* %b) nounwind {
; X86-LABEL: t2:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 12(%ebp), %eax
; X86-NEXT:    movl 8(%ebp), %ecx
; X86-NEXT:    movq (%ecx), %mm0
; X86-NEXT:    movd (%eax), %mm1
; X86-NEXT:    psllw %mm1, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: t2:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    movd (%rsi), %mm1
; X64-NEXT:    psllw %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64>* %a to x86_mmx*
  %1 = load x86_mmx, x86_mmx* %0, align 8
  %2 = load i32, i32* %b, align 4
  %3 = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %1, i32 %2)
  %4 = bitcast x86_mmx %3 to i64
  ret i64 %4
}
declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32)

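; As t0, but for psrli.w: expect movd of the count plus psrlw.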
define i64 @t3(<1 x i64>* %a, i32* %b) nounwind {
; X86-LABEL: t3:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 12(%ebp), %eax
; X86-NEXT:    movl 8(%ebp), %ecx
; X86-NEXT:    movq (%ecx), %mm0
; X86-NEXT:    movd (%eax), %mm1
; X86-NEXT:    psrlw %mm1, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: t3:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    movd (%rsi), %mm1
; X64-NEXT:    psrlw %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64>* %a to x86_mmx*
  %1 = load x86_mmx, x86_mmx* %0, align 8
  %2 = load i32, i32* %b, align 4
  %3 = tail call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx %1, i32 %2)
  %4 = bitcast x86_mmx %3 to i64
  ret i64 %4
}
declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32)

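; As t0, but for pslli.d: expect movd of the count plus pslld.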
define i64 @t4(<1 x i64>* %a, i32* %b) nounwind {
; X86-LABEL: t4:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 12(%ebp), %eax
; X86-NEXT:    movl 8(%ebp), %ecx
; X86-NEXT:    movq (%ecx), %mm0
; X86-NEXT:    movd (%eax), %mm1
; X86-NEXT:    pslld %mm1, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: t4:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    movd (%rsi), %mm1
; X64-NEXT:    pslld %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64>* %a to x86_mmx*
  %1 = load x86_mmx, x86_mmx* %0, align 8
  %2 = load i32, i32* %b, align 4
  %3 = tail call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx %1, i32 %2)
  %4 = bitcast x86_mmx %3 to i64
  ret i64 %4
}
declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32)

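; As t0, but for psrli.d: expect movd of the count plus psrld.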
define i64 @t5(<1 x i64>* %a, i32* %b) nounwind {
; X86-LABEL: t5:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 12(%ebp), %eax
; X86-NEXT:    movl 8(%ebp), %ecx
; X86-NEXT:    movq (%ecx), %mm0
; X86-NEXT:    movd (%eax), %mm1
; X86-NEXT:    psrld %mm1, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: t5:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    movd (%rsi), %mm1
; X64-NEXT:    psrld %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64>* %a to x86_mmx*
  %1 = load x86_mmx, x86_mmx* %0, align 8
  %2 = load i32, i32* %b, align 4
  %3 = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %1, i32 %2)
  %4 = bitcast x86_mmx %3 to i64
  ret i64 %4
}
declare x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx, i32)

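; As t0, but for psrai.w: expect movd of the count plus psraw.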
define i64 @t6(<1 x i64>* %a, i32* %b) nounwind {
; X86-LABEL: t6:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 12(%ebp), %eax
; X86-NEXT:    movl 8(%ebp), %ecx
; X86-NEXT:    movq (%ecx), %mm0
; X86-NEXT:    movd (%eax), %mm1
; X86-NEXT:    psraw %mm1, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: t6:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    movd (%rsi), %mm1
; X64-NEXT:    psraw %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64>* %a to x86_mmx*
  %1 = load x86_mmx, x86_mmx* %0, align 8
  %2 = load i32, i32* %b, align 4
  %3 = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %1, i32 %2)
  %4 = bitcast x86_mmx %3 to i64
  ret i64 %4
}
declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32)

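; As t0, but for psrai.d: expect movd of the count plus psrad.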
define i64 @t7(<1 x i64>* %a, i32* %b) nounwind {
; X86-LABEL: t7:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 12(%ebp), %eax
; X86-NEXT:    movl 8(%ebp), %ecx
; X86-NEXT:    movq (%ecx), %mm0
; X86-NEXT:    movd (%eax), %mm1
; X86-NEXT:    psrad %mm1, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: t7:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    movd (%rsi), %mm1
; X64-NEXT:    psrad %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64>* %a to x86_mmx*
  %1 = load x86_mmx, x86_mmx* %0, align 8
  %2 = load i32, i32* %b, align 4
  %3 = tail call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx %1, i32 %2)
  %4 = bitcast x86_mmx %3 to i64
  ret i64 %4
}
declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32)

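; tt0-tt8: MMX binary operations with one operand loaded through a pointer.
; The load is expected to fold into the instruction's memory operand (e.g.
; paddb (%rdi), %mm0) rather than being issued as a separate movq.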
define i64 @tt0(x86_mmx %t, x86_mmx* %q) nounwind {
; X86-LABEL: tt0:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    paddb (%eax), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    emms
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: tt0:
; X64:       # %bb.0: # %entry
; X64-NEXT:    paddb (%rdi), %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    emms
; X64-NEXT:    retq
entry:
  %v = load x86_mmx, x86_mmx* %q
  %u = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %t, x86_mmx %v)
  %s = bitcast x86_mmx %u to i64
  call void @llvm.x86.mmx.emms()
  ret i64 %s
}
declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx)
declare void @llvm.x86.mmx.emms()

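; As tt0, but for padd.w: the load should fold into paddw.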
define i64 @tt1(x86_mmx %t, x86_mmx* %q) nounwind {
; X86-LABEL: tt1:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    paddw (%eax), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    emms
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: tt1:
; X64:       # %bb.0: # %entry
; X64-NEXT:    paddw (%rdi), %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    emms
; X64-NEXT:    retq
entry:
  %v = load x86_mmx, x86_mmx* %q
  %u = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %t, x86_mmx %v)
  %s = bitcast x86_mmx %u to i64
  call void @llvm.x86.mmx.emms()
  ret i64 %s
}
declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx)

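; As tt0, but for padd.d: the load should fold into paddd.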
define i64 @tt2(x86_mmx %t, x86_mmx* %q) nounwind {
; X86-LABEL: tt2:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    paddd (%eax), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    emms
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: tt2:
; X64:       # %bb.0: # %entry
; X64-NEXT:    paddd (%rdi), %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    emms
; X64-NEXT:    retq
entry:
  %v = load x86_mmx, x86_mmx* %q
  %u = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %t, x86_mmx %v)
  %s = bitcast x86_mmx %u to i64
  call void @llvm.x86.mmx.emms()
  ret i64 %s
}
declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)

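; As tt0, but for padd.q: the load should fold into paddq.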
define i64 @tt3(x86_mmx %t, x86_mmx* %q) nounwind {
; X86-LABEL: tt3:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    paddq (%eax), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    emms
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: tt3:
; X64:       # %bb.0: # %entry
; X64-NEXT:    paddq (%rdi), %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    emms
; X64-NEXT:    retq
entry:
  %v = load x86_mmx, x86_mmx* %q
  %u = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %t, x86_mmx %v)
  %s = bitcast x86_mmx %u to i64
  call void @llvm.x86.mmx.emms()
  ret i64 %s
}
declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx)

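; As tt0, but for paddus.b: the load should fold into paddusb.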
define i64 @tt4(x86_mmx %t, x86_mmx* %q) nounwind {
; X86-LABEL: tt4:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    paddusb (%eax), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    emms
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: tt4:
; X64:       # %bb.0: # %entry
; X64-NEXT:    paddusb (%rdi), %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    emms
; X64-NEXT:    retq
entry:
  %v = load x86_mmx, x86_mmx* %q
  %u = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %t, x86_mmx %v)
  %s = bitcast x86_mmx %u to i64
  call void @llvm.x86.mmx.emms()
  ret i64 %s
}
declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx)

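; As tt0, but for paddus.w: the load should fold into paddusw.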
define i64 @tt5(x86_mmx %t, x86_mmx* %q) nounwind {
; X86-LABEL: tt5:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    paddusw (%eax), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    emms
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: tt5:
; X64:       # %bb.0: # %entry
; X64-NEXT:    paddusw (%rdi), %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    emms
; X64-NEXT:    retq
entry:
  %v = load x86_mmx, x86_mmx* %q
  %u = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %t, x86_mmx %v)
  %s = bitcast x86_mmx %u to i64
  call void @llvm.x86.mmx.emms()
  ret i64 %s
}
declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx)

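; As tt0, but for psrl.w: the loaded count vector should fold into psrlw.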
define i64 @tt6(x86_mmx %t, x86_mmx* %q) nounwind {
; X86-LABEL: tt6:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    psrlw (%eax), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    emms
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: tt6:
; X64:       # %bb.0: # %entry
; X64-NEXT:    psrlw (%rdi), %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    emms
; X64-NEXT:    retq
entry:
  %v = load x86_mmx, x86_mmx* %q
  %u = tail call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %t, x86_mmx %v)
  %s = bitcast x86_mmx %u to i64
  call void @llvm.x86.mmx.emms()
  ret i64 %s
}
declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx)

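; As tt0, but for psrl.d: the loaded count vector should fold into psrld.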
define i64 @tt7(x86_mmx %t, x86_mmx* %q) nounwind {
; X86-LABEL: tt7:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    psrld (%eax), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    emms
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: tt7:
; X64:       # %bb.0: # %entry
; X64-NEXT:    psrld (%rdi), %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    emms
; X64-NEXT:    retq
entry:
  %v = load x86_mmx, x86_mmx* %q
  %u = tail call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %t, x86_mmx %v)
  %s = bitcast x86_mmx %u to i64
  call void @llvm.x86.mmx.emms()
  ret i64 %s
}
declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx)

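; As tt0, but for psrl.q: the loaded count vector should fold into psrlq.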
define i64 @tt8(x86_mmx %t, x86_mmx* %q) nounwind {
; X86-LABEL: tt8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    psrlq (%eax), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    emms
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: tt8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    psrlq (%rdi), %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    emms
; X64-NEXT:    retq
entry:
  %v = load x86_mmx, x86_mmx* %q
  %u = tail call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %t, x86_mmx %v)
  %s = bitcast x86_mmx %u to i64
  call void @llvm.x86.mmx.emms()
  ret i64 %s
}
declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx)

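; Here the shift count comes from a volatile load, so that load must not be
; folded into the shift or duplicated: it should be performed exactly once,
; via movd, before the psrlq.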
define void @test_psrlq_by_volatile_shift_amount(x86_mmx* %t) nounwind {
; X86-LABEL: test_psrlq_by_volatile_shift_amount:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl $1, (%esp)
; X86-NEXT:    movd (%esp), %mm0
; X86-NEXT:    movl $255, %ecx
; X86-NEXT:    movd %ecx, %mm1
; X86-NEXT:    psrlq %mm0, %mm1
; X86-NEXT:    movq %mm1, (%eax)
; X86-NEXT:    popl %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_psrlq_by_volatile_shift_amount:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movl $1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movd -{{[0-9]+}}(%rsp), %mm0
; X64-NEXT:    movl $255, %eax
; X64-NEXT:    movd %eax, %mm1
; X64-NEXT:    psrlq %mm0, %mm1
; X64-NEXT:    movq %mm1, (%rdi)
; X64-NEXT:    retq
entry:
  %0 = alloca i32, align 4
  %1 = bitcast i32* %0 to i8*
  call void @llvm.lifetime.start(i64 4, i8* nonnull %1)
  store volatile i32 1, i32* %0, align 4
  %2 = load volatile i32, i32* %0, align 4
  %3 = tail call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx bitcast (<1 x i64> <i64 255> to x86_mmx), i32 %2)
  store x86_mmx %3, x86_mmx* %t, align 8
  call void @llvm.lifetime.end(i64 4, i8* nonnull %1)
  ret void
}

declare void @llvm.lifetime.start(i64, i8* nocapture)
declare void @llvm.lifetime.end(i64, i8* nocapture)