Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+ssse3 | FileCheck %s
      2 
      3 define x86_mmx @stack_fold_cvtpd2pi(<2 x double> %a0) {
          ; Expects cvtpd2pi to take its source as a 16-byte folded reload from (%rsp).
          ; The nop asm defines one XMM value ("=x") and clobbers xmm1-xmm15 (each
          ; listed once) plus flags, presumably so %a0 cannot stay in a register.
      4   ;CHECK-LABEL: stack_fold_cvtpd2pi
      5   ;CHECK:       cvtpd2pi {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 16-byte Folded Reload
      6   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
      7   %2 = call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a0) nounwind readnone
      8   ret x86_mmx %2
      9 }
     10 declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone
     11 
     12 define <2 x double> @stack_fold_cvtpi2pd(x86_mmx %a0) {
          ; Expects cvtpi2pd to take its source as an 8-byte folded reload from (%rsp).
          ; The nop asm defines one MMX value ("=y") and clobbers mm1-mm7 (each
          ; listed once), presumably so %a0 cannot stay in a register across it.
     13   ;CHECK-LABEL: stack_fold_cvtpi2pd
     14   ;CHECK:       cvtpi2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
     15   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
     16   %2 = call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %a0) nounwind readnone
     17   ret <2 x double> %2
     18 }
     19 declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone
     20 
     21 define <4 x float> @stack_fold_cvtpi2ps(<4 x float> %a0, x86_mmx %a1) {
          ; Expects cvtpi2ps to take its MMX source as an 8-byte folded reload from
          ; (%rsp). The nop asm defines one MMX value ("=y") and clobbers mm1-mm7
          ; (each listed once), presumably so %a1 cannot stay in a register.
     22   ;CHECK-LABEL: stack_fold_cvtpi2ps
     23   ;CHECK:       cvtpi2ps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
     24   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
     25   %2 = call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a0, x86_mmx %a1) nounwind readnone
     26   ret <4 x float> %2
     27 }
     28 declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx) nounwind readnone
     29 
     30 define x86_mmx @stack_fold_cvtps2pi(<4 x float> %a0) {
          ; Expects cvtps2pi to take its source as a 16-byte folded reload from (%rsp).
          ; The nop asm defines one XMM value ("=x") and clobbers xmm1-xmm15 (each
          ; listed once) plus flags, presumably so %a0 cannot stay in a register.
     31   ;CHECK-LABEL: stack_fold_cvtps2pi
     32   ;CHECK:       cvtps2pi {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 16-byte Folded Reload
     33   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
     34   %2 = call x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float> %a0) nounwind readnone
     35   ret x86_mmx %2
     36 }
     37 declare x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float>) nounwind readnone
     38 
     39 define x86_mmx @stack_fold_cvttpd2pi(<2 x double> %a0) {
          ; Expects cvttpd2pi to take its source as a 16-byte folded reload from
          ; (%rsp). The nop asm defines one XMM value ("=x") and clobbers xmm1-xmm15
          ; (each listed once) plus flags, presumably so %a0 cannot stay in a register.
     40   ;CHECK-LABEL: stack_fold_cvttpd2pi
     41   ;CHECK:       cvttpd2pi {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 16-byte Folded Reload
     42   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
     43   %2 = call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a0) nounwind readnone
     44   ret x86_mmx %2
     45 }
     46 declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone
     47 
     48 define x86_mmx @stack_fold_cvttps2pi(<4 x float> %a0) {
          ; Expects cvttps2pi to take its source as a 16-byte folded reload from
          ; (%rsp). The nop asm defines one XMM value ("=x") and clobbers xmm1-xmm15
          ; (each listed once) plus flags, presumably so %a0 cannot stay in a register.
     49   ;CHECK-LABEL: stack_fold_cvttps2pi
     50   ;CHECK:       cvttps2pi {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 16-byte Folded Reload
     51   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
     52   %2 = call x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float> %a0) nounwind readnone
     53   ret x86_mmx %2
     54 }
     55 declare x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float>) nounwind readnone
     56 
     57 ; TODO stack_fold_movd_load
     58 ; TODO stack_fold_movd_store
     59 ; TODO stack_fold_movq_load
     60 ; TODO stack_fold_movq_store
     61 
     62 define x86_mmx @stack_fold_pabsb(x86_mmx %a0) {
          ; Expects pabsb to take its source as an 8-byte folded reload from (%rsp).
          ; The nop asm defines one MMX value ("=y") and clobbers mm1-mm7, presumably
          ; so %a0 cannot stay in a register across it.
     63   ;CHECK-LABEL: stack_fold_pabsb
     64   ;CHECK:       pabsb {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
     65   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
     66   %2 = call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %a0) nounwind readnone
     67   ret x86_mmx %2
     68 }
     69 declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone
     70 
     71 define x86_mmx @stack_fold_pabsd(x86_mmx %a0) {
          ; Expects pabsd to take its source as an 8-byte folded reload from (%rsp).
          ; The nop asm defines one MMX value ("=y") and clobbers mm1-mm7, presumably
          ; so %a0 cannot stay in a register across it.
     72   ;CHECK-LABEL: stack_fold_pabsd
     73   ;CHECK:       pabsd {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
     74   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
     75   %2 = call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %a0) nounwind readnone
     76   ret x86_mmx %2
     77 }
     78 declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone
     79 
     80 define x86_mmx @stack_fold_pabsw(x86_mmx %a0) {
          ; Expects pabsw to take its source as an 8-byte folded reload from (%rsp).
          ; The nop asm defines one MMX value ("=y") and clobbers mm1-mm7, presumably
          ; so %a0 cannot stay in a register across it.
     81   ;CHECK-LABEL: stack_fold_pabsw
     82   ;CHECK:       pabsw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
     83   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
     84   %2 = call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %a0) nounwind readnone
     85   ret x86_mmx %2
     86 }
     87 declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone
     88 
     89 define x86_mmx @stack_fold_packssdw(x86_mmx %a, x86_mmx %b) {
          ; Expects packssdw to take one operand as an 8-byte folded reload from
          ; (%rsp). The nop asm defines one MMX value ("=y") and clobbers mm2-mm7,
          ; presumably so %a and %b cannot both stay in registers across it.
     90   ;CHECK-LABEL: stack_fold_packssdw
     91   ;CHECK:       packssdw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
     92   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
     93   %2 = call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %a, x86_mmx %b) nounwind readnone
     94   ret x86_mmx %2
     95 }
     96 declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone
     97 
     98 define x86_mmx @stack_fold_packsswb(x86_mmx %a, x86_mmx %b) {
          ; Expects packsswb to take one operand as an 8-byte folded reload from
          ; (%rsp). The nop asm defines one MMX value ("=y") and clobbers mm2-mm7,
          ; presumably so %a and %b cannot both stay in registers across it.
     99   ;CHECK-LABEL: stack_fold_packsswb
    100   ;CHECK:       packsswb {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    101   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    102   %2 = call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %a, x86_mmx %b) nounwind readnone
    103   ret x86_mmx %2
    104 }
    105 declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone
    106 
    107 define x86_mmx @stack_fold_packuswb(x86_mmx %a, x86_mmx %b) {
          ; Expects packuswb to take one operand as an 8-byte folded reload from
          ; (%rsp). The nop asm defines one MMX value ("=y") and clobbers mm2-mm7,
          ; presumably so %a and %b cannot both stay in registers across it.
    108   ;CHECK-LABEL: stack_fold_packuswb
    109   ;CHECK:       packuswb {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    110   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    111   %2 = call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %a, x86_mmx %b) nounwind readnone
    112   ret x86_mmx %2
    113 }
    114 declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone
    115 
    116 define x86_mmx @stack_fold_paddb(x86_mmx %a, x86_mmx %b) {
          ; Expects paddb to take one operand as an 8-byte folded reload from (%rsp).
          ; The nop asm defines one MMX value ("=y") and clobbers mm2-mm7, presumably
          ; so %a and %b cannot both stay in registers across it.
    117   ;CHECK-LABEL: stack_fold_paddb
    118   ;CHECK:       paddb {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    119   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    120   %2 = call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %a, x86_mmx %b) nounwind readnone
    121   ret x86_mmx %2
    122 }
    123 declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone
    124 
    125 define x86_mmx @stack_fold_paddd(x86_mmx %a, x86_mmx %b) {
          ; Expects paddd to take one operand as an 8-byte folded reload from (%rsp).
          ; The nop asm defines one MMX value ("=y") and clobbers mm2-mm7, presumably
          ; so %a and %b cannot both stay in registers across it.
    126   ;CHECK-LABEL: stack_fold_paddd
    127   ;CHECK:       paddd {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    128   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    129   %2 = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %a, x86_mmx %b) nounwind readnone
    130   ret x86_mmx %2
    131 }
    132 declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone
    133 
    134 define x86_mmx @stack_fold_paddq(x86_mmx %a, x86_mmx %b) {
          ; Expects paddq to take one operand as an 8-byte folded reload from (%rsp).
          ; The nop asm defines one MMX value ("=y") and clobbers mm2-mm7, presumably
          ; so %a and %b cannot both stay in registers across it.
    135   ;CHECK-LABEL: stack_fold_paddq
    136   ;CHECK:       paddq {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    137   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    138   %2 = call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %a, x86_mmx %b) nounwind readnone
    139   ret x86_mmx %2
    140 }
    141 declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone
    142 
    143 define x86_mmx @stack_fold_paddsb(x86_mmx %a, x86_mmx %b) {
          ; Expects paddsb to take one operand as an 8-byte folded reload from (%rsp).
          ; The nop asm defines one MMX value ("=y") and clobbers mm2-mm7, presumably
          ; so %a and %b cannot both stay in registers across it.
    144   ;CHECK-LABEL: stack_fold_paddsb
    145   ;CHECK:       paddsb {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    146   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    147   %2 = call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %a, x86_mmx %b) nounwind readnone
    148   ret x86_mmx %2
    149 }
    150 declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone
    151 
    152 define x86_mmx @stack_fold_paddsw(x86_mmx %a, x86_mmx %b) {
          ; Expects paddsw to take one operand as an 8-byte folded reload from (%rsp).
          ; The nop asm defines one MMX value ("=y") and clobbers mm2-mm7, presumably
          ; so %a and %b cannot both stay in registers across it.
    153   ;CHECK-LABEL: stack_fold_paddsw
    154   ;CHECK:       paddsw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    155   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    156   %2 = call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %a, x86_mmx %b) nounwind readnone
    157   ret x86_mmx %2
    158 }
    159 declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone
    160 
    161 define x86_mmx @stack_fold_paddusb(x86_mmx %a, x86_mmx %b) {
          ; Expects paddusb to take one operand as an 8-byte folded reload from
          ; (%rsp). The nop asm defines one MMX value ("=y") and clobbers mm2-mm7,
          ; presumably so %a and %b cannot both stay in registers across it.
    162   ;CHECK-LABEL: stack_fold_paddusb
    163   ;CHECK:       paddusb {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    164   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    165   %2 = call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %a, x86_mmx %b) nounwind readnone
    166   ret x86_mmx %2
    167 }
    168 declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone
    169 
    170 define x86_mmx @stack_fold_paddusw(x86_mmx %a, x86_mmx %b) {
          ; Expects paddusw to take one operand as an 8-byte folded reload from
          ; (%rsp). The nop asm defines one MMX value ("=y") and clobbers mm2-mm7,
          ; presumably so %a and %b cannot both stay in registers across it.
    171   ;CHECK-LABEL: stack_fold_paddusw
    172   ;CHECK:       paddusw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    173   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    174   %2 = call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %a, x86_mmx %b) nounwind readnone
    175   ret x86_mmx %2
    176 }
    177 declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone
    178 
    179 define x86_mmx @stack_fold_paddw(x86_mmx %a, x86_mmx %b) {
          ; Expects paddw to take one operand as an 8-byte folded reload from (%rsp).
          ; The nop asm defines one MMX value ("=y") and clobbers mm2-mm7, presumably
          ; so %a and %b cannot both stay in registers across it.
    180   ;CHECK-LABEL: stack_fold_paddw
    181   ;CHECK:       paddw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    182   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    183   %2 = call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %a, x86_mmx %b) nounwind readnone
    184   ret x86_mmx %2
    185 }
    186 declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone
    187 
    188 define x86_mmx @stack_fold_palignr(x86_mmx %a, x86_mmx %b) {
          ; Expects palignr to take one operand as an 8-byte folded reload from
          ; (%rsp); the i8 1 passed to the intrinsic matches the $1 immediate in the
          ; expected output. The nop asm defines one MMX value ("=y") and clobbers
          ; mm2-mm7, presumably so %a and %b cannot both stay in registers across it.
    189   ;CHECK-LABEL: stack_fold_palignr
    190   ;CHECK:       palignr $1, {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    191   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    192   %2 = call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %a, x86_mmx %b, i8 1) nounwind readnone
    193   ret x86_mmx %2
    194 }
    195 declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone
    196 
    197 define x86_mmx @stack_fold_pand(x86_mmx %a, x86_mmx %b) {
          ; Expects pand to take one operand as an 8-byte folded reload from (%rsp).
          ; The nop asm defines one MMX value ("=y") and clobbers mm2-mm7, presumably
          ; so %a and %b cannot both stay in registers across it.
    198   ;CHECK-LABEL: stack_fold_pand
    199   ;CHECK:       pand {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    200   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    201   %2 = call x86_mmx @llvm.x86.mmx.pand(x86_mmx %a, x86_mmx %b) nounwind readnone
    202   ret x86_mmx %2
    203 }
    204 declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone
    205 
    206 define x86_mmx @stack_fold_pandn(x86_mmx %a, x86_mmx %b) {
          ; Expects pandn to take one operand as an 8-byte folded reload from (%rsp).
          ; The nop asm defines one MMX value ("=y") and clobbers mm2-mm7, presumably
          ; so %a and %b cannot both stay in registers across it.
    207   ;CHECK-LABEL: stack_fold_pandn
    208   ;CHECK:       pandn {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    209   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    210   %2 = call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %a, x86_mmx %b) nounwind readnone
    211   ret x86_mmx %2
    212 }
    213 declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone
    214 
    215 define x86_mmx @stack_fold_pavgb(x86_mmx %a, x86_mmx %b) {
          ; Expects pavgb to take one operand as an 8-byte folded reload from (%rsp).
          ; The nop asm defines one MMX value ("=y") and clobbers mm2-mm7, presumably
          ; so %a and %b cannot both stay in registers across it.
    216   ;CHECK-LABEL: stack_fold_pavgb
    217   ;CHECK:       pavgb {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    218   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    219   %2 = call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %a, x86_mmx %b) nounwind readnone
    220   ret x86_mmx %2
    221 }
    222 declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone
    223 
    224 define x86_mmx @stack_fold_pavgw(x86_mmx %a, x86_mmx %b) {
          ; Expects pavgw to take one operand as an 8-byte folded reload from (%rsp).
          ; The nop asm defines one MMX value ("=y") and clobbers mm2-mm7, presumably
          ; so %a and %b cannot both stay in registers across it.
    225   ;CHECK-LABEL: stack_fold_pavgw
    226   ;CHECK:       pavgw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    227   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    228   %2 = call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %a, x86_mmx %b) nounwind readnone
    229   ret x86_mmx %2
    230 }
    231 declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone
    232 
    233 define x86_mmx @stack_fold_pcmpeqb(x86_mmx %a, x86_mmx %b) {
          ; Expects pcmpeqb to take one operand as an 8-byte folded reload from
          ; (%rsp). The nop asm defines one MMX value ("=y") and clobbers mm2-mm7,
          ; presumably so %a and %b cannot both stay in registers across it.
    234   ;CHECK-LABEL: stack_fold_pcmpeqb
    235   ;CHECK:       pcmpeqb {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    236   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    237   %2 = call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %a, x86_mmx %b) nounwind readnone
    238   ret x86_mmx %2
    239 }
    240 declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone
    241 
    242 define x86_mmx @stack_fold_pcmpeqd(x86_mmx %a, x86_mmx %b) {
          ; Expects pcmpeqd to take one operand as an 8-byte folded reload from
          ; (%rsp). The nop asm defines one MMX value ("=y") and clobbers mm2-mm7,
          ; presumably so %a and %b cannot both stay in registers across it.
    243   ;CHECK-LABEL: stack_fold_pcmpeqd
    244   ;CHECK:       pcmpeqd {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    245   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    246   %2 = call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %a, x86_mmx %b) nounwind readnone
    247   ret x86_mmx %2
    248 }
    249 declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone
    250 
    251 define x86_mmx @stack_fold_pcmpeqw(x86_mmx %a, x86_mmx %b) {
          ; Expects pcmpeqw to take one operand as an 8-byte folded reload from
          ; (%rsp). The nop asm defines one MMX value ("=y") and clobbers mm2-mm7,
          ; presumably so %a and %b cannot both stay in registers across it.
    252   ;CHECK-LABEL: stack_fold_pcmpeqw
    253   ;CHECK:       pcmpeqw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    254   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    255   %2 = call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %a, x86_mmx %b) nounwind readnone
    256   ret x86_mmx %2
    257 }
    258 declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone
    259 
    260 define x86_mmx @stack_fold_pcmpgtb(x86_mmx %a, x86_mmx %b) {
          ; Expects pcmpgtb to take one operand as an 8-byte folded reload from
          ; (%rsp). The nop asm defines one MMX value ("=y") and clobbers mm2-mm7,
          ; presumably so %a and %b cannot both stay in registers across it.
    261   ;CHECK-LABEL: stack_fold_pcmpgtb
    262   ;CHECK:       pcmpgtb {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    263   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    264   %2 = call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %a, x86_mmx %b) nounwind readnone
    265   ret x86_mmx %2
    266 }
    267 declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone
    268 
    269 define x86_mmx @stack_fold_pcmpgtd(x86_mmx %a, x86_mmx %b) {
          ; Expects pcmpgtd to take one operand as an 8-byte folded reload from
          ; (%rsp). The nop asm defines one MMX value ("=y") and clobbers mm2-mm7,
          ; presumably so %a and %b cannot both stay in registers across it.
    270   ;CHECK-LABEL: stack_fold_pcmpgtd
    271   ;CHECK:       pcmpgtd {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    272   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    273   %2 = call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %a, x86_mmx %b) nounwind readnone
    274   ret x86_mmx %2
    275 }
    276 declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone
    277 
    278 define x86_mmx @stack_fold_pcmpgtw(x86_mmx %a, x86_mmx %b) {
          ; Expects pcmpgtw to take one operand as an 8-byte folded reload from
          ; (%rsp). The nop asm defines one MMX value ("=y") and clobbers mm2-mm7,
          ; presumably so %a and %b cannot both stay in registers across it.
    279   ;CHECK-LABEL: stack_fold_pcmpgtw
    280   ;CHECK:       pcmpgtw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    281   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    282   %2 = call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %a, x86_mmx %b) nounwind readnone
    283   ret x86_mmx %2
    284 }
    285 declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone
    286 
    287 define x86_mmx @stack_fold_phaddd(x86_mmx %a, x86_mmx %b) {
          ; Expects phaddd to take one operand as an 8-byte folded reload from (%rsp).
          ; The nop asm defines one MMX value ("=y") and clobbers mm2-mm7, presumably
          ; so %a and %b cannot both stay in registers across it.
    288   ;CHECK-LABEL: stack_fold_phaddd
    289   ;CHECK:       phaddd {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    290   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    291   %2 = call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %a, x86_mmx %b) nounwind readnone
    292   ret x86_mmx %2
    293 }
    294 declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone
    295 
    296 define x86_mmx @stack_fold_phaddsw(x86_mmx %a, x86_mmx %b) {
          ; Expects phaddsw to take one operand as an 8-byte folded reload from
          ; (%rsp). The nop asm defines one MMX value ("=y") and clobbers mm2-mm7,
          ; presumably so %a and %b cannot both stay in registers across it.
    297   ;CHECK-LABEL: stack_fold_phaddsw
    298   ;CHECK:       phaddsw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    299   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    300   %2 = call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %a, x86_mmx %b) nounwind readnone
    301   ret x86_mmx %2
    302 }
    303 declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone
    304 
    305 define x86_mmx @stack_fold_phaddw(x86_mmx %a, x86_mmx %b) {
          ; Expects phaddw to take one operand as an 8-byte folded reload from (%rsp).
          ; The nop asm defines one MMX value ("=y") and clobbers mm2-mm7, presumably
          ; so %a and %b cannot both stay in registers across it.
    306   ;CHECK-LABEL: stack_fold_phaddw
    307   ;CHECK:       phaddw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    308   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    309   %2 = call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %a, x86_mmx %b) nounwind readnone
    310   ret x86_mmx %2
    311 }
    312 declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone
    313 
    314 define x86_mmx @stack_fold_phsubd(x86_mmx %a, x86_mmx %b) {
          ; Expects phsubd to take one operand as an 8-byte folded reload from (%rsp).
          ; The nop asm defines one MMX value ("=y") and clobbers mm2-mm7, presumably
          ; so %a and %b cannot both stay in registers across it.
    315   ;CHECK-LABEL: stack_fold_phsubd
    316   ;CHECK:       phsubd {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    317   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    318   %2 = call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %a, x86_mmx %b) nounwind readnone
    319   ret x86_mmx %2
    320 }
    321 declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone
    322 
    323 define x86_mmx @stack_fold_phsubsw(x86_mmx %a, x86_mmx %b) {
          ; Expects phsubsw to take one operand as an 8-byte folded reload from
          ; (%rsp). The nop asm defines one MMX value ("=y") and clobbers mm2-mm7,
          ; presumably so %a and %b cannot both stay in registers across it.
    324   ;CHECK-LABEL: stack_fold_phsubsw
    325   ;CHECK:       phsubsw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    326   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    327   %2 = call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %a, x86_mmx %b) nounwind readnone
    328   ret x86_mmx %2
    329 }
    330 declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone
    331 
    332 define x86_mmx @stack_fold_phsubw(x86_mmx %a, x86_mmx %b) {
          ; Expects phsubw to take one operand as an 8-byte folded reload from (%rsp).
          ; The nop asm defines one MMX value ("=y") and clobbers mm2-mm7, presumably
          ; so %a and %b cannot both stay in registers across it.
    333   ;CHECK-LABEL: stack_fold_phsubw
    334   ;CHECK:       phsubw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    335   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    336   %2 = call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %a, x86_mmx %b) nounwind readnone
    337   ret x86_mmx %2
    338 }
    339 declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone
    340 
    341 ; TODO stack_fold_pinsrw
    342 
    343 define x86_mmx @stack_fold_pmaddubsw(x86_mmx %a, x86_mmx %b) {
          ; Expects pmaddubsw to take one operand as an 8-byte folded reload from
          ; (%rsp). The nop asm defines one MMX value ("=y") and clobbers mm2-mm7,
          ; presumably so %a and %b cannot both stay in registers across it.
    344   ;CHECK-LABEL: stack_fold_pmaddubsw
    345   ;CHECK:       pmaddubsw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    346   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    347   %2 = call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %a, x86_mmx %b) nounwind readnone
    348   ret x86_mmx %2
    349 }
    350 declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone
    351 
    352 define x86_mmx @stack_fold_pmaddwd(x86_mmx %a, x86_mmx %b) {
          ; Expects pmaddwd to take one operand as an 8-byte folded reload from
          ; (%rsp). The nop asm defines one MMX value ("=y") and clobbers mm2-mm7,
          ; presumably so %a and %b cannot both stay in registers across it.
    353   ;CHECK-LABEL: stack_fold_pmaddwd
    354   ;CHECK:       pmaddwd {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    355   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    356   %2 = call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %a, x86_mmx %b) nounwind readnone
    357   ret x86_mmx %2
    358 }
    359 declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone
    360 
    361 define x86_mmx @stack_fold_pmaxsw(x86_mmx %a, x86_mmx %b) {
          ; Expects pmaxsw to take one operand as an 8-byte folded reload from (%rsp).
          ; The nop asm defines one MMX value ("=y") and clobbers mm2-mm7, presumably
          ; so %a and %b cannot both stay in registers across it.
    362   ;CHECK-LABEL: stack_fold_pmaxsw
    363   ;CHECK:       pmaxsw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    364   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    365   %2 = call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %a, x86_mmx %b) nounwind readnone
    366   ret x86_mmx %2
    367 }
    368 declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone
    369 
    370 define x86_mmx @stack_fold_pmaxub(x86_mmx %a, x86_mmx %b) {
          ; Expects pmaxub to take one operand as an 8-byte folded reload from (%rsp).
          ; The nop asm defines one MMX value ("=y") and clobbers mm2-mm7, presumably
          ; so %a and %b cannot both stay in registers across it.
    371   ;CHECK-LABEL: stack_fold_pmaxub
    372   ;CHECK:       pmaxub {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    373   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    374   %2 = call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %a, x86_mmx %b) nounwind readnone
    375   ret x86_mmx %2
    376 }
    377 declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone
    378 
    379 define x86_mmx @stack_fold_pminsw(x86_mmx %a, x86_mmx %b) {
          ; Expects pminsw to take one operand as an 8-byte folded reload from (%rsp).
          ; The nop asm defines one MMX value ("=y") and clobbers mm2-mm7, presumably
          ; so %a and %b cannot both stay in registers across it.
    380   ;CHECK-LABEL: stack_fold_pminsw
    381   ;CHECK:       pminsw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    382   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    383   %2 = call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %a, x86_mmx %b) nounwind readnone
    384   ret x86_mmx %2
    385 }
    386 declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone
    387 
    388 define x86_mmx @stack_fold_pminub(x86_mmx %a, x86_mmx %b) {
          ; Expects pminub to take one operand as an 8-byte folded reload from (%rsp).
          ; The nop asm defines one MMX value ("=y") and clobbers mm2-mm7, presumably
          ; so %a and %b cannot both stay in registers across it.
    389   ;CHECK-LABEL: stack_fold_pminub
    390   ;CHECK:       pminub {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    391   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    392   %2 = call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %a, x86_mmx %b) nounwind readnone
    393   ret x86_mmx %2
    394 }
    395 declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone
    396 
    397 define x86_mmx @stack_fold_pmulhrsw(x86_mmx %a, x86_mmx %b) {
          ; Expects pmulhrsw to take one operand as an 8-byte folded reload from
          ; (%rsp). The nop asm defines one MMX value ("=y") and clobbers mm2-mm7,
          ; presumably so %a and %b cannot both stay in registers across it.
    398   ;CHECK-LABEL: stack_fold_pmulhrsw
    399   ;CHECK:       pmulhrsw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    400   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    401   %2 = call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %a, x86_mmx %b) nounwind readnone
    402   ret x86_mmx %2
    403 }
    404 declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone
    405 
    406 define x86_mmx @stack_fold_pmulhuw(x86_mmx %a, x86_mmx %b) {
          ; Expects pmulhuw to take one operand as an 8-byte folded reload from
          ; (%rsp). The nop asm defines one MMX value ("=y") and clobbers mm2-mm7,
          ; presumably so %a and %b cannot both stay in registers across it.
    407   ;CHECK-LABEL: stack_fold_pmulhuw
    408   ;CHECK:       pmulhuw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    409   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    410   %2 = call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %a, x86_mmx %b) nounwind readnone
    411   ret x86_mmx %2
    412 }
    413 declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone
    414 
    415 define x86_mmx @stack_fold_pmulhw(x86_mmx %a, x86_mmx %b) {
          ; Expects pmulhw to take one operand as an 8-byte folded reload from (%rsp).
          ; The nop asm defines one MMX value ("=y") and clobbers mm2-mm7, presumably
          ; so %a and %b cannot both stay in registers across it.
    416   ;CHECK-LABEL: stack_fold_pmulhw
    417   ;CHECK:       pmulhw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    418   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    419   %2 = call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %a, x86_mmx %b) nounwind readnone
    420   ret x86_mmx %2
    421 }
    422 declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone
    423 
    424 define x86_mmx @stack_fold_pmullw(x86_mmx %a, x86_mmx %b) {
          ; Expects pmullw to take one operand as an 8-byte folded reload from (%rsp).
          ; The nop asm defines one MMX value ("=y") and clobbers mm2-mm7, presumably
          ; so %a and %b cannot both stay in registers across it.
    425   ;CHECK-LABEL: stack_fold_pmullw
    426   ;CHECK:       pmullw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    427   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    428   %2 = call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %a, x86_mmx %b) nounwind readnone
    429   ret x86_mmx %2
    430 }
    431 declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone
    432 
    433 define x86_mmx @stack_fold_pmuludq(x86_mmx %a, x86_mmx %b) {
          ; Expects pmuludq to take one operand as an 8-byte folded reload from
          ; (%rsp). The nop asm defines one MMX value ("=y") and clobbers mm2-mm7,
          ; presumably so %a and %b cannot both stay in registers across it.
    434   ;CHECK-LABEL: stack_fold_pmuludq
    435   ;CHECK:       pmuludq {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    436   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    437   %2 = call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %a, x86_mmx %b) nounwind readnone
    438   ret x86_mmx %2
    439 }
    440 declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone
    441 
    442 define x86_mmx @stack_fold_por(x86_mmx %a, x86_mmx %b) {
          ; Expects por to take one operand as an 8-byte folded reload from (%rsp).
          ; The nop asm defines one MMX value ("=y") and clobbers mm2-mm7, presumably
          ; so %a and %b cannot both stay in registers across it.
    443   ;CHECK-LABEL: stack_fold_por
    444   ;CHECK:       por {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    445   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    446   %2 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %a, x86_mmx %b) nounwind readnone
    447   ret x86_mmx %2
    448 }
    449 declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone
    450 
    451 define x86_mmx @stack_fold_psadbw(x86_mmx %a, x86_mmx %b) {
          ; Expects psadbw to take one operand as an 8-byte folded reload from (%rsp).
          ; The nop asm defines one MMX value ("=y") and clobbers mm2-mm7, presumably
          ; so %a and %b cannot both stay in registers across it.
    452   ;CHECK-LABEL: stack_fold_psadbw
    453   ;CHECK:       psadbw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    454   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    455   %2 = call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %a, x86_mmx %b) nounwind readnone
    456   ret x86_mmx %2
    457 }
    458 declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone
    459 
    460 define x86_mmx @stack_fold_pshufb(x86_mmx %a, x86_mmx %b) {
          ; Expects pshufb to take one operand as an 8-byte folded reload from (%rsp).
          ; The nop asm defines one MMX value ("=y") and clobbers mm1-mm7.
          ; NOTE(review): this two-operand test also clobbers mm1, so presumably
          ; neither %a nor %b can stay in a register across the asm -- confirm that
          ; this is intended.
    461   ;CHECK-LABEL: stack_fold_pshufb
    462   ;CHECK:       pshufb {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    463   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    464   %2 = call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %a, x86_mmx %b) nounwind readnone
    465   ret x86_mmx %2
    466 }
    467 declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone
    468 
    469 define x86_mmx @stack_fold_pshufw(x86_mmx %a) {
          ; Expects pshufw to take its source as an 8-byte folded reload from (%rsp);
          ; the i8 1 passed to the intrinsic matches the $1 immediate in the expected
          ; output. The nop asm defines one MMX value ("=y") and clobbers mm1-mm7,
          ; presumably so %a cannot stay in a register across it.
    470   ;CHECK-LABEL: stack_fold_pshufw
    471   ;CHECK:       pshufw $1, {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    472   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    473   %2 = call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %a, i8 1) nounwind readnone
    474   ret x86_mmx %2
    475 }
    476 declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone
    477 
    478 define x86_mmx @stack_fold_psignb(x86_mmx %a0, x86_mmx %a1) {
          ; Expects psignb to take one operand as an 8-byte folded reload from (%rsp).
          ; The nop asm defines one MMX value ("=y") and clobbers mm2-mm7, presumably
          ; so %a0 and %a1 cannot both stay in registers across it.
    479   ;CHECK-LABEL: stack_fold_psignb
    480   ;CHECK:       psignb {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    481   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    482   %2 = call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %a0, x86_mmx %a1) nounwind readnone
    483   ret x86_mmx %2
    484 }
    485 declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone
    486 
    487 define x86_mmx @stack_fold_psignd(x86_mmx %a0, x86_mmx %a1) {
          ; Expects psignd to take one operand as an 8-byte folded reload from (%rsp).
          ; The nop asm defines one MMX value ("=y") and clobbers mm2-mm7, presumably
          ; so %a0 and %a1 cannot both stay in registers across it.
    488   ;CHECK-LABEL: stack_fold_psignd
    489   ;CHECK:       psignd {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    490   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    491   %2 = call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %a0, x86_mmx %a1) nounwind readnone
    492   ret x86_mmx %2
    493 }
    494 declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone
    495 
    496 define x86_mmx @stack_fold_psignw(x86_mmx %a0, x86_mmx %a1) {
          ; Expects psignw to take one operand as an 8-byte folded reload from (%rsp).
          ; The nop asm defines one MMX value ("=y") and clobbers mm2-mm7, presumably
          ; so %a0 and %a1 cannot both stay in registers across it.
    497   ;CHECK-LABEL: stack_fold_psignw
    498   ;CHECK:       psignw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    499   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    500   %2 = call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %a0, x86_mmx %a1) nounwind readnone
    501   ret x86_mmx %2
    502 }
    503 declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone
    504 
    505 define x86_mmx @stack_fold_pslld(x86_mmx %a, x86_mmx %b) {
          ; Expects pslld to take one operand as an 8-byte folded reload from (%rsp).
          ; The nop asm defines one MMX value ("=y") and clobbers mm2-mm7, presumably
          ; so %a and %b cannot both stay in registers across it.
    506   ;CHECK-LABEL: stack_fold_pslld
    507   ;CHECK:       pslld {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    508   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    509   %2 = call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %a, x86_mmx %b) nounwind readnone
    510   ret x86_mmx %2
    511 }
    512 declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone
    513 
    514 define x86_mmx @stack_fold_psllq(x86_mmx %a, x86_mmx %b) {
          ; Expects psllq to take one operand as an 8-byte folded reload from (%rsp).
          ; The nop asm defines one MMX value ("=y") and clobbers mm2-mm7, presumably
          ; so %a and %b cannot both stay in registers across it.
    515   ;CHECK-LABEL: stack_fold_psllq
    516   ;CHECK:       psllq {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    517   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    518   %2 = call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %a, x86_mmx %b) nounwind readnone
    519   ret x86_mmx %2
    520 }
    521 declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone
    522 
    523 define x86_mmx @stack_fold_psllw(x86_mmx %a, x86_mmx %b) {
          ; Expects psllw to take one operand as an 8-byte folded reload from (%rsp).
          ; The nop asm defines one MMX value ("=y") and clobbers mm2-mm7, presumably
          ; so %a and %b cannot both stay in registers across it.
    524   ;CHECK-LABEL: stack_fold_psllw
    525   ;CHECK:       psllw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    526   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    527   %2 = call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %a, x86_mmx %b) nounwind readnone
    528   ret x86_mmx %2
    529 }
    530 declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone
    531 
    532 define x86_mmx @stack_fold_psrad(x86_mmx %a, x86_mmx %b) {
          ; Expects psrad to take one operand as an 8-byte folded reload from (%rsp).
          ; The nop asm defines one MMX value ("=y") and clobbers mm2-mm7, presumably
          ; so %a and %b cannot both stay in registers across it.
    533   ;CHECK-LABEL: stack_fold_psrad
    534   ;CHECK:       psrad {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    535   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    536   %2 = call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %a, x86_mmx %b) nounwind readnone
    537   ret x86_mmx %2
    538 }
    539 declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone
    540 
    541 define x86_mmx @stack_fold_psraw(x86_mmx %a, x86_mmx %b) {
          ; Expects psraw to take one operand as an 8-byte folded reload from (%rsp).
          ; The nop asm defines one MMX value ("=y") and clobbers mm2-mm7, presumably
          ; so %a and %b cannot both stay in registers across it.
    542   ;CHECK-LABEL: stack_fold_psraw
    543   ;CHECK:       psraw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
    544   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    545   %2 = call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %a, x86_mmx %b) nounwind readnone
    546   ret x86_mmx %2
    547 }
    548 declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone
    549 
    ; Stack-folding test for the MMX psrl.d intrinsic. The FileCheck pattern below
    ; expects psrld to take an operand from an (%rsp)-relative slot and the line to
    ; carry an 8-byte Folded Reload annotation.
    550 define x86_mmx @stack_fold_psrld(x86_mmx %a, x86_mmx %b) {
    551   ;CHECK-LABEL: stack_fold_psrld
    552   ;CHECK:       psrld {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
          ; Side-effecting "nop" asm whose x86_mmx result %1 is never used; it lists
          ; mm2-mm7 as clobbered, presumably to create the register pressure that
          ; makes llc spill an operand (TODO confirm against the register allocator).
    553   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    554   %2 = call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %a, x86_mmx %b) nounwind readnone
    555   ret x86_mmx %2
    556 }
          ; Intrinsic exercised above: two x86_mmx operands in, one x86_mmx result out.
    557 declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone
    558 
    ; Stack-folding test for the MMX psrl.q intrinsic. The FileCheck pattern below
    ; expects psrlq to take an operand from an (%rsp)-relative slot and the line to
    ; carry an 8-byte Folded Reload annotation.
    559 define x86_mmx @stack_fold_psrlq(x86_mmx %a, x86_mmx %b) {
    560   ;CHECK-LABEL: stack_fold_psrlq
    561   ;CHECK:       psrlq {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
          ; Side-effecting "nop" asm whose x86_mmx result %1 is never used; it lists
          ; mm2-mm7 as clobbered, presumably to create the register pressure that
          ; makes llc spill an operand (TODO confirm against the register allocator).
    562   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    563   %2 = call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %a, x86_mmx %b) nounwind readnone
    564   ret x86_mmx %2
    565 }
          ; Intrinsic exercised above: two x86_mmx operands in, one x86_mmx result out.
    566 declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone
    567 
    ; Stack-folding test for the MMX psrl.w intrinsic. The FileCheck pattern below
    ; expects psrlw to take an operand from an (%rsp)-relative slot and the line to
    ; carry an 8-byte Folded Reload annotation.
    568 define x86_mmx @stack_fold_psrlw(x86_mmx %a, x86_mmx %b) {
    569   ;CHECK-LABEL: stack_fold_psrlw
    570   ;CHECK:       psrlw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
          ; Side-effecting "nop" asm whose x86_mmx result %1 is never used; it lists
          ; mm2-mm7 as clobbered, presumably to create the register pressure that
          ; makes llc spill an operand (TODO confirm against the register allocator).
    571   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    572   %2 = call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %a, x86_mmx %b) nounwind readnone
    573   ret x86_mmx %2
    574 }
          ; Intrinsic exercised above: two x86_mmx operands in, one x86_mmx result out.
    575 declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone
    576 
    ; Stack-folding test for the MMX psub.b intrinsic. The FileCheck pattern below
    ; expects psubb to take an operand from an (%rsp)-relative slot and the line to
    ; carry an 8-byte Folded Reload annotation.
    577 define x86_mmx @stack_fold_psubb(x86_mmx %a, x86_mmx %b) {
    578   ;CHECK-LABEL: stack_fold_psubb
    579   ;CHECK:       psubb {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
          ; Side-effecting "nop" asm whose x86_mmx result %1 is never used; it lists
          ; mm2-mm7 as clobbered, presumably to create the register pressure that
          ; makes llc spill an operand (TODO confirm against the register allocator).
    580   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    581   %2 = call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %a, x86_mmx %b) nounwind readnone
    582   ret x86_mmx %2
    583 }
          ; Intrinsic exercised above: two x86_mmx operands in, one x86_mmx result out.
    584 declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone
    585 
    ; Stack-folding test for the MMX psub.d intrinsic. The FileCheck pattern below
    ; expects psubd to take an operand from an (%rsp)-relative slot and the line to
    ; carry an 8-byte Folded Reload annotation.
    586 define x86_mmx @stack_fold_psubd(x86_mmx %a, x86_mmx %b) {
    587   ;CHECK-LABEL: stack_fold_psubd
    588   ;CHECK:       psubd {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
          ; Side-effecting "nop" asm whose x86_mmx result %1 is never used; it lists
          ; mm2-mm7 as clobbered, presumably to create the register pressure that
          ; makes llc spill an operand (TODO confirm against the register allocator).
    589   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    590   %2 = call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %a, x86_mmx %b) nounwind readnone
    591   ret x86_mmx %2
    592 }
          ; Intrinsic exercised above: two x86_mmx operands in, one x86_mmx result out.
    593 declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone
    594 
    ; Stack-folding test for the MMX psub.q intrinsic. The FileCheck pattern below
    ; expects psubq to take an operand from an (%rsp)-relative slot and the line to
    ; carry an 8-byte Folded Reload annotation.
    595 define x86_mmx @stack_fold_psubq(x86_mmx %a, x86_mmx %b) {
    596   ;CHECK-LABEL: stack_fold_psubq
    597   ;CHECK:       psubq {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
          ; Side-effecting "nop" asm whose x86_mmx result %1 is never used; it lists
          ; mm2-mm7 as clobbered, presumably to create the register pressure that
          ; makes llc spill an operand (TODO confirm against the register allocator).
    598   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    599   %2 = call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %a, x86_mmx %b) nounwind readnone
    600   ret x86_mmx %2
    601 }
          ; Intrinsic exercised above: two x86_mmx operands in, one x86_mmx result out.
    602 declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone
    603 
    ; Stack-folding test for the MMX psubs.b intrinsic. The FileCheck pattern below
    ; expects psubsb to take an operand from an (%rsp)-relative slot and the line to
    ; carry an 8-byte Folded Reload annotation.
    604 define x86_mmx @stack_fold_psubsb(x86_mmx %a, x86_mmx %b) {
    605   ;CHECK-LABEL: stack_fold_psubsb
    606   ;CHECK:       psubsb {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
          ; Side-effecting "nop" asm whose x86_mmx result %1 is never used; it lists
          ; mm2-mm7 as clobbered, presumably to create the register pressure that
          ; makes llc spill an operand (TODO confirm against the register allocator).
    607   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    608   %2 = call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %a, x86_mmx %b) nounwind readnone
    609   ret x86_mmx %2
    610 }
          ; Intrinsic exercised above: two x86_mmx operands in, one x86_mmx result out.
    611 declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone
    612 
    ; Stack-folding test for the MMX psubs.w intrinsic. The FileCheck pattern below
    ; expects psubsw to take an operand from an (%rsp)-relative slot and the line to
    ; carry an 8-byte Folded Reload annotation.
    613 define x86_mmx @stack_fold_psubsw(x86_mmx %a, x86_mmx %b) {
    614   ;CHECK-LABEL: stack_fold_psubsw
    615   ;CHECK:       psubsw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
          ; Side-effecting "nop" asm whose x86_mmx result %1 is never used; it lists
          ; mm2-mm7 as clobbered, presumably to create the register pressure that
          ; makes llc spill an operand (TODO confirm against the register allocator).
    616   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    617   %2 = call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %a, x86_mmx %b) nounwind readnone
    618   ret x86_mmx %2
    619 }
          ; Intrinsic exercised above: two x86_mmx operands in, one x86_mmx result out.
    620 declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone
    621 
    ; Stack-folding test for the MMX psubus.b intrinsic. The FileCheck pattern below
    ; expects psubusb to take an operand from an (%rsp)-relative slot and the line to
    ; carry an 8-byte Folded Reload annotation.
    622 define x86_mmx @stack_fold_psubusb(x86_mmx %a, x86_mmx %b) {
    623   ;CHECK-LABEL: stack_fold_psubusb
    624   ;CHECK:       psubusb {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
          ; Side-effecting "nop" asm whose x86_mmx result %1 is never used; it lists
          ; mm2-mm7 as clobbered, presumably to create the register pressure that
          ; makes llc spill an operand (TODO confirm against the register allocator).
    625   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    626   %2 = call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %a, x86_mmx %b) nounwind readnone
    627   ret x86_mmx %2
    628 }
          ; Intrinsic exercised above: two x86_mmx operands in, one x86_mmx result out.
    629 declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone
    630 
    ; Stack-folding test for the MMX psubus.w intrinsic. The FileCheck pattern below
    ; expects psubusw to take an operand from an (%rsp)-relative slot and the line to
    ; carry an 8-byte Folded Reload annotation.
    631 define x86_mmx @stack_fold_psubusw(x86_mmx %a, x86_mmx %b) {
    632   ;CHECK-LABEL: stack_fold_psubusw
    633   ;CHECK:       psubusw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
          ; Side-effecting "nop" asm whose x86_mmx result %1 is never used; it lists
          ; mm2-mm7 as clobbered, presumably to create the register pressure that
          ; makes llc spill an operand (TODO confirm against the register allocator).
    634   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    635   %2 = call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %a, x86_mmx %b) nounwind readnone
    636   ret x86_mmx %2
    637 }
          ; Intrinsic exercised above: two x86_mmx operands in, one x86_mmx result out.
    638 declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone
    639 
    ; Stack-folding test for the MMX psub.w intrinsic. The FileCheck pattern below
    ; expects psubw to take an operand from an (%rsp)-relative slot and the line to
    ; carry an 8-byte Folded Reload annotation.
    640 define x86_mmx @stack_fold_psubw(x86_mmx %a, x86_mmx %b) {
    641   ;CHECK-LABEL: stack_fold_psubw
    642   ;CHECK:       psubw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
          ; Side-effecting "nop" asm whose x86_mmx result %1 is never used; it lists
          ; mm2-mm7 as clobbered, presumably to create the register pressure that
          ; makes llc spill an operand (TODO confirm against the register allocator).
    643   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    644   %2 = call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %a, x86_mmx %b) nounwind readnone
    645   ret x86_mmx %2
    646 }
          ; Intrinsic exercised above: two x86_mmx operands in, one x86_mmx result out.
    647 declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone
    648 
    ; Stack-folding test for the MMX punpckhbw intrinsic. The FileCheck pattern below
    ; expects punpckhbw to take an operand from an (%rsp)-relative slot and the line
    ; to carry an 8-byte Folded Reload annotation.
    649 define x86_mmx @stack_fold_punpckhbw(x86_mmx %a, x86_mmx %b) {
    650   ;CHECK-LABEL: stack_fold_punpckhbw
    651   ;CHECK:       punpckhbw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
          ; Side-effecting "nop" asm whose x86_mmx result %1 is never used; it lists
          ; mm2-mm7 as clobbered, presumably to create the register pressure that
          ; makes llc spill an operand (TODO confirm against the register allocator).
    652   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    653   %2 = call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %a, x86_mmx %b) nounwind readnone
    654   ret x86_mmx %2
    655 }
          ; Intrinsic exercised above: two x86_mmx operands in, one x86_mmx result out.
    656 declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone
    657 
    ; Stack-folding test for the MMX punpckhdq intrinsic. The FileCheck pattern below
    ; expects punpckhdq to take an operand from an (%rsp)-relative slot and the line
    ; to carry an 8-byte Folded Reload annotation.
    658 define x86_mmx @stack_fold_punpckhdq(x86_mmx %a, x86_mmx %b) {
    659   ;CHECK-LABEL: stack_fold_punpckhdq
    660   ;CHECK:       punpckhdq {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
          ; Side-effecting "nop" asm whose x86_mmx result %1 is never used; it lists
          ; mm2-mm7 as clobbered, presumably to create the register pressure that
          ; makes llc spill an operand (TODO confirm against the register allocator).
    661   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    662   %2 = call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %a, x86_mmx %b) nounwind readnone
    663   ret x86_mmx %2
    664 }
          ; Intrinsic exercised above: two x86_mmx operands in, one x86_mmx result out.
    665 declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone
    666 
    ; Stack-folding test for the MMX punpckhwd intrinsic. The FileCheck pattern below
    ; expects punpckhwd to take an operand from an (%rsp)-relative slot and the line
    ; to carry an 8-byte Folded Reload annotation.
    667 define x86_mmx @stack_fold_punpckhwd(x86_mmx %a, x86_mmx %b) {
    668   ;CHECK-LABEL: stack_fold_punpckhwd
    669   ;CHECK:       punpckhwd {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
          ; Side-effecting "nop" asm whose x86_mmx result %1 is never used; it lists
          ; mm2-mm7 as clobbered, presumably to create the register pressure that
          ; makes llc spill an operand (TODO confirm against the register allocator).
    670   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    671   %2 = call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %a, x86_mmx %b) nounwind readnone
    672   ret x86_mmx %2
    673 }
          ; Intrinsic exercised above: two x86_mmx operands in, one x86_mmx result out.
    674 declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone
    675 
    ; Stack-folding test for the MMX punpcklbw intrinsic. The FileCheck pattern below
    ; expects punpcklbw to take an operand from an (%rsp)-relative slot and the line
    ; to carry an 8-byte Folded Reload annotation.
    676 define x86_mmx @stack_fold_punpcklbw(x86_mmx %a, x86_mmx %b) {
    677   ;CHECK-LABEL: stack_fold_punpcklbw
    678   ;CHECK:       punpcklbw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
          ; Side-effecting "nop" asm whose x86_mmx result %1 is never used; it lists
          ; mm2-mm7 as clobbered, presumably to create the register pressure that
          ; makes llc spill an operand (TODO confirm against the register allocator).
    679   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    680   %2 = call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %a, x86_mmx %b) nounwind readnone
    681   ret x86_mmx %2
    682 }
          ; Intrinsic exercised above: two x86_mmx operands in, one x86_mmx result out.
    683 declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone
    684 
    ; Stack-folding test for the MMX punpckldq intrinsic. The FileCheck pattern below
    ; expects punpckldq to take an operand from an (%rsp)-relative slot and the line
    ; to carry an 8-byte Folded Reload annotation.
    685 define x86_mmx @stack_fold_punpckldq(x86_mmx %a, x86_mmx %b) {
    686   ;CHECK-LABEL: stack_fold_punpckldq
    687   ;CHECK:       punpckldq {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
          ; Side-effecting "nop" asm whose x86_mmx result %1 is never used; it lists
          ; mm2-mm7 as clobbered, presumably to create the register pressure that
          ; makes llc spill an operand (TODO confirm against the register allocator).
    688   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    689   %2 = call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %a, x86_mmx %b) nounwind readnone
    690   ret x86_mmx %2
    691 }
          ; Intrinsic exercised above: two x86_mmx operands in, one x86_mmx result out.
    692 declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone
    693 
    ; Stack-folding test for the MMX punpcklwd intrinsic. The FileCheck pattern below
    ; expects punpcklwd to take an operand from an (%rsp)-relative slot and the line
    ; to carry an 8-byte Folded Reload annotation.
    694 define x86_mmx @stack_fold_punpcklwd(x86_mmx %a, x86_mmx %b) {
    695   ;CHECK-LABEL: stack_fold_punpcklwd
    696   ;CHECK:       punpcklwd {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
          ; Side-effecting "nop" asm whose x86_mmx result %1 is never used; it lists
          ; mm2-mm7 as clobbered, presumably to create the register pressure that
          ; makes llc spill an operand (TODO confirm against the register allocator).
    697   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    698   %2 = call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %a, x86_mmx %b) nounwind readnone
    699   ret x86_mmx %2
    700 }
          ; Intrinsic exercised above: two x86_mmx operands in, one x86_mmx result out.
    701 declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone
    702 
    ; Stack-folding test for the MMX pxor intrinsic. The FileCheck pattern below
    ; expects pxor to take an operand from an (%rsp)-relative slot and the line to
    ; carry an 8-byte Folded Reload annotation.
    703 define x86_mmx @stack_fold_pxor(x86_mmx %a, x86_mmx %b) {
    704   ;CHECK-LABEL: stack_fold_pxor
    705   ;CHECK:       pxor {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
          ; Side-effecting "nop" asm whose x86_mmx result %1 is never used; it lists
          ; mm2-mm7 as clobbered, presumably to create the register pressure that
          ; makes llc spill an operand (TODO confirm against the register allocator).
    706   %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
    707   %2 = call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %a, x86_mmx %b) nounwind readnone
    708   ret x86_mmx %2
    709 }
          ; Intrinsic exercised above: two x86_mmx operands in, one x86_mmx result out.
    710 declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone
    711