Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
      6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
      7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
      8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
      9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
     10 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
     11 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
     12 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
     13 
     14 define i64 @test_cvtpd2pi(<2 x double> %a0, <2 x double>* %a1) optsize {
        ; Scheduling test for cvtpd2pi.
        ; Calls the llvm.x86.sse.cvtpd2pi intrinsic twice: once on the register
        ; argument %a0 and once on a <2 x double> loaded from %a1. The two
        ; x86_mmx results are combined with llvm.x86.mmx.por and returned as
        ; an i64. For each FileCheck prefix named on the RUN lines, the
        ; assertions below pin the emitted instruction sequence and the
        ; "# sched" annotation printed by -print-schedule. The register form
        ; and the memory form of cvtpd2pi are expected in different orders
        ; depending on the CPU model.
     15 ; GENERIC-LABEL: test_cvtpd2pi:
     16 ; GENERIC:       # %bb.0:
     17 ; GENERIC-NEXT:    cvtpd2pi (%rdi), %mm0 # sched: [10:1.00]
     18 ; GENERIC-NEXT:    cvtpd2pi %xmm0, %mm1 # sched: [4:1.00]
     19 ; GENERIC-NEXT:    por %mm1, %mm0 # sched: [1:0.33]
     20 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
     21 ; GENERIC-NEXT:    retq # sched: [1:1.00]
     22 ;
     23 ; ATOM-LABEL: test_cvtpd2pi:
     24 ; ATOM:       # %bb.0:
     25 ; ATOM-NEXT:    cvtpd2pi (%rdi), %mm0 # sched: [8:4.00]
     26 ; ATOM-NEXT:    cvtpd2pi %xmm0, %mm1 # sched: [7:3.50]
     27 ; ATOM-NEXT:    por %mm1, %mm0 # sched: [1:0.50]
     28 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
     29 ; ATOM-NEXT:    retq # sched: [79:39.50]
     30 ;
     31 ; SLM-LABEL: test_cvtpd2pi:
     32 ; SLM:       # %bb.0:
     33 ; SLM-NEXT:    cvtpd2pi (%rdi), %mm1 # sched: [7:1.00]
     34 ; SLM-NEXT:    cvtpd2pi %xmm0, %mm0 # sched: [4:0.50]
     35 ; SLM-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
     36 ; SLM-NEXT:    movq %mm1, %rax # sched: [1:0.50]
     37 ; SLM-NEXT:    retq # sched: [4:1.00]
     38 ;
     39 ; SANDY-LABEL: test_cvtpd2pi:
     40 ; SANDY:       # %bb.0:
     41 ; SANDY-NEXT:    cvtpd2pi (%rdi), %mm0 # sched: [10:1.00]
     42 ; SANDY-NEXT:    cvtpd2pi %xmm0, %mm1 # sched: [4:1.00]
     43 ; SANDY-NEXT:    por %mm1, %mm0 # sched: [1:0.33]
     44 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
     45 ; SANDY-NEXT:    retq # sched: [1:1.00]
     46 ;
     47 ; HASWELL-LABEL: test_cvtpd2pi:
     48 ; HASWELL:       # %bb.0:
     49 ; HASWELL-NEXT:    cvtpd2pi (%rdi), %mm0 # sched: [10:1.00]
     50 ; HASWELL-NEXT:    cvtpd2pi %xmm0, %mm1 # sched: [4:1.00]
     51 ; HASWELL-NEXT:    por %mm1, %mm0 # sched: [1:0.33]
     52 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
     53 ; HASWELL-NEXT:    retq # sched: [7:1.00]
     54 ;
     55 ; BROADWELL-LABEL: test_cvtpd2pi:
     56 ; BROADWELL:       # %bb.0:
     57 ; BROADWELL-NEXT:    cvtpd2pi %xmm0, %mm0 # sched: [4:1.00]
     58 ; BROADWELL-NEXT:    cvtpd2pi (%rdi), %mm1 # sched: [9:1.00]
     59 ; BROADWELL-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
     60 ; BROADWELL-NEXT:    movq %mm1, %rax # sched: [1:1.00]
     61 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
     62 ;
     63 ; SKYLAKE-LABEL: test_cvtpd2pi:
     64 ; SKYLAKE:       # %bb.0:
     65 ; SKYLAKE-NEXT:    cvtpd2pi %xmm0, %mm0 # sched: [5:1.00]
     66 ; SKYLAKE-NEXT:    cvtpd2pi (%rdi), %mm1 # sched: [11:1.00]
     67 ; SKYLAKE-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
     68 ; SKYLAKE-NEXT:    movq %mm1, %rax # sched: [2:1.00]
     69 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
     70 ;
     71 ; SKX-LABEL: test_cvtpd2pi:
     72 ; SKX:       # %bb.0:
     73 ; SKX-NEXT:    cvtpd2pi %xmm0, %mm0 # sched: [5:1.00]
     74 ; SKX-NEXT:    cvtpd2pi (%rdi), %mm1 # sched: [11:1.00]
     75 ; SKX-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
     76 ; SKX-NEXT:    movq %mm1, %rax # sched: [2:1.00]
     77 ; SKX-NEXT:    retq # sched: [7:1.00]
     78 ;
     79 ; BTVER2-LABEL: test_cvtpd2pi:
     80 ; BTVER2:       # %bb.0:
     81 ; BTVER2-NEXT:    cvtpd2pi (%rdi), %mm1 # sched: [8:1.00]
     82 ; BTVER2-NEXT:    cvtpd2pi %xmm0, %mm0 # sched: [3:1.00]
     83 ; BTVER2-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
     84 ; BTVER2-NEXT:    movq %mm1, %rax # sched: [4:1.00]
     85 ; BTVER2-NEXT:    retq # sched: [4:1.00]
     86 ;
     87 ; ZNVER1-LABEL: test_cvtpd2pi:
     88 ; ZNVER1:       # %bb.0:
     89 ; ZNVER1-NEXT:    cvtpd2pi (%rdi), %mm1 # sched: [12:1.00]
     90 ; ZNVER1-NEXT:    cvtpd2pi %xmm0, %mm0 # sched: [4:1.00]
     91 ; ZNVER1-NEXT:    por %mm0, %mm1 # sched: [1:0.25]
     92 ; ZNVER1-NEXT:    movq %mm1, %rax # sched: [2:1.00]
     93 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; IR under test. Both conversion results feed the por that produces the
        ; return value, so both cvtpd2pi forms appear in the expected assembly.
     94   %1 = call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a0)
     95   %2 = load <2 x double>, <2 x double> *%a1, align 16
     96   %3 = call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %2)
     97   %4 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %1, x86_mmx %3)
     98   %5 = bitcast x86_mmx %4 to i64
     99   ret i64 %5
    100 }
    101 declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone
    102 
    103 define <2 x double> @test_cvtpi2pd(x86_mmx %a0, x86_mmx* %a1) optsize {
        ; Scheduling test for cvtpi2pd.
        ; Calls the llvm.x86.sse.cvtpi2pd intrinsic on the register argument
        ; %a0 and on an x86_mmx value loaded from %a1, then returns the fadd of
        ; the two <2 x double> results. The assertions below pin, per FileCheck
        ; prefix, the emitted instruction sequence and its "# sched"
        ; annotation. The GENERIC, ATOM and SLM expectations use addpd for the
        ; final add; the remaining prefixes expect the three-operand vaddpd.
    104 ; GENERIC-LABEL: test_cvtpi2pd:
    105 ; GENERIC:       # %bb.0:
    106 ; GENERIC-NEXT:    cvtpi2pd %mm0, %xmm1 # sched: [4:1.00]
    107 ; GENERIC-NEXT:    cvtpi2pd (%rdi), %xmm0 # sched: [10:1.00]
    108 ; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
    109 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    110 ;
    111 ; ATOM-LABEL: test_cvtpi2pd:
    112 ; ATOM:       # %bb.0:
    113 ; ATOM-NEXT:    cvtpi2pd (%rdi), %xmm0 # sched: [8:4.00]
    114 ; ATOM-NEXT:    cvtpi2pd %mm0, %xmm1 # sched: [7:3.50]
    115 ; ATOM-NEXT:    addpd %xmm1, %xmm0 # sched: [6:3.00]
    116 ; ATOM-NEXT:    retq # sched: [79:39.50]
    117 ;
    118 ; SLM-LABEL: test_cvtpi2pd:
    119 ; SLM:       # %bb.0:
    120 ; SLM-NEXT:    cvtpi2pd (%rdi), %xmm0 # sched: [7:1.00]
    121 ; SLM-NEXT:    cvtpi2pd %mm0, %xmm1 # sched: [4:0.50]
    122 ; SLM-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
    123 ; SLM-NEXT:    retq # sched: [4:1.00]
    124 ;
    125 ; SANDY-LABEL: test_cvtpi2pd:
    126 ; SANDY:       # %bb.0:
    127 ; SANDY-NEXT:    cvtpi2pd %mm0, %xmm0 # sched: [4:1.00]
    128 ; SANDY-NEXT:    cvtpi2pd (%rdi), %xmm1 # sched: [10:1.00]
    129 ; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    130 ; SANDY-NEXT:    retq # sched: [1:1.00]
    131 ;
    132 ; HASWELL-LABEL: test_cvtpi2pd:
    133 ; HASWELL:       # %bb.0:
    134 ; HASWELL-NEXT:    cvtpi2pd %mm0, %xmm0 # sched: [4:1.00]
    135 ; HASWELL-NEXT:    cvtpi2pd (%rdi), %xmm1 # sched: [9:1.00]
    136 ; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    137 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    138 ;
    139 ; BROADWELL-LABEL: test_cvtpi2pd:
    140 ; BROADWELL:       # %bb.0:
    141 ; BROADWELL-NEXT:    cvtpi2pd (%rdi), %xmm0 # sched: [9:1.00]
    142 ; BROADWELL-NEXT:    cvtpi2pd %mm0, %xmm1 # sched: [4:1.00]
    143 ; BROADWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
    144 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    145 ;
    146 ; SKYLAKE-LABEL: test_cvtpi2pd:
    147 ; SKYLAKE:       # %bb.0:
    148 ; SKYLAKE-NEXT:    cvtpi2pd %mm0, %xmm0 # sched: [5:1.00]
    149 ; SKYLAKE-NEXT:    cvtpi2pd (%rdi), %xmm1 # sched: [10:1.00]
    150 ; SKYLAKE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
    151 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    152 ;
    153 ; SKX-LABEL: test_cvtpi2pd:
    154 ; SKX:       # %bb.0:
    155 ; SKX-NEXT:    cvtpi2pd %mm0, %xmm0 # sched: [4:0.50]
    156 ; SKX-NEXT:    cvtpi2pd (%rdi), %xmm1 # sched: [9:0.50]
    157 ; SKX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
    158 ; SKX-NEXT:    retq # sched: [7:1.00]
    159 ;
    160 ; BTVER2-LABEL: test_cvtpi2pd:
    161 ; BTVER2:       # %bb.0:
    162 ; BTVER2-NEXT:    cvtpi2pd (%rdi), %xmm1 # sched: [8:1.00]
    163 ; BTVER2-NEXT:    cvtpi2pd %mm0, %xmm0 # sched: [3:1.00]
    164 ; BTVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    165 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    166 ;
    167 ; ZNVER1-LABEL: test_cvtpi2pd:
    168 ; ZNVER1:       # %bb.0:
    169 ; ZNVER1-NEXT:    cvtpi2pd (%rdi), %xmm1 # sched: [12:1.00]
    170 ; ZNVER1-NEXT:    cvtpi2pd %mm0, %xmm0 # sched: [3:1.00]
    171 ; ZNVER1-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    172 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; IR under test. The fadd consumes both conversion results, so the
        ; register and memory forms of cvtpi2pd both appear in the output.
    173   %1 = call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %a0)
    174   %2 = load x86_mmx, x86_mmx *%a1, align 8
    175   %3 = call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %2)
    176   %4 = fadd <2 x double> %1, %3
    177   ret <2 x double> %4
    178 }
    179 declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone
    180 
    181 define <4 x float> @test_cvtpi2ps(x86_mmx %a0, x86_mmx* %a1, <4 x float> %a2, <4 x float> %a3) optsize {
        ; Scheduling test for cvtpi2ps.
        ; Calls the llvm.x86.sse.cvtpi2ps intrinsic twice. The first call takes
        ; %a2 as its <4 x float> operand and the register argument %a0 as its
        ; x86_mmx operand; the second takes %a3 and an x86_mmx value loaded
        ; from %a1. The function returns the fadd of the two results. The
        ; assertions below pin, per FileCheck prefix, the emitted instruction
        ; sequence and its "# sched" annotation. The GENERIC, ATOM and SLM
        ; expectations use addps; the remaining prefixes expect vaddps.
    182 ; GENERIC-LABEL: test_cvtpi2ps:
    183 ; GENERIC:       # %bb.0:
    184 ; GENERIC-NEXT:    cvtpi2ps %mm0, %xmm0 # sched: [3:1.00]
    185 ; GENERIC-NEXT:    cvtpi2ps (%rdi), %xmm1 # sched: [9:1.00]
    186 ; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
    187 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    188 ;
    189 ; ATOM-LABEL: test_cvtpi2ps:
    190 ; ATOM:       # %bb.0:
    191 ; ATOM-NEXT:    cvtpi2ps %mm0, %xmm0 # sched: [5:5.00]
    192 ; ATOM-NEXT:    cvtpi2ps (%rdi), %xmm1 # sched: [5:5.00]
    193 ; ATOM-NEXT:    addps %xmm1, %xmm0 # sched: [5:5.00]
    194 ; ATOM-NEXT:    retq # sched: [79:39.50]
    195 ;
    196 ; SLM-LABEL: test_cvtpi2ps:
    197 ; SLM:       # %bb.0:
    198 ; SLM-NEXT:    cvtpi2ps (%rdi), %xmm1 # sched: [7:1.00]
    199 ; SLM-NEXT:    cvtpi2ps %mm0, %xmm0 # sched: [4:0.50]
    200 ; SLM-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
    201 ; SLM-NEXT:    retq # sched: [4:1.00]
    202 ;
    203 ; SANDY-LABEL: test_cvtpi2ps:
    204 ; SANDY:       # %bb.0:
    205 ; SANDY-NEXT:    cvtpi2ps %mm0, %xmm0 # sched: [3:1.00]
    206 ; SANDY-NEXT:    cvtpi2ps (%rdi), %xmm1 # sched: [9:1.00]
    207 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    208 ; SANDY-NEXT:    retq # sched: [1:1.00]
    209 ;
    210 ; HASWELL-LABEL: test_cvtpi2ps:
    211 ; HASWELL:       # %bb.0:
    212 ; HASWELL-NEXT:    cvtpi2ps %mm0, %xmm0 # sched: [3:1.00]
    213 ; HASWELL-NEXT:    cvtpi2ps (%rdi), %xmm1 # sched: [8:1.00]
    214 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    215 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    216 ;
    217 ; BROADWELL-LABEL: test_cvtpi2ps:
    218 ; BROADWELL:       # %bb.0:
    219 ; BROADWELL-NEXT:    cvtpi2ps %mm0, %xmm0 # sched: [3:1.00]
    220 ; BROADWELL-NEXT:    cvtpi2ps (%rdi), %xmm1 # sched: [8:1.00]
    221 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    222 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    223 ;
    224 ; SKYLAKE-LABEL: test_cvtpi2ps:
    225 ; SKYLAKE:       # %bb.0:
    226 ; SKYLAKE-NEXT:    cvtpi2ps %mm0, %xmm0 # sched: [6:2.00]
    227 ; SKYLAKE-NEXT:    cvtpi2ps (%rdi), %xmm1 # sched: [9:1.00]
    228 ; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
    229 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    230 ;
    231 ; SKX-LABEL: test_cvtpi2ps:
    232 ; SKX:       # %bb.0:
    233 ; SKX-NEXT:    cvtpi2ps %mm0, %xmm0 # sched: [6:2.00]
    234 ; SKX-NEXT:    cvtpi2ps (%rdi), %xmm1 # sched: [9:1.00]
    235 ; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
    236 ; SKX-NEXT:    retq # sched: [7:1.00]
    237 ;
    238 ; BTVER2-LABEL: test_cvtpi2ps:
    239 ; BTVER2:       # %bb.0:
    240 ; BTVER2-NEXT:    cvtpi2ps (%rdi), %xmm1 # sched: [8:1.00]
    241 ; BTVER2-NEXT:    cvtpi2ps %mm0, %xmm0 # sched: [3:1.00]
    242 ; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    243 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    244 ;
    245 ; ZNVER1-LABEL: test_cvtpi2ps:
    246 ; ZNVER1:       # %bb.0:
    247 ; ZNVER1-NEXT:    cvtpi2ps (%rdi), %xmm1 # sched: [12:1.00]
    248 ; ZNVER1-NEXT:    cvtpi2ps %mm0, %xmm0 # sched: [5:1.00]
    249 ; ZNVER1-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    250 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; IR under test. The fadd consumes both conversion results, so the
        ; register and memory forms of cvtpi2ps both appear in the output.
    251   %1 = call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a2, x86_mmx %a0)
    252   %2 = load x86_mmx, x86_mmx *%a1, align 8
    253   %3 = call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a3, x86_mmx %2)
    254   %4 = fadd <4 x float> %1, %3
    255   ret <4 x float> %4
    256 }
    257 declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx) nounwind readnone
    258 
    259 define i64 @test_cvtps2pi(<4 x float> %a0, <4 x float>* %a1) optsize {
        ; Scheduling test for cvtps2pi.
        ; Calls the llvm.x86.sse.cvtps2pi intrinsic on the register argument
        ; %a0 and on a <4 x float> loaded from %a1. The two x86_mmx results are
        ; combined with llvm.x86.mmx.por and returned as an i64. The assertions
        ; below pin, per FileCheck prefix, the emitted instruction sequence and
        ; the "# sched" annotation printed by -print-schedule.
    260 ; GENERIC-LABEL: test_cvtps2pi:
    261 ; GENERIC:       # %bb.0:
    262 ; GENERIC-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [3:1.00]
    263 ; GENERIC-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [9:1.00]
    264 ; GENERIC-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
    265 ; GENERIC-NEXT:    movq %mm1, %rax # sched: [2:1.00]
    266 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    267 ;
    268 ; ATOM-LABEL: test_cvtps2pi:
    269 ; ATOM:       # %bb.0:
    270 ; ATOM-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [5:5.00]
    271 ; ATOM-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [5:5.00]
    272 ; ATOM-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
    273 ; ATOM-NEXT:    movq %mm1, %rax # sched: [3:3.00]
    274 ; ATOM-NEXT:    retq # sched: [79:39.50]
    275 ;
    276 ; SLM-LABEL: test_cvtps2pi:
    277 ; SLM:       # %bb.0:
    278 ; SLM-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [7:1.00]
    279 ; SLM-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [4:0.50]
    280 ; SLM-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
    281 ; SLM-NEXT:    movq %mm1, %rax # sched: [1:0.50]
    282 ; SLM-NEXT:    retq # sched: [4:1.00]
    283 ;
    284 ; SANDY-LABEL: test_cvtps2pi:
    285 ; SANDY:       # %bb.0:
    286 ; SANDY-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [3:1.00]
    287 ; SANDY-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [9:1.00]
    288 ; SANDY-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
    289 ; SANDY-NEXT:    movq %mm1, %rax # sched: [2:1.00]
    290 ; SANDY-NEXT:    retq # sched: [1:1.00]
    291 ;
    292 ; HASWELL-LABEL: test_cvtps2pi:
    293 ; HASWELL:       # %bb.0:
    294 ; HASWELL-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [4:1.00]
    295 ; HASWELL-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [8:1.00]
    296 ; HASWELL-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
    297 ; HASWELL-NEXT:    movq %mm1, %rax # sched: [1:1.00]
    298 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    299 ;
    300 ; BROADWELL-LABEL: test_cvtps2pi:
    301 ; BROADWELL:       # %bb.0:
    302 ; BROADWELL-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [4:1.00]
    303 ; BROADWELL-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [8:1.00]
    304 ; BROADWELL-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
    305 ; BROADWELL-NEXT:    movq %mm1, %rax # sched: [1:1.00]
    306 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    307 ;
    308 ; SKYLAKE-LABEL: test_cvtps2pi:
    309 ; SKYLAKE:       # %bb.0:
    310 ; SKYLAKE-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [5:1.00]
    311 ; SKYLAKE-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [9:0.50]
    312 ; SKYLAKE-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
    313 ; SKYLAKE-NEXT:    movq %mm1, %rax # sched: [2:1.00]
    314 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    315 ;
    316 ; SKX-LABEL: test_cvtps2pi:
    317 ; SKX:       # %bb.0:
    318 ; SKX-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [5:1.00]
    319 ; SKX-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [9:0.50]
    320 ; SKX-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
    321 ; SKX-NEXT:    movq %mm1, %rax # sched: [2:1.00]
    322 ; SKX-NEXT:    retq # sched: [7:1.00]
    323 ;
    324 ; BTVER2-LABEL: test_cvtps2pi:
    325 ; BTVER2:       # %bb.0:
    326 ; BTVER2-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [8:1.00]
    327 ; BTVER2-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [3:1.00]
    328 ; BTVER2-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
    329 ; BTVER2-NEXT:    movq %mm1, %rax # sched: [4:1.00]
    330 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    331 ;
    332 ; ZNVER1-LABEL: test_cvtps2pi:
    333 ; ZNVER1:       # %bb.0:
    334 ; ZNVER1-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [12:1.00]
    335 ; ZNVER1-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [4:1.00]
    336 ; ZNVER1-NEXT:    por %mm0, %mm1 # sched: [1:0.25]
    337 ; ZNVER1-NEXT:    movq %mm1, %rax # sched: [2:1.00]
    338 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; IR under test. Both conversion results feed the por that produces the
        ; return value, so both cvtps2pi forms appear in the expected assembly.
    339   %1 = call x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float> %a0)
    340   %2 = load <4 x float>, <4 x float> *%a1, align 16
    341   %3 = call x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float> %2)
    342   %4 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %1, x86_mmx %3)
    343   %5 = bitcast x86_mmx %4 to i64
    344   ret i64 %5
    345 }
    346 declare x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float>) nounwind readnone
    347 
    348 define i64 @test_cvttpd2pi(<2 x double> %a0, <2 x double>* %a1) optsize {
        ; Scheduling test for cvttpd2pi.
        ; Calls the llvm.x86.sse.cvttpd2pi intrinsic on the register argument
        ; %a0 and on a <2 x double> loaded from %a1. The two x86_mmx results
        ; are combined with llvm.x86.mmx.por and returned as an i64. The
        ; assertions below pin, per FileCheck prefix, the emitted instruction
        ; sequence and the "# sched" annotation printed by -print-schedule.
    349 ; GENERIC-LABEL: test_cvttpd2pi:
    350 ; GENERIC:       # %bb.0:
    351 ; GENERIC-NEXT:    cvttpd2pi (%rdi), %mm0 # sched: [10:1.00]
    352 ; GENERIC-NEXT:    cvttpd2pi %xmm0, %mm1 # sched: [4:1.00]
    353 ; GENERIC-NEXT:    por %mm1, %mm0 # sched: [1:0.33]
    354 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
    355 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    356 ;
    357 ; ATOM-LABEL: test_cvttpd2pi:
    358 ; ATOM:       # %bb.0:
    359 ; ATOM-NEXT:    cvttpd2pi (%rdi), %mm0 # sched: [8:4.00]
    360 ; ATOM-NEXT:    cvttpd2pi %xmm0, %mm1 # sched: [7:3.50]
    361 ; ATOM-NEXT:    por %mm1, %mm0 # sched: [1:0.50]
    362 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
    363 ; ATOM-NEXT:    retq # sched: [79:39.50]
    364 ;
    365 ; SLM-LABEL: test_cvttpd2pi:
    366 ; SLM:       # %bb.0:
    367 ; SLM-NEXT:    cvttpd2pi (%rdi), %mm1 # sched: [7:1.00]
    368 ; SLM-NEXT:    cvttpd2pi %xmm0, %mm0 # sched: [4:0.50]
    369 ; SLM-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
    370 ; SLM-NEXT:    movq %mm1, %rax # sched: [1:0.50]
    371 ; SLM-NEXT:    retq # sched: [4:1.00]
    372 ;
    373 ; SANDY-LABEL: test_cvttpd2pi:
    374 ; SANDY:       # %bb.0:
    375 ; SANDY-NEXT:    cvttpd2pi (%rdi), %mm0 # sched: [10:1.00]
    376 ; SANDY-NEXT:    cvttpd2pi %xmm0, %mm1 # sched: [4:1.00]
    377 ; SANDY-NEXT:    por %mm1, %mm0 # sched: [1:0.33]
    378 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
    379 ; SANDY-NEXT:    retq # sched: [1:1.00]
    380 ;
    381 ; HASWELL-LABEL: test_cvttpd2pi:
    382 ; HASWELL:       # %bb.0:
    383 ; HASWELL-NEXT:    cvttpd2pi (%rdi), %mm0 # sched: [10:1.00]
    384 ; HASWELL-NEXT:    cvttpd2pi %xmm0, %mm1 # sched: [4:1.00]
    385 ; HASWELL-NEXT:    por %mm1, %mm0 # sched: [1:0.33]
    386 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
    387 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    388 ;
    389 ; BROADWELL-LABEL: test_cvttpd2pi:
    390 ; BROADWELL:       # %bb.0:
    391 ; BROADWELL-NEXT:    cvttpd2pi %xmm0, %mm0 # sched: [4:1.00]
    392 ; BROADWELL-NEXT:    cvttpd2pi (%rdi), %mm1 # sched: [9:1.00]
    393 ; BROADWELL-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
    394 ; BROADWELL-NEXT:    movq %mm1, %rax # sched: [1:1.00]
    395 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    396 ;
    397 ; SKYLAKE-LABEL: test_cvttpd2pi:
    398 ; SKYLAKE:       # %bb.0:
    399 ; SKYLAKE-NEXT:    cvttpd2pi %xmm0, %mm0 # sched: [5:1.00]
    400 ; SKYLAKE-NEXT:    cvttpd2pi (%rdi), %mm1 # sched: [11:1.00]
    401 ; SKYLAKE-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
    402 ; SKYLAKE-NEXT:    movq %mm1, %rax # sched: [2:1.00]
    403 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    404 ;
    405 ; SKX-LABEL: test_cvttpd2pi:
    406 ; SKX:       # %bb.0:
    407 ; SKX-NEXT:    cvttpd2pi %xmm0, %mm0 # sched: [5:1.00]
    408 ; SKX-NEXT:    cvttpd2pi (%rdi), %mm1 # sched: [11:1.00]
    409 ; SKX-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
    410 ; SKX-NEXT:    movq %mm1, %rax # sched: [2:1.00]
    411 ; SKX-NEXT:    retq # sched: [7:1.00]
    412 ;
    413 ; BTVER2-LABEL: test_cvttpd2pi:
    414 ; BTVER2:       # %bb.0:
    415 ; BTVER2-NEXT:    cvttpd2pi (%rdi), %mm1 # sched: [8:1.00]
    416 ; BTVER2-NEXT:    cvttpd2pi %xmm0, %mm0 # sched: [3:1.00]
    417 ; BTVER2-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
    418 ; BTVER2-NEXT:    movq %mm1, %rax # sched: [4:1.00]
    419 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    420 ;
    421 ; ZNVER1-LABEL: test_cvttpd2pi:
    422 ; ZNVER1:       # %bb.0:
    423 ; ZNVER1-NEXT:    cvttpd2pi (%rdi), %mm1 # sched: [12:1.00]
    424 ; ZNVER1-NEXT:    cvttpd2pi %xmm0, %mm0 # sched: [4:1.00]
    425 ; ZNVER1-NEXT:    por %mm0, %mm1 # sched: [1:0.25]
    426 ; ZNVER1-NEXT:    movq %mm1, %rax # sched: [2:1.00]
    427 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; IR under test. Both conversion results feed the por that produces the
        ; return value, so both cvttpd2pi forms appear in the expected assembly.
    428   %1 = call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a0)
    429   %2 = load <2 x double>, <2 x double> *%a1, align 16
    430   %3 = call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %2)
    431   %4 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %1, x86_mmx %3)
    432   %5 = bitcast x86_mmx %4 to i64
    433   ret i64 %5
    434 }
    435 declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone
    436 
    437 define i64 @test_cvttps2pi(<4 x float> %a0, <4 x float>* %a1) optsize {
        ; Scheduling test for cvttps2pi.
        ; Calls the llvm.x86.sse.cvttps2pi intrinsic on the register argument
        ; %a0 and on a <4 x float> loaded from %a1. The two x86_mmx results are
        ; combined with llvm.x86.mmx.por and returned as an i64. The assertions
        ; below pin, per FileCheck prefix, the emitted instruction sequence and
        ; the "# sched" annotation printed by -print-schedule.
    438 ; GENERIC-LABEL: test_cvttps2pi:
    439 ; GENERIC:       # %bb.0:
    440 ; GENERIC-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [3:1.00]
    441 ; GENERIC-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [9:1.00]
    442 ; GENERIC-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
    443 ; GENERIC-NEXT:    movq %mm1, %rax # sched: [2:1.00]
    444 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    445 ;
    446 ; ATOM-LABEL: test_cvttps2pi:
    447 ; ATOM:       # %bb.0:
    448 ; ATOM-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [5:5.00]
    449 ; ATOM-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [5:5.00]
    450 ; ATOM-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
    451 ; ATOM-NEXT:    movq %mm1, %rax # sched: [3:3.00]
    452 ; ATOM-NEXT:    retq # sched: [79:39.50]
    453 ;
    454 ; SLM-LABEL: test_cvttps2pi:
    455 ; SLM:       # %bb.0:
    456 ; SLM-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [7:1.00]
    457 ; SLM-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [4:0.50]
    458 ; SLM-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
    459 ; SLM-NEXT:    movq %mm1, %rax # sched: [1:0.50]
    460 ; SLM-NEXT:    retq # sched: [4:1.00]
    461 ;
    462 ; SANDY-LABEL: test_cvttps2pi:
    463 ; SANDY:       # %bb.0:
    464 ; SANDY-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [3:1.00]
    465 ; SANDY-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [9:1.00]
    466 ; SANDY-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
    467 ; SANDY-NEXT:    movq %mm1, %rax # sched: [2:1.00]
    468 ; SANDY-NEXT:    retq # sched: [1:1.00]
    469 ;
    470 ; HASWELL-LABEL: test_cvttps2pi:
    471 ; HASWELL:       # %bb.0:
    472 ; HASWELL-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [4:1.00]
    473 ; HASWELL-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [8:1.00]
    474 ; HASWELL-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
    475 ; HASWELL-NEXT:    movq %mm1, %rax # sched: [1:1.00]
    476 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    477 ;
    478 ; BROADWELL-LABEL: test_cvttps2pi:
    479 ; BROADWELL:       # %bb.0:
    480 ; BROADWELL-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [4:1.00]
    481 ; BROADWELL-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [8:1.00]
    482 ; BROADWELL-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
    483 ; BROADWELL-NEXT:    movq %mm1, %rax # sched: [1:1.00]
    484 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    485 ;
    486 ; SKYLAKE-LABEL: test_cvttps2pi:
    487 ; SKYLAKE:       # %bb.0:
    488 ; SKYLAKE-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [5:1.00]
    489 ; SKYLAKE-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [9:0.50]
    490 ; SKYLAKE-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
    491 ; SKYLAKE-NEXT:    movq %mm1, %rax # sched: [2:1.00]
    492 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    493 ;
    494 ; SKX-LABEL: test_cvttps2pi:
    495 ; SKX:       # %bb.0:
    496 ; SKX-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [5:1.00]
    497 ; SKX-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [9:0.50]
    498 ; SKX-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
    499 ; SKX-NEXT:    movq %mm1, %rax # sched: [2:1.00]
    500 ; SKX-NEXT:    retq # sched: [7:1.00]
    501 ;
    502 ; BTVER2-LABEL: test_cvttps2pi:
    503 ; BTVER2:       # %bb.0:
    504 ; BTVER2-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [8:1.00]
    505 ; BTVER2-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [3:1.00]
    506 ; BTVER2-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
    507 ; BTVER2-NEXT:    movq %mm1, %rax # sched: [4:1.00]
    508 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    509 ;
    510 ; ZNVER1-LABEL: test_cvttps2pi:
    511 ; ZNVER1:       # %bb.0:
    512 ; ZNVER1-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [12:1.00]
    513 ; ZNVER1-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [4:1.00]
    514 ; ZNVER1-NEXT:    por %mm0, %mm1 # sched: [1:0.25]
    515 ; ZNVER1-NEXT:    movq %mm1, %rax # sched: [2:1.00]
    516 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; IR under test. Both conversion results feed the por that produces the
        ; return value, so both cvttps2pi forms appear in the expected assembly.
    517   %1 = call x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float> %a0)
    518   %2 = load <4 x float>, <4 x float> *%a1, align 16
    519   %3 = call x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float> %2)
    520   %4 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %1, x86_mmx %3)
    521   %5 = bitcast x86_mmx %4 to i64
    522   ret i64 %5
    523 }
    524 declare x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float>) nounwind readnone
    525 
    526 define void @test_emms() optsize {
        ; Scheduling test for emms.
        ; The function takes no arguments, calls the llvm.x86.mmx.emms
        ; intrinsic and returns void. For each FileCheck prefix, the assertions
        ; below expect a single emms followed by retq, each carrying the
        ; "# sched" annotation printed by -print-schedule.
    527 ; GENERIC-LABEL: test_emms:
    528 ; GENERIC:       # %bb.0:
    529 ; GENERIC-NEXT:    emms # sched: [31:10.33]
    530 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    531 ;
    532 ; ATOM-LABEL: test_emms:
    533 ; ATOM:       # %bb.0:
    534 ; ATOM-NEXT:    emms # sched: [5:2.50]
    535 ; ATOM-NEXT:    retq # sched: [79:39.50]
    536 ;
    537 ; SLM-LABEL: test_emms:
    538 ; SLM:       # %bb.0:
    539 ; SLM-NEXT:    emms # sched: [10:5.00]
    540 ; SLM-NEXT:    retq # sched: [4:1.00]
    541 ;
    542 ; SANDY-LABEL: test_emms:
    543 ; SANDY:       # %bb.0:
    544 ; SANDY-NEXT:    emms # sched: [31:10.33]
    545 ; SANDY-NEXT:    retq # sched: [1:1.00]
    546 ;
    547 ; HASWELL-LABEL: test_emms:
    548 ; HASWELL:       # %bb.0:
    549 ; HASWELL-NEXT:    emms # sched: [31:10.00]
    550 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    551 ;
    552 ; BROADWELL-LABEL: test_emms:
    553 ; BROADWELL:       # %bb.0:
    554 ; BROADWELL-NEXT:    emms # sched: [31:10.00]
    555 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    556 ;
    557 ; SKYLAKE-LABEL: test_emms:
    558 ; SKYLAKE:       # %bb.0:
    559 ; SKYLAKE-NEXT:    emms # sched: [10:4.50]
    560 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    561 ;
    562 ; SKX-LABEL: test_emms:
    563 ; SKX:       # %bb.0:
    564 ; SKX-NEXT:    emms # sched: [10:4.50]
    565 ; SKX-NEXT:    retq # sched: [7:1.00]
    566 ;
    567 ; BTVER2-LABEL: test_emms:
    568 ; BTVER2:       # %bb.0:
    569 ; BTVER2-NEXT:    emms # sched: [2:0.50]
    570 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    571 ;
    572 ; ZNVER1-LABEL: test_emms:
    573 ; ZNVER1:       # %bb.0:
    574 ; ZNVER1-NEXT:    emms # sched: [2:0.25]
    575 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; IR under test: one intrinsic call with no operands and no result.
    576   call void @llvm.x86.mmx.emms()
    577   ret void
    578 }
    579 declare void @llvm.x86.mmx.emms()
    580 
    581 define void @test_maskmovq(x86_mmx %a0, x86_mmx %a1, i8* %a2) optsize {
        ; Scheduling test for maskmovq.
        ; Passes the two x86_mmx arguments and the i8* argument straight to the
        ; llvm.x86.mmx.maskmovq intrinsic and returns void. For each FileCheck
        ; prefix, the assertions below expect "maskmovq %mm1, %mm0" followed by
        ; retq, each with its "# sched" annotation. The pointer %a2 does not
        ; appear as an explicit operand in the expected assembly.
        ; NOTE(review): presumably the instruction takes its store address
        ; implicitly from the register that already holds %a2 - confirm against
        ; the instruction reference.
    582 ; GENERIC-LABEL: test_maskmovq:
    583 ; GENERIC:       # %bb.0:
    584 ; GENERIC-NEXT:    maskmovq %mm1, %mm0 # sched: [1:1.00]
    585 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    586 ;
    587 ; ATOM-LABEL: test_maskmovq:
    588 ; ATOM:       # %bb.0:
    589 ; ATOM-NEXT:    maskmovq %mm1, %mm0 # sched: [1:1.00]
    590 ; ATOM-NEXT:    retq # sched: [79:39.50]
    591 ;
    592 ; SLM-LABEL: test_maskmovq:
    593 ; SLM:       # %bb.0:
    594 ; SLM-NEXT:    maskmovq %mm1, %mm0 # sched: [1:1.00]
    595 ; SLM-NEXT:    retq # sched: [4:1.00]
    596 ;
    597 ; SANDY-LABEL: test_maskmovq:
    598 ; SANDY:       # %bb.0:
    599 ; SANDY-NEXT:    maskmovq %mm1, %mm0 # sched: [1:1.00]
    600 ; SANDY-NEXT:    retq # sched: [1:1.00]
    601 ;
    602 ; HASWELL-LABEL: test_maskmovq:
    603 ; HASWELL:       # %bb.0:
    604 ; HASWELL-NEXT:    maskmovq %mm1, %mm0 # sched: [1:1.00]
    605 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    606 ;
    607 ; BROADWELL-LABEL: test_maskmovq:
    608 ; BROADWELL:       # %bb.0:
    609 ; BROADWELL-NEXT:    maskmovq %mm1, %mm0 # sched: [1:1.00]
    610 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    611 ;
    612 ; SKYLAKE-LABEL: test_maskmovq:
    613 ; SKYLAKE:       # %bb.0:
    614 ; SKYLAKE-NEXT:    maskmovq %mm1, %mm0 # sched: [1:1.00]
    615 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    616 ;
    617 ; SKX-LABEL: test_maskmovq:
    618 ; SKX:       # %bb.0:
    619 ; SKX-NEXT:    maskmovq %mm1, %mm0 # sched: [1:1.00]
    620 ; SKX-NEXT:    retq # sched: [7:1.00]
    621 ;
    622 ; BTVER2-LABEL: test_maskmovq:
    623 ; BTVER2:       # %bb.0:
    624 ; BTVER2-NEXT:    maskmovq %mm1, %mm0 # sched: [1:0.50]
    625 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    626 ;
    627 ; ZNVER1-LABEL: test_maskmovq:
    628 ; ZNVER1:       # %bb.0:
    629 ; ZNVER1-NEXT:    maskmovq %mm1, %mm0 # sched: [100:0.25]
    630 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; IR under test: a single intrinsic call that forwards all three
        ; arguments unchanged.
    631   call void @llvm.x86.mmx.maskmovq(x86_mmx %a0, x86_mmx %a1, i8* %a2)
    632   ret void
    633 }
    634 declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*) nounwind
    635 
    636 define i32 @test_movd(x86_mmx %a0, i32 %a1, i32 *%a2) {
        ; Covers the movd forms that move a 32-bit value between a GPR or memory
        ; and an MMX register. %a1 and the i32 loaded from %a2 are each placed in
        ; lane 0 of a <2 x i32>, bitcast to x86_mmx and summed with padd.d; %a0 is
        ; then added to that sum. Lane 0 of the first sum is stored back to %a2
        ; and lane 0 of the second sum is returned.
        ; This define line carries no optsize attribute, unlike e.g. test_movdq2q.
    637 ; GENERIC-LABEL: test_movd:
    638 ; GENERIC:       # %bb.0:
    639 ; GENERIC-NEXT:    movd %edi, %mm1 # sched: [1:1.00]
    640 ; GENERIC-NEXT:    movd (%rsi), %mm2 # sched: [5:0.50]
    641 ; GENERIC-NEXT:    paddd %mm1, %mm2 # sched: [3:1.00]
    642 ; GENERIC-NEXT:    paddd %mm2, %mm0 # sched: [3:1.00]
    643 ; GENERIC-NEXT:    movd %mm2, %ecx # sched: [2:1.00]
    644 ; GENERIC-NEXT:    movd %mm0, %eax # sched: [2:1.00]
    645 ; GENERIC-NEXT:    movl %ecx, (%rsi) # sched: [1:1.00]
    646 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    647 ;
    648 ; ATOM-LABEL: test_movd:
    649 ; ATOM:       # %bb.0:
    650 ; ATOM-NEXT:    movd %edi, %mm1 # sched: [1:1.00]
    651 ; ATOM-NEXT:    movd (%rsi), %mm2 # sched: [1:1.00]
    652 ; ATOM-NEXT:    paddd %mm1, %mm2 # sched: [1:0.50]
    653 ; ATOM-NEXT:    paddd %mm2, %mm0 # sched: [1:0.50]
    654 ; ATOM-NEXT:    movd %mm2, %ecx # sched: [3:3.00]
    655 ; ATOM-NEXT:    movd %mm0, %eax # sched: [3:3.00]
    656 ; ATOM-NEXT:    movl %ecx, (%rsi) # sched: [1:1.00]
    657 ; ATOM-NEXT:    retq # sched: [79:39.50]
    658 ;
    659 ; SLM-LABEL: test_movd:
    660 ; SLM:       # %bb.0:
    661 ; SLM-NEXT:    movd (%rsi), %mm2 # sched: [3:1.00]
    662 ; SLM-NEXT:    movd %edi, %mm1 # sched: [1:0.50]
    663 ; SLM-NEXT:    paddd %mm1, %mm2 # sched: [1:0.50]
    664 ; SLM-NEXT:    paddd %mm2, %mm0 # sched: [1:0.50]
    665 ; SLM-NEXT:    movd %mm2, %ecx # sched: [1:0.50]
    666 ; SLM-NEXT:    movd %mm0, %eax # sched: [1:0.50]
    667 ; SLM-NEXT:    movl %ecx, (%rsi) # sched: [1:1.00]
    668 ; SLM-NEXT:    retq # sched: [4:1.00]
    669 ;
    670 ; SANDY-LABEL: test_movd:
    671 ; SANDY:       # %bb.0:
    672 ; SANDY-NEXT:    movd %edi, %mm1 # sched: [1:1.00]
    673 ; SANDY-NEXT:    movd (%rsi), %mm2 # sched: [5:0.50]
    674 ; SANDY-NEXT:    paddd %mm1, %mm2 # sched: [3:1.00]
    675 ; SANDY-NEXT:    paddd %mm2, %mm0 # sched: [3:1.00]
    676 ; SANDY-NEXT:    movd %mm2, %ecx # sched: [2:1.00]
    677 ; SANDY-NEXT:    movd %mm0, %eax # sched: [2:1.00]
    678 ; SANDY-NEXT:    movl %ecx, (%rsi) # sched: [1:1.00]
    679 ; SANDY-NEXT:    retq # sched: [1:1.00]
    680 ;
    681 ; HASWELL-LABEL: test_movd:
    682 ; HASWELL:       # %bb.0:
    683 ; HASWELL-NEXT:    movd %edi, %mm1 # sched: [1:1.00]
    684 ; HASWELL-NEXT:    movd (%rsi), %mm2 # sched: [5:0.50]
    685 ; HASWELL-NEXT:    paddd %mm1, %mm2 # sched: [1:0.50]
    686 ; HASWELL-NEXT:    paddd %mm2, %mm0 # sched: [1:0.50]
    687 ; HASWELL-NEXT:    movd %mm2, %ecx # sched: [1:1.00]
    688 ; HASWELL-NEXT:    movd %mm0, %eax # sched: [1:1.00]
    689 ; HASWELL-NEXT:    movl %ecx, (%rsi) # sched: [1:1.00]
    690 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    691 ;
    692 ; BROADWELL-LABEL: test_movd:
    693 ; BROADWELL:       # %bb.0:
    694 ; BROADWELL-NEXT:    movd %edi, %mm1 # sched: [1:1.00]
    695 ; BROADWELL-NEXT:    movd (%rsi), %mm2 # sched: [5:0.50]
    696 ; BROADWELL-NEXT:    paddd %mm1, %mm2 # sched: [1:0.50]
    697 ; BROADWELL-NEXT:    paddd %mm2, %mm0 # sched: [1:0.50]
    698 ; BROADWELL-NEXT:    movd %mm2, %ecx # sched: [1:1.00]
    699 ; BROADWELL-NEXT:    movd %mm0, %eax # sched: [1:1.00]
    700 ; BROADWELL-NEXT:    movl %ecx, (%rsi) # sched: [1:1.00]
    701 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    702 ;
    703 ; SKYLAKE-LABEL: test_movd:
    704 ; SKYLAKE:       # %bb.0:
    705 ; SKYLAKE-NEXT:    movd %edi, %mm1 # sched: [1:1.00]
    706 ; SKYLAKE-NEXT:    movd (%rsi), %mm2 # sched: [5:0.50]
    707 ; SKYLAKE-NEXT:    paddd %mm1, %mm2 # sched: [1:0.50]
    708 ; SKYLAKE-NEXT:    paddd %mm2, %mm0 # sched: [1:0.50]
    709 ; SKYLAKE-NEXT:    movd %mm2, %ecx # sched: [2:1.00]
    710 ; SKYLAKE-NEXT:    movd %mm0, %eax # sched: [2:1.00]
    711 ; SKYLAKE-NEXT:    movl %ecx, (%rsi) # sched: [1:1.00]
    712 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    713 ;
    714 ; SKX-LABEL: test_movd:
    715 ; SKX:       # %bb.0:
    716 ; SKX-NEXT:    movd %edi, %mm1 # sched: [1:1.00]
    717 ; SKX-NEXT:    movd (%rsi), %mm2 # sched: [5:0.50]
    718 ; SKX-NEXT:    paddd %mm1, %mm2 # sched: [1:0.50]
    719 ; SKX-NEXT:    paddd %mm2, %mm0 # sched: [1:0.50]
    720 ; SKX-NEXT:    movd %mm2, %ecx # sched: [2:1.00]
    721 ; SKX-NEXT:    movd %mm0, %eax # sched: [2:1.00]
    722 ; SKX-NEXT:    movl %ecx, (%rsi) # sched: [1:1.00]
    723 ; SKX-NEXT:    retq # sched: [7:1.00]
    724 ;
    725 ; BTVER2-LABEL: test_movd:
    726 ; BTVER2:       # %bb.0:
    727 ; BTVER2-NEXT:    movd %edi, %mm1 # sched: [8:0.50]
    728 ; BTVER2-NEXT:    movd (%rsi), %mm2 # sched: [5:1.00]
    729 ; BTVER2-NEXT:    paddd %mm1, %mm2 # sched: [1:0.50]
    730 ; BTVER2-NEXT:    paddd %mm2, %mm0 # sched: [1:0.50]
    731 ; BTVER2-NEXT:    movd %mm2, %ecx # sched: [4:1.00]
    732 ; BTVER2-NEXT:    movd %mm0, %eax # sched: [4:1.00]
    733 ; BTVER2-NEXT:    movl %ecx, (%rsi) # sched: [1:1.00]
    734 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    735 ;
    736 ; ZNVER1-LABEL: test_movd:
    737 ; ZNVER1:       # %bb.0:
    738 ; ZNVER1-NEXT:    movd (%rsi), %mm2 # sched: [8:0.50]
    739 ; ZNVER1-NEXT:    movd %edi, %mm1 # sched: [3:1.00]
    740 ; ZNVER1-NEXT:    paddd %mm1, %mm2 # sched: [1:0.25]
    741 ; ZNVER1-NEXT:    paddd %mm2, %mm0 # sched: [1:0.25]
    742 ; ZNVER1-NEXT:    movd %mm2, %ecx # sched: [2:1.00]
    743 ; ZNVER1-NEXT:    movd %mm0, %eax # sched: [2:1.00]
    744 ; ZNVER1-NEXT:    movl %ecx, (%rsi) # sched: [1:0.50]
    745 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    746   %1  = insertelement <2 x i32> undef, i32 %a1, i32 0
    747   %2  = bitcast <2 x i32> %1 to x86_mmx
    748   %3  = load i32, i32 *%a2
    749   %4  = insertelement <2 x i32> undef, i32 %3, i32 0
    750   %5  = bitcast <2 x i32> %4 to x86_mmx
    751   %6  = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %2, x86_mmx %5)
    752   %7  = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %a0, x86_mmx %6)
    753   %8  = bitcast x86_mmx %6 to <2 x i32>
    754   %9  = bitcast x86_mmx %7 to <2 x i32>
    755   %10 = extractelement <2 x i32> %8, i32 0
    756   %11 = extractelement <2 x i32> %9, i32 0
    757   store i32 %10, i32* %a2
    758   ret i32 %11
    759 }
    760 
    761 define i64 @test_movdq2q(<2 x i64> %a0) optsize {
        ; Covers movdq2q (XMM to MMX move). Element 0 of the <2 x i64> argument is
        ; reinterpreted as x86_mmx, added to itself with padd.d and returned as
        ; i64. Every CPU section below expects the same sequence of movdq2q,
        ; paddd, movq and retq; only the sched annotations differ.
    762 ; GENERIC-LABEL: test_movdq2q:
    763 ; GENERIC:       # %bb.0:
    764 ; GENERIC-NEXT:    movdq2q %xmm0, %mm0 # sched: [2:1.00]
    765 ; GENERIC-NEXT:    paddd %mm0, %mm0 # sched: [3:1.00]
    766 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
    767 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    768 ;
    769 ; ATOM-LABEL: test_movdq2q:
    770 ; ATOM:       # %bb.0:
    771 ; ATOM-NEXT:    movdq2q %xmm0, %mm0 # sched: [1:0.50]
    772 ; ATOM-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
    773 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
    774 ; ATOM-NEXT:    retq # sched: [79:39.50]
    775 ;
    776 ; SLM-LABEL: test_movdq2q:
    777 ; SLM:       # %bb.0:
    778 ; SLM-NEXT:    movdq2q %xmm0, %mm0 # sched: [1:0.50]
    779 ; SLM-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
    780 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
    781 ; SLM-NEXT:    retq # sched: [4:1.00]
    782 ;
    783 ; SANDY-LABEL: test_movdq2q:
    784 ; SANDY:       # %bb.0:
    785 ; SANDY-NEXT:    movdq2q %xmm0, %mm0 # sched: [2:1.00]
    786 ; SANDY-NEXT:    paddd %mm0, %mm0 # sched: [3:1.00]
    787 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
    788 ; SANDY-NEXT:    retq # sched: [1:1.00]
    789 ;
    790 ; HASWELL-LABEL: test_movdq2q:
    791 ; HASWELL:       # %bb.0:
    792 ; HASWELL-NEXT:    movdq2q %xmm0, %mm0 # sched: [2:0.67]
    793 ; HASWELL-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
    794 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
    795 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    796 ;
    797 ; BROADWELL-LABEL: test_movdq2q:
    798 ; BROADWELL:       # %bb.0:
    799 ; BROADWELL-NEXT:    movdq2q %xmm0, %mm0 # sched: [2:0.67]
    800 ; BROADWELL-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
    801 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
    802 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    803 ;
    804 ; SKYLAKE-LABEL: test_movdq2q:
    805 ; SKYLAKE:       # %bb.0:
    806 ; SKYLAKE-NEXT:    movdq2q %xmm0, %mm0 # sched: [2:1.00]
    807 ; SKYLAKE-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
    808 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
    809 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    810 ;
    811 ; SKX-LABEL: test_movdq2q:
    812 ; SKX:       # %bb.0:
    813 ; SKX-NEXT:    movdq2q %xmm0, %mm0 # sched: [2:1.00]
    814 ; SKX-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
    815 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
    816 ; SKX-NEXT:    retq # sched: [7:1.00]
    817 ;
    818 ; BTVER2-LABEL: test_movdq2q:
    819 ; BTVER2:       # %bb.0:
    820 ; BTVER2-NEXT:    movdq2q %xmm0, %mm0 # sched: [1:0.50]
    821 ; BTVER2-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
    822 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
    823 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    824 ;
    825 ; ZNVER1-LABEL: test_movdq2q:
    826 ; ZNVER1:       # %bb.0:
    827 ; ZNVER1-NEXT:    movdq2q %xmm0, %mm0 # sched: [1:0.25]
    828 ; ZNVER1-NEXT:    paddd %mm0, %mm0 # sched: [1:0.25]
    829 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
    830 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    831   %1 = extractelement <2 x i64> %a0, i32 0
    832   %2 = bitcast i64 %1 to x86_mmx
    833   %3 = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %2, x86_mmx %2)
    834   %4 = bitcast x86_mmx %3 to i64
    835   ret i64 %4
    836 }
    837 
    838 define void @test_movntq(x86_mmx* %a0, x86_mmx %a1) optsize {
        ; Covers movntq. The body is a single call to the llvm.x86.mmx.movnt.dq
        ; intrinsic with pointer %a0 and MMX value %a1; every CPU section below
        ; expects one movntq %mm0, (%rdi) followed by retq.
    839 ; GENERIC-LABEL: test_movntq:
    840 ; GENERIC:       # %bb.0:
    841 ; GENERIC-NEXT:    movntq %mm0, (%rdi) # sched: [1:1.00]
    842 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    843 ;
    844 ; ATOM-LABEL: test_movntq:
    845 ; ATOM:       # %bb.0:
    846 ; ATOM-NEXT:    movntq %mm0, (%rdi) # sched: [1:1.00]
    847 ; ATOM-NEXT:    retq # sched: [79:39.50]
    848 ;
    849 ; SLM-LABEL: test_movntq:
    850 ; SLM:       # %bb.0:
    851 ; SLM-NEXT:    movntq %mm0, (%rdi) # sched: [1:1.00]
    852 ; SLM-NEXT:    retq # sched: [4:1.00]
    853 ;
    854 ; SANDY-LABEL: test_movntq:
    855 ; SANDY:       # %bb.0:
    856 ; SANDY-NEXT:    movntq %mm0, (%rdi) # sched: [1:1.00]
    857 ; SANDY-NEXT:    retq # sched: [1:1.00]
    858 ;
    859 ; HASWELL-LABEL: test_movntq:
    860 ; HASWELL:       # %bb.0:
    861 ; HASWELL-NEXT:    movntq %mm0, (%rdi) # sched: [1:1.00]
    862 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    863 ;
    864 ; BROADWELL-LABEL: test_movntq:
    865 ; BROADWELL:       # %bb.0:
    866 ; BROADWELL-NEXT:    movntq %mm0, (%rdi) # sched: [1:1.00]
    867 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    868 ;
    869 ; SKYLAKE-LABEL: test_movntq:
    870 ; SKYLAKE:       # %bb.0:
    871 ; SKYLAKE-NEXT:    movntq %mm0, (%rdi) # sched: [1:1.00]
    872 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    873 ;
    874 ; SKX-LABEL: test_movntq:
    875 ; SKX:       # %bb.0:
    876 ; SKX-NEXT:    movntq %mm0, (%rdi) # sched: [1:1.00]
    877 ; SKX-NEXT:    retq # sched: [7:1.00]
    878 ;
    879 ; BTVER2-LABEL: test_movntq:
    880 ; BTVER2:       # %bb.0:
    881 ; BTVER2-NEXT:    movntq %mm0, (%rdi) # sched: [2:1.00]
    882 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    883 ;
    884 ; ZNVER1-LABEL: test_movntq:
    885 ; ZNVER1:       # %bb.0:
    886 ; ZNVER1-NEXT:    movntq %mm0, (%rdi) # sched: [1:0.50]
    887 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    888   call void @llvm.x86.mmx.movnt.dq(x86_mmx* %a0, x86_mmx %a1)
    889   ret void
    890 }
    891 declare void @llvm.x86.mmx.movnt.dq(x86_mmx*, x86_mmx) nounwind
    892 
    893 define void @test_movq(i64 *%a0) {
        ; Covers the MMX movq load and store forms. An i64 is loaded from %a0,
        ; bitcast to x86_mmx, added to itself with padd.d and the i64 result is
        ; stored back to %a0. Expected code is a movq load, paddd, a movq store
        ; and retq.
        ; This define line carries no optsize attribute, and the Atom section
        ; additionally expects two nop instructions before retq.
    894 ; GENERIC-LABEL: test_movq:
    895 ; GENERIC:       # %bb.0:
    896 ; GENERIC-NEXT:    movq (%rdi), %mm0 # sched: [5:0.50]
    897 ; GENERIC-NEXT:    paddd %mm0, %mm0 # sched: [3:1.00]
    898 ; GENERIC-NEXT:    movq %mm0, (%rdi) # sched: [1:1.00]
    899 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    900 ;
    901 ; ATOM-LABEL: test_movq:
    902 ; ATOM:       # %bb.0:
    903 ; ATOM-NEXT:    movq (%rdi), %mm0 # sched: [1:1.00]
    904 ; ATOM-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
    905 ; ATOM-NEXT:    movq %mm0, (%rdi) # sched: [1:1.00]
    906 ; ATOM-NEXT:    nop # sched: [1:0.50]
    907 ; ATOM-NEXT:    nop # sched: [1:0.50]
    908 ; ATOM-NEXT:    retq # sched: [79:39.50]
    909 ;
    910 ; SLM-LABEL: test_movq:
    911 ; SLM:       # %bb.0:
    912 ; SLM-NEXT:    movq (%rdi), %mm0 # sched: [3:1.00]
    913 ; SLM-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
    914 ; SLM-NEXT:    movq %mm0, (%rdi) # sched: [1:1.00]
    915 ; SLM-NEXT:    retq # sched: [4:1.00]
    916 ;
    917 ; SANDY-LABEL: test_movq:
    918 ; SANDY:       # %bb.0:
    919 ; SANDY-NEXT:    movq (%rdi), %mm0 # sched: [5:0.50]
    920 ; SANDY-NEXT:    paddd %mm0, %mm0 # sched: [3:1.00]
    921 ; SANDY-NEXT:    movq %mm0, (%rdi) # sched: [1:1.00]
    922 ; SANDY-NEXT:    retq # sched: [1:1.00]
    923 ;
    924 ; HASWELL-LABEL: test_movq:
    925 ; HASWELL:       # %bb.0:
    926 ; HASWELL-NEXT:    movq (%rdi), %mm0 # sched: [5:0.50]
    927 ; HASWELL-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
    928 ; HASWELL-NEXT:    movq %mm0, (%rdi) # sched: [1:1.00]
    929 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    930 ;
    931 ; BROADWELL-LABEL: test_movq:
    932 ; BROADWELL:       # %bb.0:
    933 ; BROADWELL-NEXT:    movq (%rdi), %mm0 # sched: [5:0.50]
    934 ; BROADWELL-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
    935 ; BROADWELL-NEXT:    movq %mm0, (%rdi) # sched: [1:1.00]
    936 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    937 ;
    938 ; SKYLAKE-LABEL: test_movq:
    939 ; SKYLAKE:       # %bb.0:
    940 ; SKYLAKE-NEXT:    movq (%rdi), %mm0 # sched: [5:0.50]
    941 ; SKYLAKE-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
    942 ; SKYLAKE-NEXT:    movq %mm0, (%rdi) # sched: [1:1.00]
    943 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    944 ;
    945 ; SKX-LABEL: test_movq:
    946 ; SKX:       # %bb.0:
    947 ; SKX-NEXT:    movq (%rdi), %mm0 # sched: [5:0.50]
    948 ; SKX-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
    949 ; SKX-NEXT:    movq %mm0, (%rdi) # sched: [1:1.00]
    950 ; SKX-NEXT:    retq # sched: [7:1.00]
    951 ;
    952 ; BTVER2-LABEL: test_movq:
    953 ; BTVER2:       # %bb.0:
    954 ; BTVER2-NEXT:    movq (%rdi), %mm0 # sched: [5:1.00]
    955 ; BTVER2-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
    956 ; BTVER2-NEXT:    movq %mm0, (%rdi) # sched: [2:1.00]
    957 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    958 ;
    959 ; ZNVER1-LABEL: test_movq:
    960 ; ZNVER1:       # %bb.0:
    961 ; ZNVER1-NEXT:    movq (%rdi), %mm0 # sched: [8:0.50]
    962 ; ZNVER1-NEXT:    paddd %mm0, %mm0 # sched: [1:0.25]
    963 ; ZNVER1-NEXT:    movq %mm0, (%rdi) # sched: [1:0.50]
    964 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    965   %1 = load i64, i64* %a0, align 8
    966   %2 = bitcast i64 %1 to x86_mmx
    967   %3 = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %2, x86_mmx %2)
    968   %4 = bitcast x86_mmx %3 to i64
    969   store i64 %4, i64* %a0, align 8
    970   ret void
    971 }
    972 
    973 define <2 x i64> @test_movq2dq(x86_mmx %a0) optsize {
        ; Covers movq2dq (MMX to XMM move). The x86_mmx argument is bitcast to i64
        ; and inserted as element 0 of an otherwise undef <2 x i64>, which is
        ; returned. Every CPU section below expects a single
        ; movq2dq %mm0, %xmm0 followed by retq.
    974 ; GENERIC-LABEL: test_movq2dq:
    975 ; GENERIC:       # %bb.0:
    976 ; GENERIC-NEXT:    movq2dq %mm0, %xmm0 # sched: [1:0.33]
    977 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    978 ;
    979 ; ATOM-LABEL: test_movq2dq:
    980 ; ATOM:       # %bb.0:
    981 ; ATOM-NEXT:    movq2dq %mm0, %xmm0 # sched: [1:0.50]
    982 ; ATOM-NEXT:    retq # sched: [79:39.50]
    983 ;
    984 ; SLM-LABEL: test_movq2dq:
    985 ; SLM:       # %bb.0:
    986 ; SLM-NEXT:    movq2dq %mm0, %xmm0 # sched: [1:0.50]
    987 ; SLM-NEXT:    retq # sched: [4:1.00]
    988 ;
    989 ; SANDY-LABEL: test_movq2dq:
    990 ; SANDY:       # %bb.0:
    991 ; SANDY-NEXT:    movq2dq %mm0, %xmm0 # sched: [1:0.33]
    992 ; SANDY-NEXT:    retq # sched: [1:1.00]
    993 ;
    994 ; HASWELL-LABEL: test_movq2dq:
    995 ; HASWELL:       # %bb.0:
    996 ; HASWELL-NEXT:    movq2dq %mm0, %xmm0 # sched: [1:1.00]
    997 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    998 ;
    999 ; BROADWELL-LABEL: test_movq2dq:
   1000 ; BROADWELL:       # %bb.0:
   1001 ; BROADWELL-NEXT:    movq2dq %mm0, %xmm0 # sched: [1:1.00]
   1002 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1003 ;
   1004 ; SKYLAKE-LABEL: test_movq2dq:
   1005 ; SKYLAKE:       # %bb.0:
   1006 ; SKYLAKE-NEXT:    movq2dq %mm0, %xmm0 # sched: [2:2.00]
   1007 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1008 ;
   1009 ; SKX-LABEL: test_movq2dq:
   1010 ; SKX:       # %bb.0:
   1011 ; SKX-NEXT:    movq2dq %mm0, %xmm0 # sched: [2:2.00]
   1012 ; SKX-NEXT:    retq # sched: [7:1.00]
   1013 ;
   1014 ; BTVER2-LABEL: test_movq2dq:
   1015 ; BTVER2:       # %bb.0:
   1016 ; BTVER2-NEXT:    movq2dq %mm0, %xmm0 # sched: [1:0.50]
   1017 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1018 ;
   1019 ; ZNVER1-LABEL: test_movq2dq:
   1020 ; ZNVER1:       # %bb.0:
   1021 ; ZNVER1-NEXT:    movq2dq %mm0, %xmm0 # sched: [1:0.25]
   1022 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   1023   %1 = bitcast x86_mmx %a0 to i64
   1024   %2 = insertelement <2 x i64> undef, i64 %1, i32 0
   1025   ret <2 x i64> %2
   1026 }
   1027 
   1028 define i64 @test_pabsb(x86_mmx *%a0) optsize {
        ; Covers pabsb on MMX registers. An x86_mmx value is loaded from %a0 and
        ; llvm.x86.ssse3.pabs.b is applied twice; the result is returned as i64.
        ; The expected output contains the memory-operand form pabsb (%rdi), %mm0
        ; followed by the register form pabsb %mm0, %mm0.
   1029 ; GENERIC-LABEL: test_pabsb:
   1030 ; GENERIC:       # %bb.0:
   1031 ; GENERIC-NEXT:    pabsb (%rdi), %mm0 # sched: [6:0.50]
   1032 ; GENERIC-NEXT:    pabsb %mm0, %mm0 # sched: [1:0.50]
   1033 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1034 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1035 ;
   1036 ; ATOM-LABEL: test_pabsb:
   1037 ; ATOM:       # %bb.0:
   1038 ; ATOM-NEXT:    pabsb (%rdi), %mm0 # sched: [1:1.00]
   1039 ; ATOM-NEXT:    pabsb %mm0, %mm0 # sched: [1:0.50]
   1040 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   1041 ; ATOM-NEXT:    retq # sched: [79:39.50]
   1042 ;
   1043 ; SLM-LABEL: test_pabsb:
   1044 ; SLM:       # %bb.0:
   1045 ; SLM-NEXT:    pabsb (%rdi), %mm0 # sched: [4:1.00]
   1046 ; SLM-NEXT:    pabsb %mm0, %mm0 # sched: [1:0.50]
   1047 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   1048 ; SLM-NEXT:    retq # sched: [4:1.00]
   1049 ;
   1050 ; SANDY-LABEL: test_pabsb:
   1051 ; SANDY:       # %bb.0:
   1052 ; SANDY-NEXT:    pabsb (%rdi), %mm0 # sched: [6:0.50]
   1053 ; SANDY-NEXT:    pabsb %mm0, %mm0 # sched: [1:0.50]
   1054 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1055 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1056 ;
   1057 ; HASWELL-LABEL: test_pabsb:
   1058 ; HASWELL:       # %bb.0:
   1059 ; HASWELL-NEXT:    pabsb (%rdi), %mm0 # sched: [6:0.50]
   1060 ; HASWELL-NEXT:    pabsb %mm0, %mm0 # sched: [1:0.50]
   1061 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   1062 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1063 ;
   1064 ; BROADWELL-LABEL: test_pabsb:
   1065 ; BROADWELL:       # %bb.0:
   1066 ; BROADWELL-NEXT:    pabsb (%rdi), %mm0 # sched: [6:0.50]
   1067 ; BROADWELL-NEXT:    pabsb %mm0, %mm0 # sched: [1:0.50]
   1068 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   1069 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1070 ;
   1071 ; SKYLAKE-LABEL: test_pabsb:
   1072 ; SKYLAKE:       # %bb.0:
   1073 ; SKYLAKE-NEXT:    pabsb (%rdi), %mm0 # sched: [6:0.50]
   1074 ; SKYLAKE-NEXT:    pabsb %mm0, %mm0 # sched: [1:0.50]
   1075 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1076 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1077 ;
   1078 ; SKX-LABEL: test_pabsb:
   1079 ; SKX:       # %bb.0:
   1080 ; SKX-NEXT:    pabsb (%rdi), %mm0 # sched: [6:0.50]
   1081 ; SKX-NEXT:    pabsb %mm0, %mm0 # sched: [1:0.50]
   1082 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1083 ; SKX-NEXT:    retq # sched: [7:1.00]
   1084 ;
   1085 ; BTVER2-LABEL: test_pabsb:
   1086 ; BTVER2:       # %bb.0:
   1087 ; BTVER2-NEXT:    pabsb (%rdi), %mm0 # sched: [6:1.00]
   1088 ; BTVER2-NEXT:    pabsb %mm0, %mm0 # sched: [1:0.50]
   1089 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   1090 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1091 ;
   1092 ; ZNVER1-LABEL: test_pabsb:
   1093 ; ZNVER1:       # %bb.0:
   1094 ; ZNVER1-NEXT:    pabsb (%rdi), %mm0 # sched: [8:0.50]
   1095 ; ZNVER1-NEXT:    pabsb %mm0, %mm0 # sched: [1:0.25]
   1096 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1097 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   1098   %1 = load x86_mmx, x86_mmx *%a0, align 8
   1099   %2 = call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %1)
   1100   %3 = call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %2)
   1101   %4 = bitcast x86_mmx %3 to i64
   1102   ret i64 %4
   1103 }
   1104 declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone
   1105 
   1106 define i64 @test_pabsd(x86_mmx *%a0) optsize {
        ; Covers pabsd on MMX registers. An x86_mmx value is loaded from %a0 and
        ; llvm.x86.ssse3.pabs.d is applied twice; the result is returned as i64.
        ; The expected output contains the memory-operand form pabsd (%rdi), %mm0
        ; followed by the register form pabsd %mm0, %mm0.
   1107 ; GENERIC-LABEL: test_pabsd:
   1108 ; GENERIC:       # %bb.0:
   1109 ; GENERIC-NEXT:    pabsd (%rdi), %mm0 # sched: [6:0.50]
   1110 ; GENERIC-NEXT:    pabsd %mm0, %mm0 # sched: [1:0.50]
   1111 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1112 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1113 ;
   1114 ; ATOM-LABEL: test_pabsd:
   1115 ; ATOM:       # %bb.0:
   1116 ; ATOM-NEXT:    pabsd (%rdi), %mm0 # sched: [1:1.00]
   1117 ; ATOM-NEXT:    pabsd %mm0, %mm0 # sched: [1:0.50]
   1118 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   1119 ; ATOM-NEXT:    retq # sched: [79:39.50]
   1120 ;
   1121 ; SLM-LABEL: test_pabsd:
   1122 ; SLM:       # %bb.0:
   1123 ; SLM-NEXT:    pabsd (%rdi), %mm0 # sched: [4:1.00]
   1124 ; SLM-NEXT:    pabsd %mm0, %mm0 # sched: [1:0.50]
   1125 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   1126 ; SLM-NEXT:    retq # sched: [4:1.00]
   1127 ;
   1128 ; SANDY-LABEL: test_pabsd:
   1129 ; SANDY:       # %bb.0:
   1130 ; SANDY-NEXT:    pabsd (%rdi), %mm0 # sched: [6:0.50]
   1131 ; SANDY-NEXT:    pabsd %mm0, %mm0 # sched: [1:0.50]
   1132 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1133 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1134 ;
   1135 ; HASWELL-LABEL: test_pabsd:
   1136 ; HASWELL:       # %bb.0:
   1137 ; HASWELL-NEXT:    pabsd (%rdi), %mm0 # sched: [6:0.50]
   1138 ; HASWELL-NEXT:    pabsd %mm0, %mm0 # sched: [1:0.50]
   1139 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   1140 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1141 ;
   1142 ; BROADWELL-LABEL: test_pabsd:
   1143 ; BROADWELL:       # %bb.0:
   1144 ; BROADWELL-NEXT:    pabsd (%rdi), %mm0 # sched: [6:0.50]
   1145 ; BROADWELL-NEXT:    pabsd %mm0, %mm0 # sched: [1:0.50]
   1146 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   1147 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1148 ;
   1149 ; SKYLAKE-LABEL: test_pabsd:
   1150 ; SKYLAKE:       # %bb.0:
   1151 ; SKYLAKE-NEXT:    pabsd (%rdi), %mm0 # sched: [6:0.50]
   1152 ; SKYLAKE-NEXT:    pabsd %mm0, %mm0 # sched: [1:0.50]
   1153 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1154 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1155 ;
   1156 ; SKX-LABEL: test_pabsd:
   1157 ; SKX:       # %bb.0:
   1158 ; SKX-NEXT:    pabsd (%rdi), %mm0 # sched: [6:0.50]
   1159 ; SKX-NEXT:    pabsd %mm0, %mm0 # sched: [1:0.50]
   1160 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1161 ; SKX-NEXT:    retq # sched: [7:1.00]
   1162 ;
   1163 ; BTVER2-LABEL: test_pabsd:
   1164 ; BTVER2:       # %bb.0:
   1165 ; BTVER2-NEXT:    pabsd (%rdi), %mm0 # sched: [6:1.00]
   1166 ; BTVER2-NEXT:    pabsd %mm0, %mm0 # sched: [1:0.50]
   1167 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   1168 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1169 ;
   1170 ; ZNVER1-LABEL: test_pabsd:
   1171 ; ZNVER1:       # %bb.0:
   1172 ; ZNVER1-NEXT:    pabsd (%rdi), %mm0 # sched: [8:0.50]
   1173 ; ZNVER1-NEXT:    pabsd %mm0, %mm0 # sched: [1:0.25]
   1174 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1175 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   1176   %1 = load x86_mmx, x86_mmx *%a0, align 8
   1177   %2 = call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %1)
   1178   %3 = call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %2)
   1179   %4 = bitcast x86_mmx %3 to i64
   1180   ret i64 %4
   1181 }
   1182 declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone
   1183 
   1184 define i64 @test_pabsw(x86_mmx *%a0) optsize {
        ; Covers pabsw on MMX registers. An x86_mmx value is loaded from %a0 and
        ; llvm.x86.ssse3.pabs.w is applied twice; the result is returned as i64.
        ; The expected output contains the memory-operand form pabsw (%rdi), %mm0
        ; followed by the register form pabsw %mm0, %mm0.
   1185 ; GENERIC-LABEL: test_pabsw:
   1186 ; GENERIC:       # %bb.0:
   1187 ; GENERIC-NEXT:    pabsw (%rdi), %mm0 # sched: [6:0.50]
   1188 ; GENERIC-NEXT:    pabsw %mm0, %mm0 # sched: [1:0.50]
   1189 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1190 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1191 ;
   1192 ; ATOM-LABEL: test_pabsw:
   1193 ; ATOM:       # %bb.0:
   1194 ; ATOM-NEXT:    pabsw (%rdi), %mm0 # sched: [1:1.00]
   1195 ; ATOM-NEXT:    pabsw %mm0, %mm0 # sched: [1:0.50]
   1196 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   1197 ; ATOM-NEXT:    retq # sched: [79:39.50]
   1198 ;
   1199 ; SLM-LABEL: test_pabsw:
   1200 ; SLM:       # %bb.0:
   1201 ; SLM-NEXT:    pabsw (%rdi), %mm0 # sched: [4:1.00]
   1202 ; SLM-NEXT:    pabsw %mm0, %mm0 # sched: [1:0.50]
   1203 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   1204 ; SLM-NEXT:    retq # sched: [4:1.00]
   1205 ;
   1206 ; SANDY-LABEL: test_pabsw:
   1207 ; SANDY:       # %bb.0:
   1208 ; SANDY-NEXT:    pabsw (%rdi), %mm0 # sched: [6:0.50]
   1209 ; SANDY-NEXT:    pabsw %mm0, %mm0 # sched: [1:0.50]
   1210 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1211 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1212 ;
   1213 ; HASWELL-LABEL: test_pabsw:
   1214 ; HASWELL:       # %bb.0:
   1215 ; HASWELL-NEXT:    pabsw (%rdi), %mm0 # sched: [6:0.50]
   1216 ; HASWELL-NEXT:    pabsw %mm0, %mm0 # sched: [1:0.50]
   1217 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   1218 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1219 ;
   1220 ; BROADWELL-LABEL: test_pabsw:
   1221 ; BROADWELL:       # %bb.0:
   1222 ; BROADWELL-NEXT:    pabsw (%rdi), %mm0 # sched: [6:0.50]
   1223 ; BROADWELL-NEXT:    pabsw %mm0, %mm0 # sched: [1:0.50]
   1224 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   1225 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1226 ;
   1227 ; SKYLAKE-LABEL: test_pabsw:
   1228 ; SKYLAKE:       # %bb.0:
   1229 ; SKYLAKE-NEXT:    pabsw (%rdi), %mm0 # sched: [6:0.50]
   1230 ; SKYLAKE-NEXT:    pabsw %mm0, %mm0 # sched: [1:0.50]
   1231 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1232 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1233 ;
   1234 ; SKX-LABEL: test_pabsw:
   1235 ; SKX:       # %bb.0:
   1236 ; SKX-NEXT:    pabsw (%rdi), %mm0 # sched: [6:0.50]
   1237 ; SKX-NEXT:    pabsw %mm0, %mm0 # sched: [1:0.50]
   1238 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1239 ; SKX-NEXT:    retq # sched: [7:1.00]
   1240 ;
   1241 ; BTVER2-LABEL: test_pabsw:
   1242 ; BTVER2:       # %bb.0:
   1243 ; BTVER2-NEXT:    pabsw (%rdi), %mm0 # sched: [6:1.00]
   1244 ; BTVER2-NEXT:    pabsw %mm0, %mm0 # sched: [1:0.50]
   1245 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   1246 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1247 ;
   1248 ; ZNVER1-LABEL: test_pabsw:
   1249 ; ZNVER1:       # %bb.0:
   1250 ; ZNVER1-NEXT:    pabsw (%rdi), %mm0 # sched: [8:0.50]
   1251 ; ZNVER1-NEXT:    pabsw %mm0, %mm0 # sched: [1:0.25]
   1252 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1253 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   1254   %1 = load x86_mmx, x86_mmx *%a0, align 8
   1255   %2 = call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %1)
   1256   %3 = call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %2)
   1257   %4 = bitcast x86_mmx %3 to i64
   1258   ret i64 %4
   1259 }
   1260 declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone
   1261 
   1262 define i64 @test_packssdw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
        ; Covers packssdw on MMX registers. %a0 and %a1 are combined with
        ; llvm.x86.mmx.packssdw, that result is combined with an x86_mmx loaded
        ; from %a2, and the final value is returned as i64. The expected output
        ; has the register form first, then the memory-operand form on (%rdi).
        ; NOTE(review) - the znver1 section annotates the memory-operand form
        ; with the same [1:0.50] as the register form, while every other model
        ; here reports a higher latency for the load form. Possibly a gap in
        ; that scheduler model; confirm before relying on the value.
   1263 ; GENERIC-LABEL: test_packssdw:
   1264 ; GENERIC:       # %bb.0:
   1265 ; GENERIC-NEXT:    packssdw %mm1, %mm0 # sched: [1:1.00]
   1266 ; GENERIC-NEXT:    packssdw (%rdi), %mm0 # sched: [6:1.00]
   1267 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1268 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1269 ;
   1270 ; ATOM-LABEL: test_packssdw:
   1271 ; ATOM:       # %bb.0:
   1272 ; ATOM-NEXT:    packssdw %mm1, %mm0 # sched: [1:0.50]
   1273 ; ATOM-NEXT:    packssdw (%rdi), %mm0 # sched: [1:1.00]
   1274 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   1275 ; ATOM-NEXT:    retq # sched: [79:39.50]
   1276 ;
   1277 ; SLM-LABEL: test_packssdw:
   1278 ; SLM:       # %bb.0:
   1279 ; SLM-NEXT:    packssdw %mm1, %mm0 # sched: [1:1.00]
   1280 ; SLM-NEXT:    packssdw (%rdi), %mm0 # sched: [4:1.00]
   1281 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   1282 ; SLM-NEXT:    retq # sched: [4:1.00]
   1283 ;
   1284 ; SANDY-LABEL: test_packssdw:
   1285 ; SANDY:       # %bb.0:
   1286 ; SANDY-NEXT:    packssdw %mm1, %mm0 # sched: [1:1.00]
   1287 ; SANDY-NEXT:    packssdw (%rdi), %mm0 # sched: [6:1.00]
   1288 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1289 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1290 ;
   1291 ; HASWELL-LABEL: test_packssdw:
   1292 ; HASWELL:       # %bb.0:
   1293 ; HASWELL-NEXT:    packssdw %mm1, %mm0 # sched: [3:2.00]
   1294 ; HASWELL-NEXT:    packssdw (%rdi), %mm0 # sched: [7:2.00]
   1295 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   1296 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1297 ;
   1298 ; BROADWELL-LABEL: test_packssdw:
   1299 ; BROADWELL:       # %bb.0:
   1300 ; BROADWELL-NEXT:    packssdw %mm1, %mm0 # sched: [3:2.00]
   1301 ; BROADWELL-NEXT:    packssdw (%rdi), %mm0 # sched: [7:2.00]
   1302 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   1303 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1304 ;
   1305 ; SKYLAKE-LABEL: test_packssdw:
   1306 ; SKYLAKE:       # %bb.0:
   1307 ; SKYLAKE-NEXT:    packssdw %mm1, %mm0 # sched: [3:2.00]
   1308 ; SKYLAKE-NEXT:    packssdw (%rdi), %mm0 # sched: [7:2.00]
   1309 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1310 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1311 ;
   1312 ; SKX-LABEL: test_packssdw:
   1313 ; SKX:       # %bb.0:
   1314 ; SKX-NEXT:    packssdw %mm1, %mm0 # sched: [3:2.00]
   1315 ; SKX-NEXT:    packssdw (%rdi), %mm0 # sched: [7:2.00]
   1316 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1317 ; SKX-NEXT:    retq # sched: [7:1.00]
   1318 ;
   1319 ; BTVER2-LABEL: test_packssdw:
   1320 ; BTVER2:       # %bb.0:
   1321 ; BTVER2-NEXT:    packssdw %mm1, %mm0 # sched: [1:0.50]
   1322 ; BTVER2-NEXT:    packssdw (%rdi), %mm0 # sched: [6:1.00]
   1323 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   1324 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1325 ;
   1326 ; ZNVER1-LABEL: test_packssdw:
   1327 ; ZNVER1:       # %bb.0:
   1328 ; ZNVER1-NEXT:    packssdw %mm1, %mm0 # sched: [1:0.50]
   1329 ; ZNVER1-NEXT:    packssdw (%rdi), %mm0 # sched: [1:0.50]
   1330 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1331 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   1332   %1 = call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %a0, x86_mmx %a1)
   1333   %2 = load x86_mmx, x86_mmx *%a2, align 8
   1334   %3 = call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %1, x86_mmx %2)
   1335   %4 = bitcast x86_mmx %3 to i64
   1336   ret i64 %4
   1337 }
   1338 declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone
   1339 
   1340 define i64 @test_packsswb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
        ; Covers packsswb on MMX registers. %a0 and %a1 are combined with
        ; llvm.x86.mmx.packsswb, that result is combined with an x86_mmx loaded
        ; from %a2, and the final value is returned as i64. The expected output
        ; has the register form first, then the memory-operand form on (%rdi).
        ; NOTE(review) - the znver1 section annotates the memory-operand form
        ; with the same [1:0.50] as the register form, while every other model
        ; here reports a higher latency for the load form. Possibly a gap in
        ; that scheduler model; confirm before relying on the value.
   1341 ; GENERIC-LABEL: test_packsswb:
   1342 ; GENERIC:       # %bb.0:
   1343 ; GENERIC-NEXT:    packsswb %mm1, %mm0 # sched: [1:1.00]
   1344 ; GENERIC-NEXT:    packsswb (%rdi), %mm0 # sched: [6:1.00]
   1345 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1346 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1347 ;
   1348 ; ATOM-LABEL: test_packsswb:
   1349 ; ATOM:       # %bb.0:
   1350 ; ATOM-NEXT:    packsswb %mm1, %mm0 # sched: [1:0.50]
   1351 ; ATOM-NEXT:    packsswb (%rdi), %mm0 # sched: [1:1.00]
   1352 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   1353 ; ATOM-NEXT:    retq # sched: [79:39.50]
   1354 ;
   1355 ; SLM-LABEL: test_packsswb:
   1356 ; SLM:       # %bb.0:
   1357 ; SLM-NEXT:    packsswb %mm1, %mm0 # sched: [1:1.00]
   1358 ; SLM-NEXT:    packsswb (%rdi), %mm0 # sched: [4:1.00]
   1359 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   1360 ; SLM-NEXT:    retq # sched: [4:1.00]
   1361 ;
   1362 ; SANDY-LABEL: test_packsswb:
   1363 ; SANDY:       # %bb.0:
   1364 ; SANDY-NEXT:    packsswb %mm1, %mm0 # sched: [1:1.00]
   1365 ; SANDY-NEXT:    packsswb (%rdi), %mm0 # sched: [6:1.00]
   1366 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1367 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1368 ;
   1369 ; HASWELL-LABEL: test_packsswb:
   1370 ; HASWELL:       # %bb.0:
   1371 ; HASWELL-NEXT:    packsswb %mm1, %mm0 # sched: [3:2.00]
   1372 ; HASWELL-NEXT:    packsswb (%rdi), %mm0 # sched: [7:2.00]
   1373 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   1374 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1375 ;
   1376 ; BROADWELL-LABEL: test_packsswb:
   1377 ; BROADWELL:       # %bb.0:
   1378 ; BROADWELL-NEXT:    packsswb %mm1, %mm0 # sched: [3:2.00]
   1379 ; BROADWELL-NEXT:    packsswb (%rdi), %mm0 # sched: [7:2.00]
   1380 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   1381 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1382 ;
   1383 ; SKYLAKE-LABEL: test_packsswb:
   1384 ; SKYLAKE:       # %bb.0:
   1385 ; SKYLAKE-NEXT:    packsswb %mm1, %mm0 # sched: [3:2.00]
   1386 ; SKYLAKE-NEXT:    packsswb (%rdi), %mm0 # sched: [7:2.00]
   1387 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1388 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1389 ;
   1390 ; SKX-LABEL: test_packsswb:
   1391 ; SKX:       # %bb.0:
   1392 ; SKX-NEXT:    packsswb %mm1, %mm0 # sched: [3:2.00]
   1393 ; SKX-NEXT:    packsswb (%rdi), %mm0 # sched: [7:2.00]
   1394 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1395 ; SKX-NEXT:    retq # sched: [7:1.00]
   1396 ;
   1397 ; BTVER2-LABEL: test_packsswb:
   1398 ; BTVER2:       # %bb.0:
   1399 ; BTVER2-NEXT:    packsswb %mm1, %mm0 # sched: [1:0.50]
   1400 ; BTVER2-NEXT:    packsswb (%rdi), %mm0 # sched: [6:1.00]
   1401 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   1402 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1403 ;
   1404 ; ZNVER1-LABEL: test_packsswb:
   1405 ; ZNVER1:       # %bb.0:
   1406 ; ZNVER1-NEXT:    packsswb %mm1, %mm0 # sched: [1:0.50]
   1407 ; ZNVER1-NEXT:    packsswb (%rdi), %mm0 # sched: [1:0.50]
   1408 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1409 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   1410   %1 = call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %a0, x86_mmx %a1)
   1411   %2 = load x86_mmx, x86_mmx *%a2, align 8
   1412   %3 = call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %1, x86_mmx %2)
   1413   %4 = bitcast x86_mmx %3 to i64
   1414   ret i64 %4
   1415 }
   1416 declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone
   1417 
   1418 define i64 @test_packuswb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
        ; Scheduling-annotation test for the MMX packuswb intrinsic. The assertions
        ; below pin, for every check prefix used by the RUN lines, the expected
        ; two-operand value form, the form taking its operand from (%rdi), the movq
        ; of the result into %rax and the return, each with its `# sched` comment.
        ; The assertion lines are autogenerated (see the note at the top of the
        ; file); regenerate them instead of editing them by hand.
   1419 ; GENERIC-LABEL: test_packuswb:
   1420 ; GENERIC:       # %bb.0:
   1421 ; GENERIC-NEXT:    packuswb %mm1, %mm0 # sched: [1:1.00]
   1422 ; GENERIC-NEXT:    packuswb (%rdi), %mm0 # sched: [6:1.00]
   1423 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1424 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1425 ;
   1426 ; ATOM-LABEL: test_packuswb:
   1427 ; ATOM:       # %bb.0:
   1428 ; ATOM-NEXT:    packuswb %mm1, %mm0 # sched: [1:0.50]
   1429 ; ATOM-NEXT:    packuswb (%rdi), %mm0 # sched: [1:1.00]
   1430 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   1431 ; ATOM-NEXT:    retq # sched: [79:39.50]
   1432 ;
   1433 ; SLM-LABEL: test_packuswb:
   1434 ; SLM:       # %bb.0:
   1435 ; SLM-NEXT:    packuswb %mm1, %mm0 # sched: [1:1.00]
   1436 ; SLM-NEXT:    packuswb (%rdi), %mm0 # sched: [4:1.00]
   1437 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   1438 ; SLM-NEXT:    retq # sched: [4:1.00]
   1439 ;
   1440 ; SANDY-LABEL: test_packuswb:
   1441 ; SANDY:       # %bb.0:
   1442 ; SANDY-NEXT:    packuswb %mm1, %mm0 # sched: [1:1.00]
   1443 ; SANDY-NEXT:    packuswb (%rdi), %mm0 # sched: [6:1.00]
   1444 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1445 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1446 ;
   1447 ; HASWELL-LABEL: test_packuswb:
   1448 ; HASWELL:       # %bb.0:
   1449 ; HASWELL-NEXT:    packuswb %mm1, %mm0 # sched: [3:2.00]
   1450 ; HASWELL-NEXT:    packuswb (%rdi), %mm0 # sched: [7:2.00]
   1451 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   1452 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1453 ;
   1454 ; BROADWELL-LABEL: test_packuswb:
   1455 ; BROADWELL:       # %bb.0:
   1456 ; BROADWELL-NEXT:    packuswb %mm1, %mm0 # sched: [3:2.00]
   1457 ; BROADWELL-NEXT:    packuswb (%rdi), %mm0 # sched: [7:2.00]
   1458 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   1459 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1460 ;
   1461 ; SKYLAKE-LABEL: test_packuswb:
   1462 ; SKYLAKE:       # %bb.0:
   1463 ; SKYLAKE-NEXT:    packuswb %mm1, %mm0 # sched: [3:2.00]
   1464 ; SKYLAKE-NEXT:    packuswb (%rdi), %mm0 # sched: [7:2.00]
   1465 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1466 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1467 ;
   1468 ; SKX-LABEL: test_packuswb:
   1469 ; SKX:       # %bb.0:
   1470 ; SKX-NEXT:    packuswb %mm1, %mm0 # sched: [3:2.00]
   1471 ; SKX-NEXT:    packuswb (%rdi), %mm0 # sched: [7:2.00]
   1472 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1473 ; SKX-NEXT:    retq # sched: [7:1.00]
   1474 ;
   1475 ; BTVER2-LABEL: test_packuswb:
   1476 ; BTVER2:       # %bb.0:
   1477 ; BTVER2-NEXT:    packuswb %mm1, %mm0 # sched: [1:0.50]
   1478 ; BTVER2-NEXT:    packuswb (%rdi), %mm0 # sched: [6:1.00]
   1479 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   1480 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1481 ;
   1482 ; ZNVER1-LABEL: test_packuswb:
   1483 ; ZNVER1:       # %bb.0:
   1484 ; ZNVER1-NEXT:    packuswb %mm1, %mm0 # sched: [1:0.50]
   1485 ; ZNVER1-NEXT:    packuswb (%rdi), %mm0 # sched: [1:0.50]
   1486 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1487 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; The first call combines the two value arguments; the second combines that
          ; result with the value loaded from %a2, which the assertions above expect
          ; as the (%rdi) operand. The bitcast hands the MMX result back as an i64.
   1488   %1 = call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %a0, x86_mmx %a1)
   1489   %2 = load x86_mmx, x86_mmx *%a2, align 8
   1490   %3 = call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %1, x86_mmx %2)
   1491   %4 = bitcast x86_mmx %3 to i64
   1492   ret i64 %4
   1493 }
   1494 declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone
   1495 
   1496 define i64 @test_paddb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
        ; Scheduling-annotation test for the MMX padd.b intrinsic (paddb). The
        ; assertions below pin, for every check prefix used by the RUN lines, the
        ; expected two-operand value form, the form taking its operand from (%rdi),
        ; the movq of the result into %rax and the return, each with its `# sched`
        ; comment. The assertion lines are autogenerated (see the note at the top of
        ; the file); regenerate them instead of editing them by hand.
   1497 ; GENERIC-LABEL: test_paddb:
   1498 ; GENERIC:       # %bb.0:
   1499 ; GENERIC-NEXT:    paddb %mm1, %mm0 # sched: [3:1.00]
   1500 ; GENERIC-NEXT:    paddb (%rdi), %mm0 # sched: [8:1.00]
   1501 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1502 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1503 ;
   1504 ; ATOM-LABEL: test_paddb:
   1505 ; ATOM:       # %bb.0:
   1506 ; ATOM-NEXT:    paddb %mm1, %mm0 # sched: [1:0.50]
   1507 ; ATOM-NEXT:    paddb (%rdi), %mm0 # sched: [1:1.00]
   1508 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   1509 ; ATOM-NEXT:    retq # sched: [79:39.50]
   1510 ;
   1511 ; SLM-LABEL: test_paddb:
   1512 ; SLM:       # %bb.0:
   1513 ; SLM-NEXT:    paddb %mm1, %mm0 # sched: [1:0.50]
   1514 ; SLM-NEXT:    paddb (%rdi), %mm0 # sched: [4:1.00]
   1515 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   1516 ; SLM-NEXT:    retq # sched: [4:1.00]
   1517 ;
   1518 ; SANDY-LABEL: test_paddb:
   1519 ; SANDY:       # %bb.0:
   1520 ; SANDY-NEXT:    paddb %mm1, %mm0 # sched: [3:1.00]
   1521 ; SANDY-NEXT:    paddb (%rdi), %mm0 # sched: [8:1.00]
   1522 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1523 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1524 ;
   1525 ; HASWELL-LABEL: test_paddb:
   1526 ; HASWELL:       # %bb.0:
   1527 ; HASWELL-NEXT:    paddb %mm1, %mm0 # sched: [1:0.50]
   1528 ; HASWELL-NEXT:    paddb (%rdi), %mm0 # sched: [6:0.50]
   1529 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   1530 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1531 ;
   1532 ; BROADWELL-LABEL: test_paddb:
   1533 ; BROADWELL:       # %bb.0:
   1534 ; BROADWELL-NEXT:    paddb %mm1, %mm0 # sched: [1:0.50]
   1535 ; BROADWELL-NEXT:    paddb (%rdi), %mm0 # sched: [6:0.50]
   1536 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   1537 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1538 ;
   1539 ; SKYLAKE-LABEL: test_paddb:
   1540 ; SKYLAKE:       # %bb.0:
   1541 ; SKYLAKE-NEXT:    paddb %mm1, %mm0 # sched: [1:0.50]
   1542 ; SKYLAKE-NEXT:    paddb (%rdi), %mm0 # sched: [6:0.50]
   1543 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1544 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1545 ;
   1546 ; SKX-LABEL: test_paddb:
   1547 ; SKX:       # %bb.0:
   1548 ; SKX-NEXT:    paddb %mm1, %mm0 # sched: [1:0.50]
   1549 ; SKX-NEXT:    paddb (%rdi), %mm0 # sched: [6:0.50]
   1550 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1551 ; SKX-NEXT:    retq # sched: [7:1.00]
   1552 ;
   1553 ; BTVER2-LABEL: test_paddb:
   1554 ; BTVER2:       # %bb.0:
   1555 ; BTVER2-NEXT:    paddb %mm1, %mm0 # sched: [1:0.50]
   1556 ; BTVER2-NEXT:    paddb (%rdi), %mm0 # sched: [6:1.00]
   1557 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   1558 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1559 ;
   1560 ; ZNVER1-LABEL: test_paddb:
   1561 ; ZNVER1:       # %bb.0:
   1562 ; ZNVER1-NEXT:    paddb %mm1, %mm0 # sched: [1:0.25]
   1563 ; ZNVER1-NEXT:    paddb (%rdi), %mm0 # sched: [8:0.50]
   1564 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1565 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; The first call combines the two value arguments; the second combines that
          ; result with the value loaded from %a2, which the assertions above expect
          ; as the (%rdi) operand. The bitcast hands the MMX result back as an i64.
   1566   %1 = call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %a0, x86_mmx %a1)
   1567   %2 = load x86_mmx, x86_mmx *%a2, align 8
   1568   %3 = call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %1, x86_mmx %2)
   1569   %4 = bitcast x86_mmx %3 to i64
   1570   ret i64 %4
   1571 }
   1572 declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone
   1573 
   1574 define i64 @test_paddd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
        ; Scheduling-annotation test for the MMX padd.d intrinsic (paddd). The
        ; assertions below pin, for every check prefix used by the RUN lines, the
        ; expected two-operand value form, the form taking its operand from (%rdi),
        ; the movq of the result into %rax and the return, each with its `# sched`
        ; comment. The assertion lines are autogenerated (see the note at the top of
        ; the file); regenerate them instead of editing them by hand.
   1575 ; GENERIC-LABEL: test_paddd:
   1576 ; GENERIC:       # %bb.0:
   1577 ; GENERIC-NEXT:    paddd %mm1, %mm0 # sched: [3:1.00]
   1578 ; GENERIC-NEXT:    paddd (%rdi), %mm0 # sched: [8:1.00]
   1579 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1580 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1581 ;
   1582 ; ATOM-LABEL: test_paddd:
   1583 ; ATOM:       # %bb.0:
   1584 ; ATOM-NEXT:    paddd %mm1, %mm0 # sched: [1:0.50]
   1585 ; ATOM-NEXT:    paddd (%rdi), %mm0 # sched: [1:1.00]
   1586 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   1587 ; ATOM-NEXT:    retq # sched: [79:39.50]
   1588 ;
   1589 ; SLM-LABEL: test_paddd:
   1590 ; SLM:       # %bb.0:
   1591 ; SLM-NEXT:    paddd %mm1, %mm0 # sched: [1:0.50]
   1592 ; SLM-NEXT:    paddd (%rdi), %mm0 # sched: [4:1.00]
   1593 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   1594 ; SLM-NEXT:    retq # sched: [4:1.00]
   1595 ;
   1596 ; SANDY-LABEL: test_paddd:
   1597 ; SANDY:       # %bb.0:
   1598 ; SANDY-NEXT:    paddd %mm1, %mm0 # sched: [3:1.00]
   1599 ; SANDY-NEXT:    paddd (%rdi), %mm0 # sched: [8:1.00]
   1600 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1601 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1602 ;
   1603 ; HASWELL-LABEL: test_paddd:
   1604 ; HASWELL:       # %bb.0:
   1605 ; HASWELL-NEXT:    paddd %mm1, %mm0 # sched: [1:0.50]
   1606 ; HASWELL-NEXT:    paddd (%rdi), %mm0 # sched: [6:0.50]
   1607 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   1608 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1609 ;
   1610 ; BROADWELL-LABEL: test_paddd:
   1611 ; BROADWELL:       # %bb.0:
   1612 ; BROADWELL-NEXT:    paddd %mm1, %mm0 # sched: [1:0.50]
   1613 ; BROADWELL-NEXT:    paddd (%rdi), %mm0 # sched: [6:0.50]
   1614 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   1615 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1616 ;
   1617 ; SKYLAKE-LABEL: test_paddd:
   1618 ; SKYLAKE:       # %bb.0:
   1619 ; SKYLAKE-NEXT:    paddd %mm1, %mm0 # sched: [1:0.50]
   1620 ; SKYLAKE-NEXT:    paddd (%rdi), %mm0 # sched: [6:0.50]
   1621 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1622 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1623 ;
   1624 ; SKX-LABEL: test_paddd:
   1625 ; SKX:       # %bb.0:
   1626 ; SKX-NEXT:    paddd %mm1, %mm0 # sched: [1:0.50]
   1627 ; SKX-NEXT:    paddd (%rdi), %mm0 # sched: [6:0.50]
   1628 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1629 ; SKX-NEXT:    retq # sched: [7:1.00]
   1630 ;
   1631 ; BTVER2-LABEL: test_paddd:
   1632 ; BTVER2:       # %bb.0:
   1633 ; BTVER2-NEXT:    paddd %mm1, %mm0 # sched: [1:0.50]
   1634 ; BTVER2-NEXT:    paddd (%rdi), %mm0 # sched: [6:1.00]
   1635 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   1636 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1637 ;
   1638 ; ZNVER1-LABEL: test_paddd:
   1639 ; ZNVER1:       # %bb.0:
   1640 ; ZNVER1-NEXT:    paddd %mm1, %mm0 # sched: [1:0.25]
   1641 ; ZNVER1-NEXT:    paddd (%rdi), %mm0 # sched: [8:0.50]
   1642 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1643 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; The first call combines the two value arguments; the second combines that
          ; result with the value loaded from %a2, which the assertions above expect
          ; as the (%rdi) operand. The bitcast hands the MMX result back as an i64.
   1644   %1 = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %a0, x86_mmx %a1)
   1645   %2 = load x86_mmx, x86_mmx *%a2, align 8
   1646   %3 = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %1, x86_mmx %2)
   1647   %4 = bitcast x86_mmx %3 to i64
   1648   ret i64 %4
   1649 }
   1650 declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone
   1651 
   1652 define i64 @test_paddq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
        ; Scheduling-annotation test for the MMX padd.q intrinsic (paddq). The
        ; assertions below pin, for every check prefix used by the RUN lines, the
        ; expected two-operand value form, the form taking its operand from (%rdi),
        ; the movq of the result into %rax and the return, each with its `# sched`
        ; comment. The assertion lines are autogenerated (see the note at the top of
        ; the file); regenerate them instead of editing them by hand.
   1653 ; GENERIC-LABEL: test_paddq:
   1654 ; GENERIC:       # %bb.0:
   1655 ; GENERIC-NEXT:    paddq %mm1, %mm0 # sched: [1:0.50]
   1656 ; GENERIC-NEXT:    paddq (%rdi), %mm0 # sched: [7:0.50]
   1657 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1658 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1659 ;
   1660 ; ATOM-LABEL: test_paddq:
   1661 ; ATOM:       # %bb.0:
   1662 ; ATOM-NEXT:    paddq %mm1, %mm0 # sched: [2:1.00]
   1663 ; ATOM-NEXT:    paddq (%rdi), %mm0 # sched: [3:1.50]
   1664 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   1665 ; ATOM-NEXT:    retq # sched: [79:39.50]
   1666 ;
   1667 ; SLM-LABEL: test_paddq:
   1668 ; SLM:       # %bb.0:
   1669 ; SLM-NEXT:    paddq %mm1, %mm0 # sched: [1:0.50]
   1670 ; SLM-NEXT:    paddq (%rdi), %mm0 # sched: [4:1.00]
   1671 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   1672 ; SLM-NEXT:    retq # sched: [4:1.00]
   1673 ;
   1674 ; SANDY-LABEL: test_paddq:
   1675 ; SANDY:       # %bb.0:
   1676 ; SANDY-NEXT:    paddq %mm1, %mm0 # sched: [1:0.50]
   1677 ; SANDY-NEXT:    paddq (%rdi), %mm0 # sched: [7:0.50]
   1678 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1679 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1680 ;
   1681 ; HASWELL-LABEL: test_paddq:
   1682 ; HASWELL:       # %bb.0:
   1683 ; HASWELL-NEXT:    paddq %mm1, %mm0 # sched: [1:0.50]
   1684 ; HASWELL-NEXT:    paddq (%rdi), %mm0 # sched: [6:0.50]
   1685 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   1686 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1687 ;
   1688 ; BROADWELL-LABEL: test_paddq:
   1689 ; BROADWELL:       # %bb.0:
   1690 ; BROADWELL-NEXT:    paddq %mm1, %mm0 # sched: [1:0.50]
   1691 ; BROADWELL-NEXT:    paddq (%rdi), %mm0 # sched: [6:0.50]
   1692 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   1693 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1694 ;
   1695 ; SKYLAKE-LABEL: test_paddq:
   1696 ; SKYLAKE:       # %bb.0:
   1697 ; SKYLAKE-NEXT:    paddq %mm1, %mm0 # sched: [1:0.50]
   1698 ; SKYLAKE-NEXT:    paddq (%rdi), %mm0 # sched: [6:0.50]
   1699 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1700 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1701 ;
   1702 ; SKX-LABEL: test_paddq:
   1703 ; SKX:       # %bb.0:
   1704 ; SKX-NEXT:    paddq %mm1, %mm0 # sched: [1:0.50]
   1705 ; SKX-NEXT:    paddq (%rdi), %mm0 # sched: [6:0.50]
   1706 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1707 ; SKX-NEXT:    retq # sched: [7:1.00]
   1708 ;
   1709 ; BTVER2-LABEL: test_paddq:
   1710 ; BTVER2:       # %bb.0:
   1711 ; BTVER2-NEXT:    paddq %mm1, %mm0 # sched: [1:0.50]
   1712 ; BTVER2-NEXT:    paddq (%rdi), %mm0 # sched: [6:1.00]
   1713 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   1714 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1715 ;
   1716 ; ZNVER1-LABEL: test_paddq:
   1717 ; ZNVER1:       # %bb.0:
   1718 ; ZNVER1-NEXT:    paddq %mm1, %mm0 # sched: [1:0.25]
   1719 ; ZNVER1-NEXT:    paddq (%rdi), %mm0 # sched: [8:0.50]
   1720 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1721 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; The first call combines the two value arguments; the second combines that
          ; result with the value loaded from %a2, which the assertions above expect
          ; as the (%rdi) operand. The bitcast hands the MMX result back as an i64.
   1722   %1 = call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %a0, x86_mmx %a1)
   1723   %2 = load x86_mmx, x86_mmx *%a2, align 8
   1724   %3 = call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %1, x86_mmx %2)
   1725   %4 = bitcast x86_mmx %3 to i64
   1726   ret i64 %4
   1727 }
   1728 declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone
   1729 
   1730 define i64 @test_paddsb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
        ; Scheduling-annotation test for the MMX padds.b intrinsic (paddsb). The
        ; assertions below pin, for every check prefix used by the RUN lines, the
        ; expected two-operand value form, the form taking its operand from (%rdi),
        ; the movq of the result into %rax and the return, each with its `# sched`
        ; comment. The assertion lines are autogenerated (see the note at the top of
        ; the file); regenerate them instead of editing them by hand.
   1731 ; GENERIC-LABEL: test_paddsb:
   1732 ; GENERIC:       # %bb.0:
   1733 ; GENERIC-NEXT:    paddsb %mm1, %mm0 # sched: [3:1.00]
   1734 ; GENERIC-NEXT:    paddsb (%rdi), %mm0 # sched: [8:1.00]
   1735 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1736 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1737 ;
   1738 ; ATOM-LABEL: test_paddsb:
   1739 ; ATOM:       # %bb.0:
   1740 ; ATOM-NEXT:    paddsb %mm1, %mm0 # sched: [1:0.50]
   1741 ; ATOM-NEXT:    paddsb (%rdi), %mm0 # sched: [1:1.00]
   1742 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   1743 ; ATOM-NEXT:    retq # sched: [79:39.50]
   1744 ;
   1745 ; SLM-LABEL: test_paddsb:
   1746 ; SLM:       # %bb.0:
   1747 ; SLM-NEXT:    paddsb %mm1, %mm0 # sched: [1:0.50]
   1748 ; SLM-NEXT:    paddsb (%rdi), %mm0 # sched: [4:1.00]
   1749 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   1750 ; SLM-NEXT:    retq # sched: [4:1.00]
   1751 ;
   1752 ; SANDY-LABEL: test_paddsb:
   1753 ; SANDY:       # %bb.0:
   1754 ; SANDY-NEXT:    paddsb %mm1, %mm0 # sched: [3:1.00]
   1755 ; SANDY-NEXT:    paddsb (%rdi), %mm0 # sched: [8:1.00]
   1756 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1757 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1758 ;
   1759 ; HASWELL-LABEL: test_paddsb:
   1760 ; HASWELL:       # %bb.0:
   1761 ; HASWELL-NEXT:    paddsb %mm1, %mm0 # sched: [1:0.50]
   1762 ; HASWELL-NEXT:    paddsb (%rdi), %mm0 # sched: [6:0.50]
   1763 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   1764 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1765 ;
   1766 ; BROADWELL-LABEL: test_paddsb:
   1767 ; BROADWELL:       # %bb.0:
   1768 ; BROADWELL-NEXT:    paddsb %mm1, %mm0 # sched: [1:0.50]
   1769 ; BROADWELL-NEXT:    paddsb (%rdi), %mm0 # sched: [6:0.50]
   1770 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   1771 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1772 ;
   1773 ; SKYLAKE-LABEL: test_paddsb:
   1774 ; SKYLAKE:       # %bb.0:
   1775 ; SKYLAKE-NEXT:    paddsb %mm1, %mm0 # sched: [1:1.00]
   1776 ; SKYLAKE-NEXT:    paddsb (%rdi), %mm0 # sched: [6:1.00]
   1777 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1778 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1779 ;
   1780 ; SKX-LABEL: test_paddsb:
   1781 ; SKX:       # %bb.0:
   1782 ; SKX-NEXT:    paddsb %mm1, %mm0 # sched: [1:1.00]
   1783 ; SKX-NEXT:    paddsb (%rdi), %mm0 # sched: [6:1.00]
   1784 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1785 ; SKX-NEXT:    retq # sched: [7:1.00]
   1786 ;
   1787 ; BTVER2-LABEL: test_paddsb:
   1788 ; BTVER2:       # %bb.0:
   1789 ; BTVER2-NEXT:    paddsb %mm1, %mm0 # sched: [1:0.50]
   1790 ; BTVER2-NEXT:    paddsb (%rdi), %mm0 # sched: [6:1.00]
   1791 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   1792 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1793 ;
   1794 ; ZNVER1-LABEL: test_paddsb:
   1795 ; ZNVER1:       # %bb.0:
   1796 ; ZNVER1-NEXT:    paddsb %mm1, %mm0 # sched: [1:0.25]
   1797 ; ZNVER1-NEXT:    paddsb (%rdi), %mm0 # sched: [8:0.50]
   1798 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1799 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; The first call combines the two value arguments; the second combines that
          ; result with the value loaded from %a2, which the assertions above expect
          ; as the (%rdi) operand. The bitcast hands the MMX result back as an i64.
   1800   %1 = call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %a0, x86_mmx %a1)
   1801   %2 = load x86_mmx, x86_mmx *%a2, align 8
   1802   %3 = call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %1, x86_mmx %2)
   1803   %4 = bitcast x86_mmx %3 to i64
   1804   ret i64 %4
   1805 }
   1806 declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone
   1807 
   1808 define i64 @test_paddsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
        ; Scheduling-annotation test for the MMX padds.w intrinsic (paddsw). The
        ; assertions below pin, for every check prefix used by the RUN lines, the
        ; expected two-operand value form, the form taking its operand from (%rdi),
        ; the movq of the result into %rax and the return, each with its `# sched`
        ; comment. The assertion lines are autogenerated (see the note at the top of
        ; the file); regenerate them instead of editing them by hand.
   1809 ; GENERIC-LABEL: test_paddsw:
   1810 ; GENERIC:       # %bb.0:
   1811 ; GENERIC-NEXT:    paddsw %mm1, %mm0 # sched: [3:1.00]
   1812 ; GENERIC-NEXT:    paddsw (%rdi), %mm0 # sched: [8:1.00]
   1813 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1814 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1815 ;
   1816 ; ATOM-LABEL: test_paddsw:
   1817 ; ATOM:       # %bb.0:
   1818 ; ATOM-NEXT:    paddsw %mm1, %mm0 # sched: [1:0.50]
   1819 ; ATOM-NEXT:    paddsw (%rdi), %mm0 # sched: [1:1.00]
   1820 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   1821 ; ATOM-NEXT:    retq # sched: [79:39.50]
   1822 ;
   1823 ; SLM-LABEL: test_paddsw:
   1824 ; SLM:       # %bb.0:
   1825 ; SLM-NEXT:    paddsw %mm1, %mm0 # sched: [1:0.50]
   1826 ; SLM-NEXT:    paddsw (%rdi), %mm0 # sched: [4:1.00]
   1827 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   1828 ; SLM-NEXT:    retq # sched: [4:1.00]
   1829 ;
   1830 ; SANDY-LABEL: test_paddsw:
   1831 ; SANDY:       # %bb.0:
   1832 ; SANDY-NEXT:    paddsw %mm1, %mm0 # sched: [3:1.00]
   1833 ; SANDY-NEXT:    paddsw (%rdi), %mm0 # sched: [8:1.00]
   1834 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1835 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1836 ;
   1837 ; HASWELL-LABEL: test_paddsw:
   1838 ; HASWELL:       # %bb.0:
   1839 ; HASWELL-NEXT:    paddsw %mm1, %mm0 # sched: [1:0.50]
   1840 ; HASWELL-NEXT:    paddsw (%rdi), %mm0 # sched: [6:0.50]
   1841 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   1842 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1843 ;
   1844 ; BROADWELL-LABEL: test_paddsw:
   1845 ; BROADWELL:       # %bb.0:
   1846 ; BROADWELL-NEXT:    paddsw %mm1, %mm0 # sched: [1:0.50]
   1847 ; BROADWELL-NEXT:    paddsw (%rdi), %mm0 # sched: [6:0.50]
   1848 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   1849 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1850 ;
   1851 ; SKYLAKE-LABEL: test_paddsw:
   1852 ; SKYLAKE:       # %bb.0:
   1853 ; SKYLAKE-NEXT:    paddsw %mm1, %mm0 # sched: [1:1.00]
   1854 ; SKYLAKE-NEXT:    paddsw (%rdi), %mm0 # sched: [6:1.00]
   1855 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1856 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1857 ;
   1858 ; SKX-LABEL: test_paddsw:
   1859 ; SKX:       # %bb.0:
   1860 ; SKX-NEXT:    paddsw %mm1, %mm0 # sched: [1:1.00]
   1861 ; SKX-NEXT:    paddsw (%rdi), %mm0 # sched: [6:1.00]
   1862 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1863 ; SKX-NEXT:    retq # sched: [7:1.00]
   1864 ;
   1865 ; BTVER2-LABEL: test_paddsw:
   1866 ; BTVER2:       # %bb.0:
   1867 ; BTVER2-NEXT:    paddsw %mm1, %mm0 # sched: [1:0.50]
   1868 ; BTVER2-NEXT:    paddsw (%rdi), %mm0 # sched: [6:1.00]
   1869 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   1870 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1871 ;
   1872 ; ZNVER1-LABEL: test_paddsw:
   1873 ; ZNVER1:       # %bb.0:
   1874 ; ZNVER1-NEXT:    paddsw %mm1, %mm0 # sched: [1:0.25]
   1875 ; ZNVER1-NEXT:    paddsw (%rdi), %mm0 # sched: [8:0.50]
   1876 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1877 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; The first call combines the two value arguments; the second combines that
          ; result with the value loaded from %a2, which the assertions above expect
          ; as the (%rdi) operand. The bitcast hands the MMX result back as an i64.
   1878   %1 = call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %a0, x86_mmx %a1)
   1879   %2 = load x86_mmx, x86_mmx *%a2, align 8
   1880   %3 = call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %1, x86_mmx %2)
   1881   %4 = bitcast x86_mmx %3 to i64
   1882   ret i64 %4
   1883 }
   1884 declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone
   1885 
   1886 define i64 @test_paddusb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
        ; Scheduling-annotation test for the MMX paddus.b intrinsic (paddusb). The
        ; assertions below pin, for every check prefix used by the RUN lines, the
        ; expected two-operand value form, the form taking its operand from (%rdi),
        ; the movq of the result into %rax and the return, each with its `# sched`
        ; comment. The assertion lines are autogenerated (see the note at the top of
        ; the file); regenerate them instead of editing them by hand.
   1887 ; GENERIC-LABEL: test_paddusb:
   1888 ; GENERIC:       # %bb.0:
   1889 ; GENERIC-NEXT:    paddusb %mm1, %mm0 # sched: [3:1.00]
   1890 ; GENERIC-NEXT:    paddusb (%rdi), %mm0 # sched: [8:1.00]
   1891 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1892 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1893 ;
   1894 ; ATOM-LABEL: test_paddusb:
   1895 ; ATOM:       # %bb.0:
   1896 ; ATOM-NEXT:    paddusb %mm1, %mm0 # sched: [1:0.50]
   1897 ; ATOM-NEXT:    paddusb (%rdi), %mm0 # sched: [1:1.00]
   1898 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   1899 ; ATOM-NEXT:    retq # sched: [79:39.50]
   1900 ;
   1901 ; SLM-LABEL: test_paddusb:
   1902 ; SLM:       # %bb.0:
   1903 ; SLM-NEXT:    paddusb %mm1, %mm0 # sched: [1:0.50]
   1904 ; SLM-NEXT:    paddusb (%rdi), %mm0 # sched: [4:1.00]
   1905 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   1906 ; SLM-NEXT:    retq # sched: [4:1.00]
   1907 ;
   1908 ; SANDY-LABEL: test_paddusb:
   1909 ; SANDY:       # %bb.0:
   1910 ; SANDY-NEXT:    paddusb %mm1, %mm0 # sched: [3:1.00]
   1911 ; SANDY-NEXT:    paddusb (%rdi), %mm0 # sched: [8:1.00]
   1912 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1913 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1914 ;
   1915 ; HASWELL-LABEL: test_paddusb:
   1916 ; HASWELL:       # %bb.0:
   1917 ; HASWELL-NEXT:    paddusb %mm1, %mm0 # sched: [1:0.50]
   1918 ; HASWELL-NEXT:    paddusb (%rdi), %mm0 # sched: [6:0.50]
   1919 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   1920 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1921 ;
   1922 ; BROADWELL-LABEL: test_paddusb:
   1923 ; BROADWELL:       # %bb.0:
   1924 ; BROADWELL-NEXT:    paddusb %mm1, %mm0 # sched: [1:0.50]
   1925 ; BROADWELL-NEXT:    paddusb (%rdi), %mm0 # sched: [6:0.50]
   1926 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   1927 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1928 ;
   1929 ; SKYLAKE-LABEL: test_paddusb:
   1930 ; SKYLAKE:       # %bb.0:
   1931 ; SKYLAKE-NEXT:    paddusb %mm1, %mm0 # sched: [1:1.00]
   1932 ; SKYLAKE-NEXT:    paddusb (%rdi), %mm0 # sched: [6:1.00]
   1933 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1934 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1935 ;
   1936 ; SKX-LABEL: test_paddusb:
   1937 ; SKX:       # %bb.0:
   1938 ; SKX-NEXT:    paddusb %mm1, %mm0 # sched: [1:1.00]
   1939 ; SKX-NEXT:    paddusb (%rdi), %mm0 # sched: [6:1.00]
   1940 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1941 ; SKX-NEXT:    retq # sched: [7:1.00]
   1942 ;
   1943 ; BTVER2-LABEL: test_paddusb:
   1944 ; BTVER2:       # %bb.0:
   1945 ; BTVER2-NEXT:    paddusb %mm1, %mm0 # sched: [1:0.50]
   1946 ; BTVER2-NEXT:    paddusb (%rdi), %mm0 # sched: [6:1.00]
   1947 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   1948 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1949 ;
   1950 ; ZNVER1-LABEL: test_paddusb:
   1951 ; ZNVER1:       # %bb.0:
   1952 ; ZNVER1-NEXT:    paddusb %mm1, %mm0 # sched: [1:0.25]
   1953 ; ZNVER1-NEXT:    paddusb (%rdi), %mm0 # sched: [8:0.50]
   1954 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1955 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; The first call combines the two value arguments; the second combines that
          ; result with the value loaded from %a2, which the assertions above expect
          ; as the (%rdi) operand. The bitcast hands the MMX result back as an i64.
   1956   %1 = call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %a0, x86_mmx %a1)
   1957   %2 = load x86_mmx, x86_mmx *%a2, align 8
   1958   %3 = call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %1, x86_mmx %2)
   1959   %4 = bitcast x86_mmx %3 to i64
   1960   ret i64 %4
   1961 }
   1962 declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone
   1963 
   1964 define i64 @test_paddusw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
        ; Scheduling-annotation test for the MMX paddus.w intrinsic (paddusw). The
        ; assertions below pin, for every check prefix used by the RUN lines, the
        ; expected two-operand value form, the form taking its operand from (%rdi),
        ; the movq of the result into %rax and the return, each with its `# sched`
        ; comment. The assertion lines are autogenerated (see the note at the top of
        ; the file); regenerate them instead of editing them by hand.
   1965 ; GENERIC-LABEL: test_paddusw:
   1966 ; GENERIC:       # %bb.0:
   1967 ; GENERIC-NEXT:    paddusw %mm1, %mm0 # sched: [3:1.00]
   1968 ; GENERIC-NEXT:    paddusw (%rdi), %mm0 # sched: [8:1.00]
   1969 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1970 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1971 ;
   1972 ; ATOM-LABEL: test_paddusw:
   1973 ; ATOM:       # %bb.0:
   1974 ; ATOM-NEXT:    paddusw %mm1, %mm0 # sched: [1:0.50]
   1975 ; ATOM-NEXT:    paddusw (%rdi), %mm0 # sched: [1:1.00]
   1976 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   1977 ; ATOM-NEXT:    retq # sched: [79:39.50]
   1978 ;
   1979 ; SLM-LABEL: test_paddusw:
   1980 ; SLM:       # %bb.0:
   1981 ; SLM-NEXT:    paddusw %mm1, %mm0 # sched: [1:0.50]
   1982 ; SLM-NEXT:    paddusw (%rdi), %mm0 # sched: [4:1.00]
   1983 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   1984 ; SLM-NEXT:    retq # sched: [4:1.00]
   1985 ;
   1986 ; SANDY-LABEL: test_paddusw:
   1987 ; SANDY:       # %bb.0:
   1988 ; SANDY-NEXT:    paddusw %mm1, %mm0 # sched: [3:1.00]
   1989 ; SANDY-NEXT:    paddusw (%rdi), %mm0 # sched: [8:1.00]
   1990 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   1991 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1992 ;
   1993 ; HASWELL-LABEL: test_paddusw:
   1994 ; HASWELL:       # %bb.0:
   1995 ; HASWELL-NEXT:    paddusw %mm1, %mm0 # sched: [1:0.50]
   1996 ; HASWELL-NEXT:    paddusw (%rdi), %mm0 # sched: [6:0.50]
   1997 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   1998 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1999 ;
   2000 ; BROADWELL-LABEL: test_paddusw:
   2001 ; BROADWELL:       # %bb.0:
   2002 ; BROADWELL-NEXT:    paddusw %mm1, %mm0 # sched: [1:0.50]
   2003 ; BROADWELL-NEXT:    paddusw (%rdi), %mm0 # sched: [6:0.50]
   2004 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   2005 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2006 ;
   2007 ; SKYLAKE-LABEL: test_paddusw:
   2008 ; SKYLAKE:       # %bb.0:
   2009 ; SKYLAKE-NEXT:    paddusw %mm1, %mm0 # sched: [1:1.00]
   2010 ; SKYLAKE-NEXT:    paddusw (%rdi), %mm0 # sched: [6:1.00]
   2011 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2012 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2013 ;
   2014 ; SKX-LABEL: test_paddusw:
   2015 ; SKX:       # %bb.0:
   2016 ; SKX-NEXT:    paddusw %mm1, %mm0 # sched: [1:1.00]
   2017 ; SKX-NEXT:    paddusw (%rdi), %mm0 # sched: [6:1.00]
   2018 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2019 ; SKX-NEXT:    retq # sched: [7:1.00]
   2020 ;
   2021 ; BTVER2-LABEL: test_paddusw:
   2022 ; BTVER2:       # %bb.0:
   2023 ; BTVER2-NEXT:    paddusw %mm1, %mm0 # sched: [1:0.50]
   2024 ; BTVER2-NEXT:    paddusw (%rdi), %mm0 # sched: [6:1.00]
   2025 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   2026 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2027 ;
   2028 ; ZNVER1-LABEL: test_paddusw:
   2029 ; ZNVER1:       # %bb.0:
   2030 ; ZNVER1-NEXT:    paddusw %mm1, %mm0 # sched: [1:0.25]
   2031 ; ZNVER1-NEXT:    paddusw (%rdi), %mm0 # sched: [8:0.50]
   2032 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2033 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; The first call combines the two value arguments; the second combines that
          ; result with the value loaded from %a2, which the assertions above expect
          ; as the (%rdi) operand. The bitcast hands the MMX result back as an i64.
   2034   %1 = call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %a0, x86_mmx %a1)
   2035   %2 = load x86_mmx, x86_mmx *%a2, align 8
   2036   %3 = call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %1, x86_mmx %2)
   2037   %4 = bitcast x86_mmx %3 to i64
   2038   ret i64 %4
   2039 }
   2040 declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone
   2041 
   2042 define i64 @test_paddw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
        ; Scheduling-annotation test for the MMX padd.w intrinsic (paddw). The
        ; assertions below pin, for every check prefix used by the RUN lines, the
        ; expected two-operand value form, the form taking its operand from (%rdi),
        ; the movq of the result into %rax and the return, each with its `# sched`
        ; comment. The assertion lines are autogenerated (see the note at the top of
        ; the file); regenerate them instead of editing them by hand.
   2043 ; GENERIC-LABEL: test_paddw:
   2044 ; GENERIC:       # %bb.0:
   2045 ; GENERIC-NEXT:    paddw %mm1, %mm0 # sched: [3:1.00]
   2046 ; GENERIC-NEXT:    paddw (%rdi), %mm0 # sched: [8:1.00]
   2047 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2048 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2049 ;
   2050 ; ATOM-LABEL: test_paddw:
   2051 ; ATOM:       # %bb.0:
   2052 ; ATOM-NEXT:    paddw %mm1, %mm0 # sched: [1:0.50]
   2053 ; ATOM-NEXT:    paddw (%rdi), %mm0 # sched: [1:1.00]
   2054 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   2055 ; ATOM-NEXT:    retq # sched: [79:39.50]
   2056 ;
   2057 ; SLM-LABEL: test_paddw:
   2058 ; SLM:       # %bb.0:
   2059 ; SLM-NEXT:    paddw %mm1, %mm0 # sched: [1:0.50]
   2060 ; SLM-NEXT:    paddw (%rdi), %mm0 # sched: [4:1.00]
   2061 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   2062 ; SLM-NEXT:    retq # sched: [4:1.00]
   2063 ;
   2064 ; SANDY-LABEL: test_paddw:
   2065 ; SANDY:       # %bb.0:
   2066 ; SANDY-NEXT:    paddw %mm1, %mm0 # sched: [3:1.00]
   2067 ; SANDY-NEXT:    paddw (%rdi), %mm0 # sched: [8:1.00]
   2068 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2069 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2070 ;
   2071 ; HASWELL-LABEL: test_paddw:
   2072 ; HASWELL:       # %bb.0:
   2073 ; HASWELL-NEXT:    paddw %mm1, %mm0 # sched: [1:0.50]
   2074 ; HASWELL-NEXT:    paddw (%rdi), %mm0 # sched: [6:0.50]
   2075 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   2076 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2077 ;
   2078 ; BROADWELL-LABEL: test_paddw:
   2079 ; BROADWELL:       # %bb.0:
   2080 ; BROADWELL-NEXT:    paddw %mm1, %mm0 # sched: [1:0.50]
   2081 ; BROADWELL-NEXT:    paddw (%rdi), %mm0 # sched: [6:0.50]
   2082 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   2083 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2084 ;
   2085 ; SKYLAKE-LABEL: test_paddw:
   2086 ; SKYLAKE:       # %bb.0:
   2087 ; SKYLAKE-NEXT:    paddw %mm1, %mm0 # sched: [1:0.50]
   2088 ; SKYLAKE-NEXT:    paddw (%rdi), %mm0 # sched: [6:0.50]
   2089 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2090 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2091 ;
   2092 ; SKX-LABEL: test_paddw:
   2093 ; SKX:       # %bb.0:
   2094 ; SKX-NEXT:    paddw %mm1, %mm0 # sched: [1:0.50]
   2095 ; SKX-NEXT:    paddw (%rdi), %mm0 # sched: [6:0.50]
   2096 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2097 ; SKX-NEXT:    retq # sched: [7:1.00]
   2098 ;
   2099 ; BTVER2-LABEL: test_paddw:
   2100 ; BTVER2:       # %bb.0:
   2101 ; BTVER2-NEXT:    paddw %mm1, %mm0 # sched: [1:0.50]
   2102 ; BTVER2-NEXT:    paddw (%rdi), %mm0 # sched: [6:1.00]
   2103 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   2104 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2105 ;
   2106 ; ZNVER1-LABEL: test_paddw:
   2107 ; ZNVER1:       # %bb.0:
   2108 ; ZNVER1-NEXT:    paddw %mm1, %mm0 # sched: [1:0.25]
   2109 ; ZNVER1-NEXT:    paddw (%rdi), %mm0 # sched: [8:0.50]
   2110 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2111 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; The first call combines the two value arguments; the second combines that
          ; result with the value loaded from %a2, which the assertions above expect
          ; as the (%rdi) operand. The bitcast hands the MMX result back as an i64.
   2112   %1 = call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %a0, x86_mmx %a1)
   2113   %2 = load x86_mmx, x86_mmx *%a2, align 8
   2114   %3 = call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %1, x86_mmx %2)
   2115   %4 = bitcast x86_mmx %3 to i64
   2116   ret i64 %4
   2117 }
   2118 declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone
   2119 
   2120 define i64 @test_palignr(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
   2121 ; GENERIC-LABEL: test_palignr:
   2122 ; GENERIC:       # %bb.0:
   2123 ; GENERIC-NEXT:    palignr $1, %mm1, %mm0 # sched: [1:0.50]
   2124 ; GENERIC-NEXT:    palignr $1, (%rdi), %mm0 # sched: [6:0.50]
   2125 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2126 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2127 ;
   2128 ; ATOM-LABEL: test_palignr:
   2129 ; ATOM:       # %bb.0:
   2130 ; ATOM-NEXT:    palignr $1, %mm1, %mm0 # sched: [1:1.00]
   2131 ; ATOM-NEXT:    palignr $1, (%rdi), %mm0 # sched: [1:1.00]
   2132 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   2133 ; ATOM-NEXT:    retq # sched: [79:39.50]
   2134 ;
   2135 ; SLM-LABEL: test_palignr:
   2136 ; SLM:       # %bb.0:
   2137 ; SLM-NEXT:    palignr $1, %mm1, %mm0 # sched: [1:1.00]
   2138 ; SLM-NEXT:    palignr $1, (%rdi), %mm0 # sched: [4:1.00]
   2139 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   2140 ; SLM-NEXT:    retq # sched: [4:1.00]
   2141 ;
   2142 ; SANDY-LABEL: test_palignr:
   2143 ; SANDY:       # %bb.0:
   2144 ; SANDY-NEXT:    palignr $1, %mm1, %mm0 # sched: [1:0.50]
   2145 ; SANDY-NEXT:    palignr $1, (%rdi), %mm0 # sched: [6:0.50]
   2146 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2147 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2148 ;
   2149 ; HASWELL-LABEL: test_palignr:
   2150 ; HASWELL:       # %bb.0:
   2151 ; HASWELL-NEXT:    palignr $1, %mm1, %mm0 # sched: [1:1.00]
   2152 ; HASWELL-NEXT:    palignr $1, (%rdi), %mm0 # sched: [6:1.00]
   2153 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   2154 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2155 ;
   2156 ; BROADWELL-LABEL: test_palignr:
   2157 ; BROADWELL:       # %bb.0:
   2158 ; BROADWELL-NEXT:    palignr $1, %mm1, %mm0 # sched: [1:1.00]
   2159 ; BROADWELL-NEXT:    palignr $1, (%rdi), %mm0 # sched: [6:1.00]
   2160 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   2161 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2162 ;
   2163 ; SKYLAKE-LABEL: test_palignr:
   2164 ; SKYLAKE:       # %bb.0:
   2165 ; SKYLAKE-NEXT:    palignr $1, %mm1, %mm0 # sched: [1:1.00]
   2166 ; SKYLAKE-NEXT:    palignr $1, (%rdi), %mm0 # sched: [6:1.00]
   2167 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2168 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2169 ;
   2170 ; SKX-LABEL: test_palignr:
   2171 ; SKX:       # %bb.0:
   2172 ; SKX-NEXT:    palignr $1, %mm1, %mm0 # sched: [1:1.00]
   2173 ; SKX-NEXT:    palignr $1, (%rdi), %mm0 # sched: [6:1.00]
   2174 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2175 ; SKX-NEXT:    retq # sched: [7:1.00]
   2176 ;
   2177 ; BTVER2-LABEL: test_palignr:
   2178 ; BTVER2:       # %bb.0:
   2179 ; BTVER2-NEXT:    palignr $1, %mm1, %mm0 # sched: [1:0.50]
   2180 ; BTVER2-NEXT:    palignr $1, (%rdi), %mm0 # sched: [6:1.00]
   2181 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   2182 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2183 ;
   2184 ; ZNVER1-LABEL: test_palignr:
   2185 ; ZNVER1:       # %bb.0:
   2186 ; ZNVER1-NEXT:    palignr $1, %mm1, %mm0 # sched: [1:0.25]
   2187 ; ZNVER1-NEXT:    palignr $1, (%rdi), %mm0 # sched: [8:0.50]
   2188 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2189 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Test body: call @llvm.x86.mmx.palignr.b on the two register arguments,
          ; then call it again on that result and a value loaded from %a2, so the
          ; assertions above cover both the reg-reg and the reg-mem form of palignr.
          ; Both calls pass the constant i8 1 as third operand (the $1 immediate in
          ; the expected assembly). The MMX result is bitcast to i64 for the return.
   2190   %1 = call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %a0, x86_mmx %a1, i8 1)
   2191   %2 = load x86_mmx, x86_mmx *%a2, align 8
   2192   %3 = call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %1, x86_mmx %2, i8 1)
   2193   %4 = bitcast x86_mmx %3 to i64
   2194   ret i64 %4
   2195 }
        ; Declaration of the intrinsic called twice by @test_palignr.
   2196 declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone
   2197 
   2198 define i64 @test_pand(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
   2199 ; GENERIC-LABEL: test_pand:
   2200 ; GENERIC:       # %bb.0:
   2201 ; GENERIC-NEXT:    pand %mm1, %mm0 # sched: [1:0.33]
   2202 ; GENERIC-NEXT:    pand (%rdi), %mm0 # sched: [6:0.50]
   2203 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2204 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2205 ;
   2206 ; ATOM-LABEL: test_pand:
   2207 ; ATOM:       # %bb.0:
   2208 ; ATOM-NEXT:    pand %mm1, %mm0 # sched: [1:0.50]
   2209 ; ATOM-NEXT:    pand (%rdi), %mm0 # sched: [1:1.00]
   2210 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   2211 ; ATOM-NEXT:    retq # sched: [79:39.50]
   2212 ;
   2213 ; SLM-LABEL: test_pand:
   2214 ; SLM:       # %bb.0:
   2215 ; SLM-NEXT:    pand %mm1, %mm0 # sched: [1:0.50]
   2216 ; SLM-NEXT:    pand (%rdi), %mm0 # sched: [4:1.00]
   2217 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   2218 ; SLM-NEXT:    retq # sched: [4:1.00]
   2219 ;
   2220 ; SANDY-LABEL: test_pand:
   2221 ; SANDY:       # %bb.0:
   2222 ; SANDY-NEXT:    pand %mm1, %mm0 # sched: [1:0.33]
   2223 ; SANDY-NEXT:    pand (%rdi), %mm0 # sched: [6:0.50]
   2224 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2225 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2226 ;
   2227 ; HASWELL-LABEL: test_pand:
   2228 ; HASWELL:       # %bb.0:
   2229 ; HASWELL-NEXT:    pand %mm1, %mm0 # sched: [1:0.33]
   2230 ; HASWELL-NEXT:    pand (%rdi), %mm0 # sched: [6:0.50]
   2231 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   2232 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2233 ;
   2234 ; BROADWELL-LABEL: test_pand:
   2235 ; BROADWELL:       # %bb.0:
   2236 ; BROADWELL-NEXT:    pand %mm1, %mm0 # sched: [1:0.33]
   2237 ; BROADWELL-NEXT:    pand (%rdi), %mm0 # sched: [6:0.50]
   2238 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   2239 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2240 ;
   2241 ; SKYLAKE-LABEL: test_pand:
   2242 ; SKYLAKE:       # %bb.0:
   2243 ; SKYLAKE-NEXT:    pand %mm1, %mm0 # sched: [1:0.50]
   2244 ; SKYLAKE-NEXT:    pand (%rdi), %mm0 # sched: [6:0.50]
   2245 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2246 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2247 ;
   2248 ; SKX-LABEL: test_pand:
   2249 ; SKX:       # %bb.0:
   2250 ; SKX-NEXT:    pand %mm1, %mm0 # sched: [1:0.50]
   2251 ; SKX-NEXT:    pand (%rdi), %mm0 # sched: [6:0.50]
   2252 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2253 ; SKX-NEXT:    retq # sched: [7:1.00]
   2254 ;
   2255 ; BTVER2-LABEL: test_pand:
   2256 ; BTVER2:       # %bb.0:
   2257 ; BTVER2-NEXT:    pand %mm1, %mm0 # sched: [1:0.50]
   2258 ; BTVER2-NEXT:    pand (%rdi), %mm0 # sched: [6:1.00]
   2259 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   2260 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2261 ;
   2262 ; ZNVER1-LABEL: test_pand:
   2263 ; ZNVER1:       # %bb.0:
   2264 ; ZNVER1-NEXT:    pand %mm1, %mm0 # sched: [1:0.25]
   2265 ; ZNVER1-NEXT:    pand (%rdi), %mm0 # sched: [8:0.50]
   2266 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2267 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Test body: call @llvm.x86.mmx.pand on the two register arguments,
          ; then call it again on that result and a value loaded from %a2, so the
          ; assertions above cover both the reg-reg and the reg-mem form of pand.
          ; The MMX result is bitcast to i64 for the return (the movq into %rax).
   2268   %1 = call x86_mmx @llvm.x86.mmx.pand(x86_mmx %a0, x86_mmx %a1)
   2269   %2 = load x86_mmx, x86_mmx *%a2, align 8
   2270   %3 = call x86_mmx @llvm.x86.mmx.pand(x86_mmx %1, x86_mmx %2)
   2271   %4 = bitcast x86_mmx %3 to i64
   2272   ret i64 %4
   2273 }
        ; Declaration of the intrinsic called twice by @test_pand.
   2274 declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone
   2275 
   2276 define i64 @test_pandn(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
   2277 ; GENERIC-LABEL: test_pandn:
   2278 ; GENERIC:       # %bb.0:
   2279 ; GENERIC-NEXT:    pandn %mm1, %mm0 # sched: [1:0.33]
   2280 ; GENERIC-NEXT:    pandn (%rdi), %mm0 # sched: [6:0.50]
   2281 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2282 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2283 ;
   2284 ; ATOM-LABEL: test_pandn:
   2285 ; ATOM:       # %bb.0:
   2286 ; ATOM-NEXT:    pandn %mm1, %mm0 # sched: [1:0.50]
   2287 ; ATOM-NEXT:    pandn (%rdi), %mm0 # sched: [1:1.00]
   2288 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   2289 ; ATOM-NEXT:    retq # sched: [79:39.50]
   2290 ;
   2291 ; SLM-LABEL: test_pandn:
   2292 ; SLM:       # %bb.0:
   2293 ; SLM-NEXT:    pandn %mm1, %mm0 # sched: [1:0.50]
   2294 ; SLM-NEXT:    pandn (%rdi), %mm0 # sched: [4:1.00]
   2295 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   2296 ; SLM-NEXT:    retq # sched: [4:1.00]
   2297 ;
   2298 ; SANDY-LABEL: test_pandn:
   2299 ; SANDY:       # %bb.0:
   2300 ; SANDY-NEXT:    pandn %mm1, %mm0 # sched: [1:0.33]
   2301 ; SANDY-NEXT:    pandn (%rdi), %mm0 # sched: [6:0.50]
   2302 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2303 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2304 ;
   2305 ; HASWELL-LABEL: test_pandn:
   2306 ; HASWELL:       # %bb.0:
   2307 ; HASWELL-NEXT:    pandn %mm1, %mm0 # sched: [1:0.33]
   2308 ; HASWELL-NEXT:    pandn (%rdi), %mm0 # sched: [6:0.50]
   2309 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   2310 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2311 ;
   2312 ; BROADWELL-LABEL: test_pandn:
   2313 ; BROADWELL:       # %bb.0:
   2314 ; BROADWELL-NEXT:    pandn %mm1, %mm0 # sched: [1:0.33]
   2315 ; BROADWELL-NEXT:    pandn (%rdi), %mm0 # sched: [6:0.50]
   2316 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   2317 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2318 ;
   2319 ; SKYLAKE-LABEL: test_pandn:
   2320 ; SKYLAKE:       # %bb.0:
   2321 ; SKYLAKE-NEXT:    pandn %mm1, %mm0 # sched: [1:0.50]
   2322 ; SKYLAKE-NEXT:    pandn (%rdi), %mm0 # sched: [6:0.50]
   2323 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2324 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2325 ;
   2326 ; SKX-LABEL: test_pandn:
   2327 ; SKX:       # %bb.0:
   2328 ; SKX-NEXT:    pandn %mm1, %mm0 # sched: [1:0.50]
   2329 ; SKX-NEXT:    pandn (%rdi), %mm0 # sched: [6:0.50]
   2330 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2331 ; SKX-NEXT:    retq # sched: [7:1.00]
   2332 ;
   2333 ; BTVER2-LABEL: test_pandn:
   2334 ; BTVER2:       # %bb.0:
   2335 ; BTVER2-NEXT:    pandn %mm1, %mm0 # sched: [1:0.50]
   2336 ; BTVER2-NEXT:    pandn (%rdi), %mm0 # sched: [6:1.00]
   2337 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   2338 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2339 ;
   2340 ; ZNVER1-LABEL: test_pandn:
   2341 ; ZNVER1:       # %bb.0:
   2342 ; ZNVER1-NEXT:    pandn %mm1, %mm0 # sched: [1:0.25]
   2343 ; ZNVER1-NEXT:    pandn (%rdi), %mm0 # sched: [8:0.50]
   2344 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2345 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Test body: call @llvm.x86.mmx.pandn on the two register arguments,
          ; then call it again on that result and a value loaded from %a2, so the
          ; assertions above cover both the reg-reg and the reg-mem form of pandn.
          ; The MMX result is bitcast to i64 for the return (the movq into %rax).
   2346   %1 = call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %a0, x86_mmx %a1)
   2347   %2 = load x86_mmx, x86_mmx *%a2, align 8
   2348   %3 = call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %1, x86_mmx %2)
   2349   %4 = bitcast x86_mmx %3 to i64
   2350   ret i64 %4
   2351 }
        ; Declaration of the intrinsic called twice by @test_pandn.
   2352 declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone
   2353 
   2354 define i64 @test_pavgb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
   2355 ; GENERIC-LABEL: test_pavgb:
   2356 ; GENERIC:       # %bb.0:
   2357 ; GENERIC-NEXT:    pavgb %mm1, %mm0 # sched: [3:1.00]
   2358 ; GENERIC-NEXT:    pavgb (%rdi), %mm0 # sched: [8:1.00]
   2359 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2360 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2361 ;
   2362 ; ATOM-LABEL: test_pavgb:
   2363 ; ATOM:       # %bb.0:
   2364 ; ATOM-NEXT:    pavgb %mm1, %mm0 # sched: [1:0.50]
   2365 ; ATOM-NEXT:    pavgb (%rdi), %mm0 # sched: [1:1.00]
   2366 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   2367 ; ATOM-NEXT:    retq # sched: [79:39.50]
   2368 ;
   2369 ; SLM-LABEL: test_pavgb:
   2370 ; SLM:       # %bb.0:
   2371 ; SLM-NEXT:    pavgb %mm1, %mm0 # sched: [1:0.50]
   2372 ; SLM-NEXT:    pavgb (%rdi), %mm0 # sched: [4:1.00]
   2373 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   2374 ; SLM-NEXT:    retq # sched: [4:1.00]
   2375 ;
   2376 ; SANDY-LABEL: test_pavgb:
   2377 ; SANDY:       # %bb.0:
   2378 ; SANDY-NEXT:    pavgb %mm1, %mm0 # sched: [3:1.00]
   2379 ; SANDY-NEXT:    pavgb (%rdi), %mm0 # sched: [8:1.00]
   2380 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2381 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2382 ;
   2383 ; HASWELL-LABEL: test_pavgb:
   2384 ; HASWELL:       # %bb.0:
   2385 ; HASWELL-NEXT:    pavgb %mm1, %mm0 # sched: [1:0.50]
   2386 ; HASWELL-NEXT:    pavgb (%rdi), %mm0 # sched: [6:0.50]
   2387 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   2388 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2389 ;
   2390 ; BROADWELL-LABEL: test_pavgb:
   2391 ; BROADWELL:       # %bb.0:
   2392 ; BROADWELL-NEXT:    pavgb %mm1, %mm0 # sched: [1:0.50]
   2393 ; BROADWELL-NEXT:    pavgb (%rdi), %mm0 # sched: [6:0.50]
   2394 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   2395 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2396 ;
   2397 ; SKYLAKE-LABEL: test_pavgb:
   2398 ; SKYLAKE:       # %bb.0:
   2399 ; SKYLAKE-NEXT:    pavgb %mm1, %mm0 # sched: [1:1.00]
   2400 ; SKYLAKE-NEXT:    pavgb (%rdi), %mm0 # sched: [6:1.00]
   2401 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2402 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2403 ;
   2404 ; SKX-LABEL: test_pavgb:
   2405 ; SKX:       # %bb.0:
   2406 ; SKX-NEXT:    pavgb %mm1, %mm0 # sched: [1:1.00]
   2407 ; SKX-NEXT:    pavgb (%rdi), %mm0 # sched: [6:1.00]
   2408 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2409 ; SKX-NEXT:    retq # sched: [7:1.00]
   2410 ;
   2411 ; BTVER2-LABEL: test_pavgb:
   2412 ; BTVER2:       # %bb.0:
   2413 ; BTVER2-NEXT:    pavgb %mm1, %mm0 # sched: [1:0.50]
   2414 ; BTVER2-NEXT:    pavgb (%rdi), %mm0 # sched: [6:1.00]
   2415 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   2416 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2417 ;
   2418 ; ZNVER1-LABEL: test_pavgb:
   2419 ; ZNVER1:       # %bb.0:
   2420 ; ZNVER1-NEXT:    pavgb %mm1, %mm0 # sched: [1:0.25]
   2421 ; ZNVER1-NEXT:    pavgb (%rdi), %mm0 # sched: [8:0.50]
   2422 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2423 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Test body: call @llvm.x86.mmx.pavg.b on the two register arguments,
          ; then call it again on that result and a value loaded from %a2, so the
          ; assertions above cover both the reg-reg and the reg-mem form of pavgb.
          ; The MMX result is bitcast to i64 for the return (the movq into %rax).
   2424   %1 = call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %a0, x86_mmx %a1)
   2425   %2 = load x86_mmx, x86_mmx *%a2, align 8
   2426   %3 = call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %1, x86_mmx %2)
   2427   %4 = bitcast x86_mmx %3 to i64
   2428   ret i64 %4
   2429 }
        ; Declaration of the intrinsic called twice by @test_pavgb.
   2430 declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone
   2431 
   2432 define i64 @test_pavgw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
   2433 ; GENERIC-LABEL: test_pavgw:
   2434 ; GENERIC:       # %bb.0:
   2435 ; GENERIC-NEXT:    pavgw %mm1, %mm0 # sched: [3:1.00]
   2436 ; GENERIC-NEXT:    pavgw (%rdi), %mm0 # sched: [8:1.00]
   2437 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2438 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2439 ;
   2440 ; ATOM-LABEL: test_pavgw:
   2441 ; ATOM:       # %bb.0:
   2442 ; ATOM-NEXT:    pavgw %mm1, %mm0 # sched: [1:0.50]
   2443 ; ATOM-NEXT:    pavgw (%rdi), %mm0 # sched: [1:1.00]
   2444 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   2445 ; ATOM-NEXT:    retq # sched: [79:39.50]
   2446 ;
   2447 ; SLM-LABEL: test_pavgw:
   2448 ; SLM:       # %bb.0:
   2449 ; SLM-NEXT:    pavgw %mm1, %mm0 # sched: [1:0.50]
   2450 ; SLM-NEXT:    pavgw (%rdi), %mm0 # sched: [4:1.00]
   2451 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   2452 ; SLM-NEXT:    retq # sched: [4:1.00]
   2453 ;
   2454 ; SANDY-LABEL: test_pavgw:
   2455 ; SANDY:       # %bb.0:
   2456 ; SANDY-NEXT:    pavgw %mm1, %mm0 # sched: [3:1.00]
   2457 ; SANDY-NEXT:    pavgw (%rdi), %mm0 # sched: [8:1.00]
   2458 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2459 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2460 ;
   2461 ; HASWELL-LABEL: test_pavgw:
   2462 ; HASWELL:       # %bb.0:
   2463 ; HASWELL-NEXT:    pavgw %mm1, %mm0 # sched: [1:0.50]
   2464 ; HASWELL-NEXT:    pavgw (%rdi), %mm0 # sched: [6:0.50]
   2465 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   2466 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2467 ;
   2468 ; BROADWELL-LABEL: test_pavgw:
   2469 ; BROADWELL:       # %bb.0:
   2470 ; BROADWELL-NEXT:    pavgw %mm1, %mm0 # sched: [1:0.50]
   2471 ; BROADWELL-NEXT:    pavgw (%rdi), %mm0 # sched: [6:0.50]
   2472 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   2473 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2474 ;
   2475 ; SKYLAKE-LABEL: test_pavgw:
   2476 ; SKYLAKE:       # %bb.0:
   2477 ; SKYLAKE-NEXT:    pavgw %mm1, %mm0 # sched: [1:1.00]
   2478 ; SKYLAKE-NEXT:    pavgw (%rdi), %mm0 # sched: [6:1.00]
   2479 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2480 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2481 ;
   2482 ; SKX-LABEL: test_pavgw:
   2483 ; SKX:       # %bb.0:
   2484 ; SKX-NEXT:    pavgw %mm1, %mm0 # sched: [1:1.00]
   2485 ; SKX-NEXT:    pavgw (%rdi), %mm0 # sched: [6:1.00]
   2486 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2487 ; SKX-NEXT:    retq # sched: [7:1.00]
   2488 ;
   2489 ; BTVER2-LABEL: test_pavgw:
   2490 ; BTVER2:       # %bb.0:
   2491 ; BTVER2-NEXT:    pavgw %mm1, %mm0 # sched: [1:0.50]
   2492 ; BTVER2-NEXT:    pavgw (%rdi), %mm0 # sched: [6:1.00]
   2493 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   2494 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2495 ;
   2496 ; ZNVER1-LABEL: test_pavgw:
   2497 ; ZNVER1:       # %bb.0:
   2498 ; ZNVER1-NEXT:    pavgw %mm1, %mm0 # sched: [1:0.25]
   2499 ; ZNVER1-NEXT:    pavgw (%rdi), %mm0 # sched: [8:0.50]
   2500 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2501 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Test body: call @llvm.x86.mmx.pavg.w on the two register arguments,
          ; then call it again on that result and a value loaded from %a2, so the
          ; assertions above cover both the reg-reg and the reg-mem form of pavgw.
          ; The MMX result is bitcast to i64 for the return (the movq into %rax).
   2502   %1 = call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %a0, x86_mmx %a1)
   2503   %2 = load x86_mmx, x86_mmx *%a2, align 8
   2504   %3 = call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %1, x86_mmx %2)
   2505   %4 = bitcast x86_mmx %3 to i64
   2506   ret i64 %4
   2507 }
        ; Declaration of the intrinsic called twice by @test_pavgw.
   2508 declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone
   2509 
   2510 define i64 @test_pcmpeqb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
   2511 ; GENERIC-LABEL: test_pcmpeqb:
   2512 ; GENERIC:       # %bb.0:
   2513 ; GENERIC-NEXT:    pcmpeqb %mm1, %mm0 # sched: [3:1.00]
   2514 ; GENERIC-NEXT:    pcmpeqb (%rdi), %mm0 # sched: [8:1.00]
   2515 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2516 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2517 ;
   2518 ; ATOM-LABEL: test_pcmpeqb:
   2519 ; ATOM:       # %bb.0:
   2520 ; ATOM-NEXT:    pcmpeqb %mm1, %mm0 # sched: [1:0.50]
   2521 ; ATOM-NEXT:    pcmpeqb (%rdi), %mm0 # sched: [1:1.00]
   2522 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   2523 ; ATOM-NEXT:    retq # sched: [79:39.50]
   2524 ;
   2525 ; SLM-LABEL: test_pcmpeqb:
   2526 ; SLM:       # %bb.0:
   2527 ; SLM-NEXT:    pcmpeqb %mm1, %mm0 # sched: [1:0.50]
   2528 ; SLM-NEXT:    pcmpeqb (%rdi), %mm0 # sched: [4:1.00]
   2529 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   2530 ; SLM-NEXT:    retq # sched: [4:1.00]
   2531 ;
   2532 ; SANDY-LABEL: test_pcmpeqb:
   2533 ; SANDY:       # %bb.0:
   2534 ; SANDY-NEXT:    pcmpeqb %mm1, %mm0 # sched: [3:1.00]
   2535 ; SANDY-NEXT:    pcmpeqb (%rdi), %mm0 # sched: [8:1.00]
   2536 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2537 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2538 ;
   2539 ; HASWELL-LABEL: test_pcmpeqb:
   2540 ; HASWELL:       # %bb.0:
   2541 ; HASWELL-NEXT:    pcmpeqb %mm1, %mm0 # sched: [1:0.50]
   2542 ; HASWELL-NEXT:    pcmpeqb (%rdi), %mm0 # sched: [6:0.50]
   2543 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   2544 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2545 ;
   2546 ; BROADWELL-LABEL: test_pcmpeqb:
   2547 ; BROADWELL:       # %bb.0:
   2548 ; BROADWELL-NEXT:    pcmpeqb %mm1, %mm0 # sched: [1:0.50]
   2549 ; BROADWELL-NEXT:    pcmpeqb (%rdi), %mm0 # sched: [6:0.50]
   2550 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   2551 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2552 ;
   2553 ; SKYLAKE-LABEL: test_pcmpeqb:
   2554 ; SKYLAKE:       # %bb.0:
   2555 ; SKYLAKE-NEXT:    pcmpeqb %mm1, %mm0 # sched: [1:1.00]
   2556 ; SKYLAKE-NEXT:    pcmpeqb (%rdi), %mm0 # sched: [6:1.00]
   2557 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2558 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2559 ;
   2560 ; SKX-LABEL: test_pcmpeqb:
   2561 ; SKX:       # %bb.0:
   2562 ; SKX-NEXT:    pcmpeqb %mm1, %mm0 # sched: [1:1.00]
   2563 ; SKX-NEXT:    pcmpeqb (%rdi), %mm0 # sched: [6:1.00]
   2564 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2565 ; SKX-NEXT:    retq # sched: [7:1.00]
   2566 ;
   2567 ; BTVER2-LABEL: test_pcmpeqb:
   2568 ; BTVER2:       # %bb.0:
   2569 ; BTVER2-NEXT:    pcmpeqb %mm1, %mm0 # sched: [1:0.50]
   2570 ; BTVER2-NEXT:    pcmpeqb (%rdi), %mm0 # sched: [6:1.00]
   2571 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   2572 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2573 ;
   2574 ; ZNVER1-LABEL: test_pcmpeqb:
   2575 ; ZNVER1:       # %bb.0:
   2576 ; ZNVER1-NEXT:    pcmpeqb %mm1, %mm0 # sched: [1:0.25]
   2577 ; ZNVER1-NEXT:    pcmpeqb (%rdi), %mm0 # sched: [8:0.50]
   2578 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2579 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Test body: call @llvm.x86.mmx.pcmpeq.b on the two register arguments,
          ; then call it again on that result and a value loaded from %a2, so the
          ; assertions above cover both the reg-reg and the reg-mem form of pcmpeqb.
          ; The MMX result is bitcast to i64 for the return (the movq into %rax).
   2580   %1 = call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %a0, x86_mmx %a1)
   2581   %2 = load x86_mmx, x86_mmx *%a2, align 8
   2582   %3 = call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %1, x86_mmx %2)
   2583   %4 = bitcast x86_mmx %3 to i64
   2584   ret i64 %4
   2585 }
        ; Declaration of the intrinsic called twice by @test_pcmpeqb.
   2586 declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone
   2587 
   2588 define i64 @test_pcmpeqd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
   2589 ; GENERIC-LABEL: test_pcmpeqd:
   2590 ; GENERIC:       # %bb.0:
   2591 ; GENERIC-NEXT:    pcmpeqd %mm1, %mm0 # sched: [3:1.00]
   2592 ; GENERIC-NEXT:    pcmpeqd (%rdi), %mm0 # sched: [8:1.00]
   2593 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2594 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2595 ;
   2596 ; ATOM-LABEL: test_pcmpeqd:
   2597 ; ATOM:       # %bb.0:
   2598 ; ATOM-NEXT:    pcmpeqd %mm1, %mm0 # sched: [1:0.50]
   2599 ; ATOM-NEXT:    pcmpeqd (%rdi), %mm0 # sched: [1:1.00]
   2600 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   2601 ; ATOM-NEXT:    retq # sched: [79:39.50]
   2602 ;
   2603 ; SLM-LABEL: test_pcmpeqd:
   2604 ; SLM:       # %bb.0:
   2605 ; SLM-NEXT:    pcmpeqd %mm1, %mm0 # sched: [1:0.50]
   2606 ; SLM-NEXT:    pcmpeqd (%rdi), %mm0 # sched: [4:1.00]
   2607 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   2608 ; SLM-NEXT:    retq # sched: [4:1.00]
   2609 ;
   2610 ; SANDY-LABEL: test_pcmpeqd:
   2611 ; SANDY:       # %bb.0:
   2612 ; SANDY-NEXT:    pcmpeqd %mm1, %mm0 # sched: [3:1.00]
   2613 ; SANDY-NEXT:    pcmpeqd (%rdi), %mm0 # sched: [8:1.00]
   2614 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2615 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2616 ;
   2617 ; HASWELL-LABEL: test_pcmpeqd:
   2618 ; HASWELL:       # %bb.0:
   2619 ; HASWELL-NEXT:    pcmpeqd %mm1, %mm0 # sched: [1:0.50]
   2620 ; HASWELL-NEXT:    pcmpeqd (%rdi), %mm0 # sched: [6:0.50]
   2621 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   2622 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2623 ;
   2624 ; BROADWELL-LABEL: test_pcmpeqd:
   2625 ; BROADWELL:       # %bb.0:
   2626 ; BROADWELL-NEXT:    pcmpeqd %mm1, %mm0 # sched: [1:0.50]
   2627 ; BROADWELL-NEXT:    pcmpeqd (%rdi), %mm0 # sched: [6:0.50]
   2628 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   2629 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2630 ;
   2631 ; SKYLAKE-LABEL: test_pcmpeqd:
   2632 ; SKYLAKE:       # %bb.0:
   2633 ; SKYLAKE-NEXT:    pcmpeqd %mm1, %mm0 # sched: [1:1.00]
   2634 ; SKYLAKE-NEXT:    pcmpeqd (%rdi), %mm0 # sched: [6:1.00]
   2635 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2636 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2637 ;
   2638 ; SKX-LABEL: test_pcmpeqd:
   2639 ; SKX:       # %bb.0:
   2640 ; SKX-NEXT:    pcmpeqd %mm1, %mm0 # sched: [1:1.00]
   2641 ; SKX-NEXT:    pcmpeqd (%rdi), %mm0 # sched: [6:1.00]
   2642 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2643 ; SKX-NEXT:    retq # sched: [7:1.00]
   2644 ;
   2645 ; BTVER2-LABEL: test_pcmpeqd:
   2646 ; BTVER2:       # %bb.0:
   2647 ; BTVER2-NEXT:    pcmpeqd %mm1, %mm0 # sched: [1:0.50]
   2648 ; BTVER2-NEXT:    pcmpeqd (%rdi), %mm0 # sched: [6:1.00]
   2649 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   2650 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2651 ;
   2652 ; ZNVER1-LABEL: test_pcmpeqd:
   2653 ; ZNVER1:       # %bb.0:
   2654 ; ZNVER1-NEXT:    pcmpeqd %mm1, %mm0 # sched: [1:0.25]
   2655 ; ZNVER1-NEXT:    pcmpeqd (%rdi), %mm0 # sched: [8:0.50]
   2656 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2657 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Test body: call @llvm.x86.mmx.pcmpeq.d on the two register arguments,
          ; then call it again on that result and a value loaded from %a2, so the
          ; assertions above cover both the reg-reg and the reg-mem form of pcmpeqd.
          ; The MMX result is bitcast to i64 for the return (the movq into %rax).
   2658   %1 = call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %a0, x86_mmx %a1)
   2659   %2 = load x86_mmx, x86_mmx *%a2, align 8
   2660   %3 = call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %1, x86_mmx %2)
   2661   %4 = bitcast x86_mmx %3 to i64
   2662   ret i64 %4
   2663 }
        ; Declaration of the intrinsic called twice by @test_pcmpeqd.
   2664 declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone
   2665 
   2666 define i64 @test_pcmpeqw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
   2667 ; GENERIC-LABEL: test_pcmpeqw:
   2668 ; GENERIC:       # %bb.0:
   2669 ; GENERIC-NEXT:    pcmpeqw %mm1, %mm0 # sched: [3:1.00]
   2670 ; GENERIC-NEXT:    pcmpeqw (%rdi), %mm0 # sched: [8:1.00]
   2671 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2672 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2673 ;
   2674 ; ATOM-LABEL: test_pcmpeqw:
   2675 ; ATOM:       # %bb.0:
   2676 ; ATOM-NEXT:    pcmpeqw %mm1, %mm0 # sched: [1:0.50]
   2677 ; ATOM-NEXT:    pcmpeqw (%rdi), %mm0 # sched: [1:1.00]
   2678 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   2679 ; ATOM-NEXT:    retq # sched: [79:39.50]
   2680 ;
   2681 ; SLM-LABEL: test_pcmpeqw:
   2682 ; SLM:       # %bb.0:
   2683 ; SLM-NEXT:    pcmpeqw %mm1, %mm0 # sched: [1:0.50]
   2684 ; SLM-NEXT:    pcmpeqw (%rdi), %mm0 # sched: [4:1.00]
   2685 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   2686 ; SLM-NEXT:    retq # sched: [4:1.00]
   2687 ;
   2688 ; SANDY-LABEL: test_pcmpeqw:
   2689 ; SANDY:       # %bb.0:
   2690 ; SANDY-NEXT:    pcmpeqw %mm1, %mm0 # sched: [3:1.00]
   2691 ; SANDY-NEXT:    pcmpeqw (%rdi), %mm0 # sched: [8:1.00]
   2692 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2693 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2694 ;
   2695 ; HASWELL-LABEL: test_pcmpeqw:
   2696 ; HASWELL:       # %bb.0:
   2697 ; HASWELL-NEXT:    pcmpeqw %mm1, %mm0 # sched: [1:0.50]
   2698 ; HASWELL-NEXT:    pcmpeqw (%rdi), %mm0 # sched: [6:0.50]
   2699 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   2700 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2701 ;
   2702 ; BROADWELL-LABEL: test_pcmpeqw:
   2703 ; BROADWELL:       # %bb.0:
   2704 ; BROADWELL-NEXT:    pcmpeqw %mm1, %mm0 # sched: [1:0.50]
   2705 ; BROADWELL-NEXT:    pcmpeqw (%rdi), %mm0 # sched: [6:0.50]
   2706 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   2707 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2708 ;
   2709 ; SKYLAKE-LABEL: test_pcmpeqw:
   2710 ; SKYLAKE:       # %bb.0:
   2711 ; SKYLAKE-NEXT:    pcmpeqw %mm1, %mm0 # sched: [1:1.00]
   2712 ; SKYLAKE-NEXT:    pcmpeqw (%rdi), %mm0 # sched: [6:1.00]
   2713 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2714 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2715 ;
   2716 ; SKX-LABEL: test_pcmpeqw:
   2717 ; SKX:       # %bb.0:
   2718 ; SKX-NEXT:    pcmpeqw %mm1, %mm0 # sched: [1:1.00]
   2719 ; SKX-NEXT:    pcmpeqw (%rdi), %mm0 # sched: [6:1.00]
   2720 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2721 ; SKX-NEXT:    retq # sched: [7:1.00]
   2722 ;
   2723 ; BTVER2-LABEL: test_pcmpeqw:
   2724 ; BTVER2:       # %bb.0:
   2725 ; BTVER2-NEXT:    pcmpeqw %mm1, %mm0 # sched: [1:0.50]
   2726 ; BTVER2-NEXT:    pcmpeqw (%rdi), %mm0 # sched: [6:1.00]
   2727 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   2728 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2729 ;
   2730 ; ZNVER1-LABEL: test_pcmpeqw:
   2731 ; ZNVER1:       # %bb.0:
   2732 ; ZNVER1-NEXT:    pcmpeqw %mm1, %mm0 # sched: [1:0.25]
   2733 ; ZNVER1-NEXT:    pcmpeqw (%rdi), %mm0 # sched: [8:0.50]
   2734 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2735 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Test body: call @llvm.x86.mmx.pcmpeq.w on the two register arguments,
          ; then call it again on that result and a value loaded from %a2, so the
          ; assertions above cover both the reg-reg and the reg-mem form of pcmpeqw.
          ; The MMX result is bitcast to i64 for the return (the movq into %rax).
   2736   %1 = call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %a0, x86_mmx %a1)
   2737   %2 = load x86_mmx, x86_mmx *%a2, align 8
   2738   %3 = call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %1, x86_mmx %2)
   2739   %4 = bitcast x86_mmx %3 to i64
   2740   ret i64 %4
   2741 }
        ; Declaration of the intrinsic called twice by @test_pcmpeqw.
   2742 declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone
   2743 
   2744 define i64 @test_pcmpgtb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
   2745 ; GENERIC-LABEL: test_pcmpgtb:
   2746 ; GENERIC:       # %bb.0:
   2747 ; GENERIC-NEXT:    pcmpgtb %mm1, %mm0 # sched: [3:1.00]
   2748 ; GENERIC-NEXT:    pcmpgtb (%rdi), %mm0 # sched: [8:1.00]
   2749 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2750 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2751 ;
   2752 ; ATOM-LABEL: test_pcmpgtb:
   2753 ; ATOM:       # %bb.0:
   2754 ; ATOM-NEXT:    pcmpgtb %mm1, %mm0 # sched: [1:0.50]
   2755 ; ATOM-NEXT:    pcmpgtb (%rdi), %mm0 # sched: [1:1.00]
   2756 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   2757 ; ATOM-NEXT:    retq # sched: [79:39.50]
   2758 ;
   2759 ; SLM-LABEL: test_pcmpgtb:
   2760 ; SLM:       # %bb.0:
   2761 ; SLM-NEXT:    pcmpgtb %mm1, %mm0 # sched: [1:0.50]
   2762 ; SLM-NEXT:    pcmpgtb (%rdi), %mm0 # sched: [4:1.00]
   2763 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   2764 ; SLM-NEXT:    retq # sched: [4:1.00]
   2765 ;
   2766 ; SANDY-LABEL: test_pcmpgtb:
   2767 ; SANDY:       # %bb.0:
   2768 ; SANDY-NEXT:    pcmpgtb %mm1, %mm0 # sched: [3:1.00]
   2769 ; SANDY-NEXT:    pcmpgtb (%rdi), %mm0 # sched: [8:1.00]
   2770 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2771 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2772 ;
   2773 ; HASWELL-LABEL: test_pcmpgtb:
   2774 ; HASWELL:       # %bb.0:
   2775 ; HASWELL-NEXT:    pcmpgtb %mm1, %mm0 # sched: [1:0.50]
   2776 ; HASWELL-NEXT:    pcmpgtb (%rdi), %mm0 # sched: [6:0.50]
   2777 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   2778 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2779 ;
   2780 ; BROADWELL-LABEL: test_pcmpgtb:
   2781 ; BROADWELL:       # %bb.0:
   2782 ; BROADWELL-NEXT:    pcmpgtb %mm1, %mm0 # sched: [1:0.50]
   2783 ; BROADWELL-NEXT:    pcmpgtb (%rdi), %mm0 # sched: [6:0.50]
   2784 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   2785 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2786 ;
   2787 ; SKYLAKE-LABEL: test_pcmpgtb:
   2788 ; SKYLAKE:       # %bb.0:
   2789 ; SKYLAKE-NEXT:    pcmpgtb %mm1, %mm0 # sched: [1:1.00]
   2790 ; SKYLAKE-NEXT:    pcmpgtb (%rdi), %mm0 # sched: [6:1.00]
   2791 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2792 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2793 ;
   2794 ; SKX-LABEL: test_pcmpgtb:
   2795 ; SKX:       # %bb.0:
   2796 ; SKX-NEXT:    pcmpgtb %mm1, %mm0 # sched: [1:1.00]
   2797 ; SKX-NEXT:    pcmpgtb (%rdi), %mm0 # sched: [6:1.00]
   2798 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2799 ; SKX-NEXT:    retq # sched: [7:1.00]
   2800 ;
   2801 ; BTVER2-LABEL: test_pcmpgtb:
   2802 ; BTVER2:       # %bb.0:
   2803 ; BTVER2-NEXT:    pcmpgtb %mm1, %mm0 # sched: [1:0.50]
   2804 ; BTVER2-NEXT:    pcmpgtb (%rdi), %mm0 # sched: [6:1.00]
   2805 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   2806 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2807 ;
   2808 ; ZNVER1-LABEL: test_pcmpgtb:
   2809 ; ZNVER1:       # %bb.0:
   2810 ; ZNVER1-NEXT:    pcmpgtb %mm1, %mm0 # sched: [1:0.25]
   2811 ; ZNVER1-NEXT:    pcmpgtb (%rdi), %mm0 # sched: [8:0.50]
   2812 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2813 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Hand-written IR driving the checks above: pcmpgt.b is called twice so
          ; that both operand forms of pcmpgtb appear in the output. %1 compares
          ; the two register arguments (the `%mm1, %mm0` form); %3 compares against
          ; the value loaded from %a2, which the checks expect to be folded into
          ; the `(%rdi), %mm0` memory form rather than emitted as a separate load.
   2814   %1 = call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %a0, x86_mmx %a1)
   2815   %2 = load x86_mmx, x86_mmx *%a2, align 8
   2816   %3 = call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %1, x86_mmx %2)
          ; Returning the result as i64 corresponds to the trailing
          ; `movq %mm0, %rax` in every check sequence.
   2817   %4 = bitcast x86_mmx %3 to i64
   2818   ret i64 %4
   2819 }
   2820 declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone
   2821 
   2822 define i64 @test_pcmpgtd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
   2823 ; GENERIC-LABEL: test_pcmpgtd:
   2824 ; GENERIC:       # %bb.0:
   2825 ; GENERIC-NEXT:    pcmpgtd %mm1, %mm0 # sched: [3:1.00]
   2826 ; GENERIC-NEXT:    pcmpgtd (%rdi), %mm0 # sched: [8:1.00]
   2827 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2828 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2829 ;
   2830 ; ATOM-LABEL: test_pcmpgtd:
   2831 ; ATOM:       # %bb.0:
   2832 ; ATOM-NEXT:    pcmpgtd %mm1, %mm0 # sched: [1:0.50]
   2833 ; ATOM-NEXT:    pcmpgtd (%rdi), %mm0 # sched: [1:1.00]
   2834 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   2835 ; ATOM-NEXT:    retq # sched: [79:39.50]
   2836 ;
   2837 ; SLM-LABEL: test_pcmpgtd:
   2838 ; SLM:       # %bb.0:
   2839 ; SLM-NEXT:    pcmpgtd %mm1, %mm0 # sched: [1:0.50]
   2840 ; SLM-NEXT:    pcmpgtd (%rdi), %mm0 # sched: [4:1.00]
   2841 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   2842 ; SLM-NEXT:    retq # sched: [4:1.00]
   2843 ;
   2844 ; SANDY-LABEL: test_pcmpgtd:
   2845 ; SANDY:       # %bb.0:
   2846 ; SANDY-NEXT:    pcmpgtd %mm1, %mm0 # sched: [3:1.00]
   2847 ; SANDY-NEXT:    pcmpgtd (%rdi), %mm0 # sched: [8:1.00]
   2848 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2849 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2850 ;
   2851 ; HASWELL-LABEL: test_pcmpgtd:
   2852 ; HASWELL:       # %bb.0:
   2853 ; HASWELL-NEXT:    pcmpgtd %mm1, %mm0 # sched: [1:0.50]
   2854 ; HASWELL-NEXT:    pcmpgtd (%rdi), %mm0 # sched: [6:0.50]
   2855 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   2856 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2857 ;
   2858 ; BROADWELL-LABEL: test_pcmpgtd:
   2859 ; BROADWELL:       # %bb.0:
   2860 ; BROADWELL-NEXT:    pcmpgtd %mm1, %mm0 # sched: [1:0.50]
   2861 ; BROADWELL-NEXT:    pcmpgtd (%rdi), %mm0 # sched: [6:0.50]
   2862 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   2863 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2864 ;
   2865 ; SKYLAKE-LABEL: test_pcmpgtd:
   2866 ; SKYLAKE:       # %bb.0:
   2867 ; SKYLAKE-NEXT:    pcmpgtd %mm1, %mm0 # sched: [1:1.00]
   2868 ; SKYLAKE-NEXT:    pcmpgtd (%rdi), %mm0 # sched: [6:1.00]
   2869 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2870 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2871 ;
   2872 ; SKX-LABEL: test_pcmpgtd:
   2873 ; SKX:       # %bb.0:
   2874 ; SKX-NEXT:    pcmpgtd %mm1, %mm0 # sched: [1:1.00]
   2875 ; SKX-NEXT:    pcmpgtd (%rdi), %mm0 # sched: [6:1.00]
   2876 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2877 ; SKX-NEXT:    retq # sched: [7:1.00]
   2878 ;
   2879 ; BTVER2-LABEL: test_pcmpgtd:
   2880 ; BTVER2:       # %bb.0:
   2881 ; BTVER2-NEXT:    pcmpgtd %mm1, %mm0 # sched: [1:0.50]
   2882 ; BTVER2-NEXT:    pcmpgtd (%rdi), %mm0 # sched: [6:1.00]
   2883 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   2884 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2885 ;
   2886 ; ZNVER1-LABEL: test_pcmpgtd:
   2887 ; ZNVER1:       # %bb.0:
   2888 ; ZNVER1-NEXT:    pcmpgtd %mm1, %mm0 # sched: [1:0.25]
   2889 ; ZNVER1-NEXT:    pcmpgtd (%rdi), %mm0 # sched: [8:0.50]
   2890 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2891 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Hand-written IR driving the checks above: pcmpgt.d is called twice so
          ; that both operand forms of pcmpgtd appear in the output. %1 compares
          ; the two register arguments (the `%mm1, %mm0` form); %3 compares against
          ; the value loaded from %a2, which the checks expect to be folded into
          ; the `(%rdi), %mm0` memory form rather than emitted as a separate load.
   2892   %1 = call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %a0, x86_mmx %a1)
   2893   %2 = load x86_mmx, x86_mmx *%a2, align 8
   2894   %3 = call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %1, x86_mmx %2)
          ; Returning the result as i64 corresponds to the trailing
          ; `movq %mm0, %rax` in every check sequence.
   2895   %4 = bitcast x86_mmx %3 to i64
   2896   ret i64 %4
   2897 }
   2898 declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone
   2899 
   2900 define i64 @test_pcmpgtw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
   2901 ; GENERIC-LABEL: test_pcmpgtw:
   2902 ; GENERIC:       # %bb.0:
   2903 ; GENERIC-NEXT:    pcmpgtw %mm1, %mm0 # sched: [3:1.00]
   2904 ; GENERIC-NEXT:    pcmpgtw (%rdi), %mm0 # sched: [8:1.00]
   2905 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2906 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2907 ;
   2908 ; ATOM-LABEL: test_pcmpgtw:
   2909 ; ATOM:       # %bb.0:
   2910 ; ATOM-NEXT:    pcmpgtw %mm1, %mm0 # sched: [1:0.50]
   2911 ; ATOM-NEXT:    pcmpgtw (%rdi), %mm0 # sched: [1:1.00]
   2912 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   2913 ; ATOM-NEXT:    retq # sched: [79:39.50]
   2914 ;
   2915 ; SLM-LABEL: test_pcmpgtw:
   2916 ; SLM:       # %bb.0:
   2917 ; SLM-NEXT:    pcmpgtw %mm1, %mm0 # sched: [1:0.50]
   2918 ; SLM-NEXT:    pcmpgtw (%rdi), %mm0 # sched: [4:1.00]
   2919 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   2920 ; SLM-NEXT:    retq # sched: [4:1.00]
   2921 ;
   2922 ; SANDY-LABEL: test_pcmpgtw:
   2923 ; SANDY:       # %bb.0:
   2924 ; SANDY-NEXT:    pcmpgtw %mm1, %mm0 # sched: [3:1.00]
   2925 ; SANDY-NEXT:    pcmpgtw (%rdi), %mm0 # sched: [8:1.00]
   2926 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2927 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2928 ;
   2929 ; HASWELL-LABEL: test_pcmpgtw:
   2930 ; HASWELL:       # %bb.0:
   2931 ; HASWELL-NEXT:    pcmpgtw %mm1, %mm0 # sched: [1:0.50]
   2932 ; HASWELL-NEXT:    pcmpgtw (%rdi), %mm0 # sched: [6:0.50]
   2933 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   2934 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2935 ;
   2936 ; BROADWELL-LABEL: test_pcmpgtw:
   2937 ; BROADWELL:       # %bb.0:
   2938 ; BROADWELL-NEXT:    pcmpgtw %mm1, %mm0 # sched: [1:0.50]
   2939 ; BROADWELL-NEXT:    pcmpgtw (%rdi), %mm0 # sched: [6:0.50]
   2940 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   2941 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2942 ;
   2943 ; SKYLAKE-LABEL: test_pcmpgtw:
   2944 ; SKYLAKE:       # %bb.0:
   2945 ; SKYLAKE-NEXT:    pcmpgtw %mm1, %mm0 # sched: [1:1.00]
   2946 ; SKYLAKE-NEXT:    pcmpgtw (%rdi), %mm0 # sched: [6:1.00]
   2947 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2948 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2949 ;
   2950 ; SKX-LABEL: test_pcmpgtw:
   2951 ; SKX:       # %bb.0:
   2952 ; SKX-NEXT:    pcmpgtw %mm1, %mm0 # sched: [1:1.00]
   2953 ; SKX-NEXT:    pcmpgtw (%rdi), %mm0 # sched: [6:1.00]
   2954 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2955 ; SKX-NEXT:    retq # sched: [7:1.00]
   2956 ;
   2957 ; BTVER2-LABEL: test_pcmpgtw:
   2958 ; BTVER2:       # %bb.0:
   2959 ; BTVER2-NEXT:    pcmpgtw %mm1, %mm0 # sched: [1:0.50]
   2960 ; BTVER2-NEXT:    pcmpgtw (%rdi), %mm0 # sched: [6:1.00]
   2961 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   2962 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2963 ;
   2964 ; ZNVER1-LABEL: test_pcmpgtw:
   2965 ; ZNVER1:       # %bb.0:
   2966 ; ZNVER1-NEXT:    pcmpgtw %mm1, %mm0 # sched: [1:0.25]
   2967 ; ZNVER1-NEXT:    pcmpgtw (%rdi), %mm0 # sched: [8:0.50]
   2968 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   2969 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Hand-written IR driving the checks above: pcmpgt.w is called twice so
          ; that both operand forms of pcmpgtw appear in the output. %1 compares
          ; the two register arguments (the `%mm1, %mm0` form); %3 compares against
          ; the value loaded from %a2, which the checks expect to be folded into
          ; the `(%rdi), %mm0` memory form rather than emitted as a separate load.
   2970   %1 = call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %a0, x86_mmx %a1)
   2971   %2 = load x86_mmx, x86_mmx *%a2, align 8
   2972   %3 = call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %1, x86_mmx %2)
          ; Returning the result as i64 corresponds to the trailing
          ; `movq %mm0, %rax` in every check sequence.
   2973   %4 = bitcast x86_mmx %3 to i64
   2974   ret i64 %4
   2975 }
   2976 declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone
   2977 
   2978 define i32 @test_pextrw(x86_mmx %a0) optsize {
   2979 ; GENERIC-LABEL: test_pextrw:
   2980 ; GENERIC:       # %bb.0:
   2981 ; GENERIC-NEXT:    pextrw $0, %mm0, %eax # sched: [3:1.00]
   2982 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2983 ;
   2984 ; ATOM-LABEL: test_pextrw:
   2985 ; ATOM:       # %bb.0:
   2986 ; ATOM-NEXT:    pextrw $0, %mm0, %eax # sched: [4:2.00]
   2987 ; ATOM-NEXT:    retq # sched: [79:39.50]
   2988 ;
   2989 ; SLM-LABEL: test_pextrw:
   2990 ; SLM:       # %bb.0:
   2991 ; SLM-NEXT:    pextrw $0, %mm0, %eax # sched: [1:1.00]
   2992 ; SLM-NEXT:    retq # sched: [4:1.00]
   2993 ;
   2994 ; SANDY-LABEL: test_pextrw:
   2995 ; SANDY:       # %bb.0:
   2996 ; SANDY-NEXT:    pextrw $0, %mm0, %eax # sched: [3:1.00]
   2997 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2998 ;
   2999 ; HASWELL-LABEL: test_pextrw:
   3000 ; HASWELL:       # %bb.0:
   3001 ; HASWELL-NEXT:    pextrw $0, %mm0, %eax # sched: [2:1.00]
   3002 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3003 ;
   3004 ; BROADWELL-LABEL: test_pextrw:
   3005 ; BROADWELL:       # %bb.0:
   3006 ; BROADWELL-NEXT:    pextrw $0, %mm0, %eax # sched: [2:1.00]
   3007 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3008 ;
   3009 ; SKYLAKE-LABEL: test_pextrw:
   3010 ; SKYLAKE:       # %bb.0:
   3011 ; SKYLAKE-NEXT:    pextrw $0, %mm0, %eax # sched: [3:1.00]
   3012 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3013 ;
   3014 ; SKX-LABEL: test_pextrw:
   3015 ; SKX:       # %bb.0:
   3016 ; SKX-NEXT:    pextrw $0, %mm0, %eax # sched: [3:1.00]
   3017 ; SKX-NEXT:    retq # sched: [7:1.00]
   3018 ;
   3019 ; BTVER2-LABEL: test_pextrw:
   3020 ; BTVER2:       # %bb.0:
   3021 ; BTVER2-NEXT:    pextrw $0, %mm0, %eax # sched: [3:1.00]
   3022 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3023 ;
   3024 ; ZNVER1-LABEL: test_pextrw:
   3025 ; ZNVER1:       # %bb.0:
   3026 ; ZNVER1-NEXT:    pextrw $0, %mm0, %eax # sched: [2:2.00]
   3027 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Hand-written IR driving the checks above: a single pextr.w call whose
          ; constant second argument 0 shows up as the `$0` immediate in every
          ; check line. Only the register-source form is exercised here (there is
          ; no load), and the i32 result is returned directly, so the checks
          ; contain no movq out of %mm0.
   3028   %1 = call i32 @llvm.x86.mmx.pextr.w(x86_mmx %a0, i32 0)
   3029   ret i32 %1
   3030 }
   3031 declare i32 @llvm.x86.mmx.pextr.w(x86_mmx, i32) nounwind readnone
   3032 
   3033 define i64 @test_phaddd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
   3034 ; GENERIC-LABEL: test_phaddd:
   3035 ; GENERIC:       # %bb.0:
   3036 ; GENERIC-NEXT:    phaddd %mm1, %mm0 # sched: [3:1.50]
   3037 ; GENERIC-NEXT:    phaddd (%rdi), %mm0 # sched: [8:1.50]
   3038 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3039 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3040 ;
   3041 ; ATOM-LABEL: test_phaddd:
   3042 ; ATOM:       # %bb.0:
   3043 ; ATOM-NEXT:    phaddd %mm1, %mm0 # sched: [3:1.50]
   3044 ; ATOM-NEXT:    phaddd (%rdi), %mm0 # sched: [4:2.00]
   3045 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   3046 ; ATOM-NEXT:    retq # sched: [79:39.50]
   3047 ;
   3048 ; SLM-LABEL: test_phaddd:
   3049 ; SLM:       # %bb.0:
   3050 ; SLM-NEXT:    phaddd %mm1, %mm0 # sched: [1:0.50]
   3051 ; SLM-NEXT:    phaddd (%rdi), %mm0 # sched: [4:1.00]
   3052 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   3053 ; SLM-NEXT:    retq # sched: [4:1.00]
   3054 ;
   3055 ; SANDY-LABEL: test_phaddd:
   3056 ; SANDY:       # %bb.0:
   3057 ; SANDY-NEXT:    phaddd %mm1, %mm0 # sched: [3:1.50]
   3058 ; SANDY-NEXT:    phaddd (%rdi), %mm0 # sched: [8:1.50]
   3059 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3060 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3061 ;
   3062 ; HASWELL-LABEL: test_phaddd:
   3063 ; HASWELL:       # %bb.0:
   3064 ; HASWELL-NEXT:    phaddd %mm1, %mm0 # sched: [3:2.00]
   3065 ; HASWELL-NEXT:    phaddd (%rdi), %mm0 # sched: [8:2.00]
   3066 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   3067 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3068 ;
   3069 ; BROADWELL-LABEL: test_phaddd:
   3070 ; BROADWELL:       # %bb.0:
   3071 ; BROADWELL-NEXT:    phaddd %mm1, %mm0 # sched: [3:2.00]
   3072 ; BROADWELL-NEXT:    phaddd (%rdi), %mm0 # sched: [8:2.00]
   3073 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   3074 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3075 ;
   3076 ; SKYLAKE-LABEL: test_phaddd:
   3077 ; SKYLAKE:       # %bb.0:
   3078 ; SKYLAKE-NEXT:    phaddd %mm1, %mm0 # sched: [3:2.00]
   3079 ; SKYLAKE-NEXT:    phaddd (%rdi), %mm0 # sched: [8:2.00]
   3080 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3081 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3082 ;
   3083 ; SKX-LABEL: test_phaddd:
   3084 ; SKX:       # %bb.0:
   3085 ; SKX-NEXT:    phaddd %mm1, %mm0 # sched: [3:2.00]
   3086 ; SKX-NEXT:    phaddd (%rdi), %mm0 # sched: [8:2.00]
   3087 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3088 ; SKX-NEXT:    retq # sched: [7:1.00]
   3089 ;
   3090 ; BTVER2-LABEL: test_phaddd:
   3091 ; BTVER2:       # %bb.0:
   3092 ; BTVER2-NEXT:    phaddd %mm1, %mm0 # sched: [1:0.50]
   3093 ; BTVER2-NEXT:    phaddd (%rdi), %mm0 # sched: [6:1.00]
   3094 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   3095 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3096 ;
   3097 ; ZNVER1-LABEL: test_phaddd:
   3098 ; ZNVER1:       # %bb.0:
   3099 ; ZNVER1-NEXT:    phaddd %mm1, %mm0 # sched: [100:0.25]
   3100 ; ZNVER1-NEXT:    phaddd (%rdi), %mm0 # sched: [100:0.25]
   3101 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3102 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Hand-written IR driving the checks above: the ssse3 phadd.d intrinsic
          ; (on x86_mmx operands) is called twice so that both operand forms of
          ; phaddd appear in the output. %1 uses the two register arguments (the
          ; `%mm1, %mm0` form); %3 uses the value loaded from %a2, which the checks
          ; expect to be folded into the `(%rdi), %mm0` memory form.
          ; NOTE(review): the znver1 checks record 100 as the first `sched` field
          ; for both forms, identical for register and memory operands and far
          ; above every other CPU here - presumably a placeholder in that
          ; scheduler model rather than a measured value; confirm before relying
          ; on it.
   3103   %1 = call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %a0, x86_mmx %a1)
   3104   %2 = load x86_mmx, x86_mmx *%a2, align 8
   3105   %3 = call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %1, x86_mmx %2)
          ; Returning the result as i64 corresponds to the trailing
          ; `movq %mm0, %rax` in every check sequence.
   3106   %4 = bitcast x86_mmx %3 to i64
   3107   ret i64 %4
   3108 }
   3109 declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone
   3110 
   3111 define i64 @test_phaddsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
   3112 ; GENERIC-LABEL: test_phaddsw:
   3113 ; GENERIC:       # %bb.0:
   3114 ; GENERIC-NEXT:    phaddsw %mm1, %mm0 # sched: [3:1.50]
   3115 ; GENERIC-NEXT:    phaddsw (%rdi), %mm0 # sched: [8:1.50]
   3116 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3117 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3118 ;
   3119 ; ATOM-LABEL: test_phaddsw:
   3120 ; ATOM:       # %bb.0:
   3121 ; ATOM-NEXT:    phaddsw %mm1, %mm0 # sched: [5:2.50]
   3122 ; ATOM-NEXT:    phaddsw (%rdi), %mm0 # sched: [6:3.00]
   3123 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   3124 ; ATOM-NEXT:    retq # sched: [79:39.50]
   3125 ;
   3126 ; SLM-LABEL: test_phaddsw:
   3127 ; SLM:       # %bb.0:
   3128 ; SLM-NEXT:    phaddsw %mm1, %mm0 # sched: [1:0.50]
   3129 ; SLM-NEXT:    phaddsw (%rdi), %mm0 # sched: [4:1.00]
   3130 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   3131 ; SLM-NEXT:    retq # sched: [4:1.00]
   3132 ;
   3133 ; SANDY-LABEL: test_phaddsw:
   3134 ; SANDY:       # %bb.0:
   3135 ; SANDY-NEXT:    phaddsw %mm1, %mm0 # sched: [3:1.50]
   3136 ; SANDY-NEXT:    phaddsw (%rdi), %mm0 # sched: [8:1.50]
   3137 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3138 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3139 ;
   3140 ; HASWELL-LABEL: test_phaddsw:
   3141 ; HASWELL:       # %bb.0:
   3142 ; HASWELL-NEXT:    phaddsw %mm1, %mm0 # sched: [3:2.00]
   3143 ; HASWELL-NEXT:    phaddsw (%rdi), %mm0 # sched: [8:2.00]
   3144 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   3145 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3146 ;
   3147 ; BROADWELL-LABEL: test_phaddsw:
   3148 ; BROADWELL:       # %bb.0:
   3149 ; BROADWELL-NEXT:    phaddsw %mm1, %mm0 # sched: [3:2.00]
   3150 ; BROADWELL-NEXT:    phaddsw (%rdi), %mm0 # sched: [8:2.00]
   3151 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   3152 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3153 ;
   3154 ; SKYLAKE-LABEL: test_phaddsw:
   3155 ; SKYLAKE:       # %bb.0:
   3156 ; SKYLAKE-NEXT:    phaddsw %mm1, %mm0 # sched: [3:2.00]
   3157 ; SKYLAKE-NEXT:    phaddsw (%rdi), %mm0 # sched: [8:2.00]
   3158 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3159 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3160 ;
   3161 ; SKX-LABEL: test_phaddsw:
   3162 ; SKX:       # %bb.0:
   3163 ; SKX-NEXT:    phaddsw %mm1, %mm0 # sched: [3:2.00]
   3164 ; SKX-NEXT:    phaddsw (%rdi), %mm0 # sched: [8:2.00]
   3165 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3166 ; SKX-NEXT:    retq # sched: [7:1.00]
   3167 ;
   3168 ; BTVER2-LABEL: test_phaddsw:
   3169 ; BTVER2:       # %bb.0:
   3170 ; BTVER2-NEXT:    phaddsw %mm1, %mm0 # sched: [1:0.50]
   3171 ; BTVER2-NEXT:    phaddsw (%rdi), %mm0 # sched: [6:1.00]
   3172 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   3173 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3174 ;
   3175 ; ZNVER1-LABEL: test_phaddsw:
   3176 ; ZNVER1:       # %bb.0:
   3177 ; ZNVER1-NEXT:    phaddsw %mm1, %mm0 # sched: [100:0.25]
   3178 ; ZNVER1-NEXT:    phaddsw (%rdi), %mm0 # sched: [100:0.25]
   3179 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3180 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Hand-written IR driving the checks above: the ssse3 phadd.sw intrinsic
          ; (on x86_mmx operands) is called twice so that both operand forms of
          ; phaddsw appear in the output. %1 uses the two register arguments (the
          ; `%mm1, %mm0` form); %3 uses the value loaded from %a2, which the checks
          ; expect to be folded into the `(%rdi), %mm0` memory form.
          ; NOTE(review): the znver1 checks record 100 as the first `sched` field
          ; for both forms, identical for register and memory operands and far
          ; above every other CPU here - presumably a placeholder in that
          ; scheduler model rather than a measured value; confirm before relying
          ; on it.
   3181   %1 = call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %a0, x86_mmx %a1)
   3182   %2 = load x86_mmx, x86_mmx *%a2, align 8
   3183   %3 = call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %1, x86_mmx %2)
          ; Returning the result as i64 corresponds to the trailing
          ; `movq %mm0, %rax` in every check sequence.
   3184   %4 = bitcast x86_mmx %3 to i64
   3185   ret i64 %4
   3186 }
   3187 declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone
   3188 
   3189 define i64 @test_phaddw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
   3190 ; GENERIC-LABEL: test_phaddw:
   3191 ; GENERIC:       # %bb.0:
   3192 ; GENERIC-NEXT:    phaddw %mm1, %mm0 # sched: [3:1.50]
   3193 ; GENERIC-NEXT:    phaddw (%rdi), %mm0 # sched: [8:1.50]
   3194 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3195 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3196 ;
   3197 ; ATOM-LABEL: test_phaddw:
   3198 ; ATOM:       # %bb.0:
   3199 ; ATOM-NEXT:    phaddw %mm1, %mm0 # sched: [5:2.50]
   3200 ; ATOM-NEXT:    phaddw (%rdi), %mm0 # sched: [6:3.00]
   3201 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   3202 ; ATOM-NEXT:    retq # sched: [79:39.50]
   3203 ;
   3204 ; SLM-LABEL: test_phaddw:
   3205 ; SLM:       # %bb.0:
   3206 ; SLM-NEXT:    phaddw %mm1, %mm0 # sched: [1:0.50]
   3207 ; SLM-NEXT:    phaddw (%rdi), %mm0 # sched: [4:1.00]
   3208 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   3209 ; SLM-NEXT:    retq # sched: [4:1.00]
   3210 ;
   3211 ; SANDY-LABEL: test_phaddw:
   3212 ; SANDY:       # %bb.0:
   3213 ; SANDY-NEXT:    phaddw %mm1, %mm0 # sched: [3:1.50]
   3214 ; SANDY-NEXT:    phaddw (%rdi), %mm0 # sched: [8:1.50]
   3215 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3216 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3217 ;
   3218 ; HASWELL-LABEL: test_phaddw:
   3219 ; HASWELL:       # %bb.0:
   3220 ; HASWELL-NEXT:    phaddw %mm1, %mm0 # sched: [3:2.00]
   3221 ; HASWELL-NEXT:    phaddw (%rdi), %mm0 # sched: [8:2.00]
   3222 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   3223 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3224 ;
   3225 ; BROADWELL-LABEL: test_phaddw:
   3226 ; BROADWELL:       # %bb.0:
   3227 ; BROADWELL-NEXT:    phaddw %mm1, %mm0 # sched: [3:2.00]
   3228 ; BROADWELL-NEXT:    phaddw (%rdi), %mm0 # sched: [8:2.00]
   3229 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   3230 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3231 ;
   3232 ; SKYLAKE-LABEL: test_phaddw:
   3233 ; SKYLAKE:       # %bb.0:
   3234 ; SKYLAKE-NEXT:    phaddw %mm1, %mm0 # sched: [3:2.00]
   3235 ; SKYLAKE-NEXT:    phaddw (%rdi), %mm0 # sched: [8:2.00]
   3236 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3237 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3238 ;
   3239 ; SKX-LABEL: test_phaddw:
   3240 ; SKX:       # %bb.0:
   3241 ; SKX-NEXT:    phaddw %mm1, %mm0 # sched: [3:2.00]
   3242 ; SKX-NEXT:    phaddw (%rdi), %mm0 # sched: [8:2.00]
   3243 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3244 ; SKX-NEXT:    retq # sched: [7:1.00]
   3245 ;
   3246 ; BTVER2-LABEL: test_phaddw:
   3247 ; BTVER2:       # %bb.0:
   3248 ; BTVER2-NEXT:    phaddw %mm1, %mm0 # sched: [1:0.50]
   3249 ; BTVER2-NEXT:    phaddw (%rdi), %mm0 # sched: [6:1.00]
   3250 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   3251 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3252 ;
   3253 ; ZNVER1-LABEL: test_phaddw:
   3254 ; ZNVER1:       # %bb.0:
   3255 ; ZNVER1-NEXT:    phaddw %mm1, %mm0 # sched: [100:0.25]
   3256 ; ZNVER1-NEXT:    phaddw (%rdi), %mm0 # sched: [100:0.25]
   3257 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3258 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Hand-written IR driving the checks above: the ssse3 phadd.w intrinsic
          ; (on x86_mmx operands) is called twice so that both operand forms of
          ; phaddw appear in the output. %1 uses the two register arguments (the
          ; `%mm1, %mm0` form); %3 uses the value loaded from %a2, which the checks
          ; expect to be folded into the `(%rdi), %mm0` memory form.
          ; NOTE(review): the znver1 checks record 100 as the first `sched` field
          ; for both forms, identical for register and memory operands and far
          ; above every other CPU here - presumably a placeholder in that
          ; scheduler model rather than a measured value; confirm before relying
          ; on it.
   3259   %1 = call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %a0, x86_mmx %a1)
   3260   %2 = load x86_mmx, x86_mmx *%a2, align 8
   3261   %3 = call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %1, x86_mmx %2)
          ; Returning the result as i64 corresponds to the trailing
          ; `movq %mm0, %rax` in every check sequence.
   3262   %4 = bitcast x86_mmx %3 to i64
   3263   ret i64 %4
   3264 }
   3265 declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone
   3266 
   3267 define i64 @test_phsubd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
   3268 ; GENERIC-LABEL: test_phsubd:
   3269 ; GENERIC:       # %bb.0:
   3270 ; GENERIC-NEXT:    phsubd %mm1, %mm0 # sched: [3:1.50]
   3271 ; GENERIC-NEXT:    phsubd (%rdi), %mm0 # sched: [8:1.50]
   3272 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3273 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3274 ;
   3275 ; ATOM-LABEL: test_phsubd:
   3276 ; ATOM:       # %bb.0:
   3277 ; ATOM-NEXT:    phsubd %mm1, %mm0 # sched: [3:1.50]
   3278 ; ATOM-NEXT:    phsubd (%rdi), %mm0 # sched: [4:2.00]
   3279 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   3280 ; ATOM-NEXT:    retq # sched: [79:39.50]
   3281 ;
   3282 ; SLM-LABEL: test_phsubd:
   3283 ; SLM:       # %bb.0:
   3284 ; SLM-NEXT:    phsubd %mm1, %mm0 # sched: [1:0.50]
   3285 ; SLM-NEXT:    phsubd (%rdi), %mm0 # sched: [4:1.00]
   3286 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   3287 ; SLM-NEXT:    retq # sched: [4:1.00]
   3288 ;
   3289 ; SANDY-LABEL: test_phsubd:
   3290 ; SANDY:       # %bb.0:
   3291 ; SANDY-NEXT:    phsubd %mm1, %mm0 # sched: [3:1.50]
   3292 ; SANDY-NEXT:    phsubd (%rdi), %mm0 # sched: [8:1.50]
   3293 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3294 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3295 ;
   3296 ; HASWELL-LABEL: test_phsubd:
   3297 ; HASWELL:       # %bb.0:
   3298 ; HASWELL-NEXT:    phsubd %mm1, %mm0 # sched: [3:2.00]
   3299 ; HASWELL-NEXT:    phsubd (%rdi), %mm0 # sched: [8:2.00]
   3300 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   3301 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3302 ;
   3303 ; BROADWELL-LABEL: test_phsubd:
   3304 ; BROADWELL:       # %bb.0:
   3305 ; BROADWELL-NEXT:    phsubd %mm1, %mm0 # sched: [3:2.00]
   3306 ; BROADWELL-NEXT:    phsubd (%rdi), %mm0 # sched: [8:2.00]
   3307 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   3308 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3309 ;
   3310 ; SKYLAKE-LABEL: test_phsubd:
   3311 ; SKYLAKE:       # %bb.0:
   3312 ; SKYLAKE-NEXT:    phsubd %mm1, %mm0 # sched: [3:2.00]
   3313 ; SKYLAKE-NEXT:    phsubd (%rdi), %mm0 # sched: [8:2.00]
   3314 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3315 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3316 ;
   3317 ; SKX-LABEL: test_phsubd:
   3318 ; SKX:       # %bb.0:
   3319 ; SKX-NEXT:    phsubd %mm1, %mm0 # sched: [3:2.00]
   3320 ; SKX-NEXT:    phsubd (%rdi), %mm0 # sched: [8:2.00]
   3321 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3322 ; SKX-NEXT:    retq # sched: [7:1.00]
   3323 ;
   3324 ; BTVER2-LABEL: test_phsubd:
   3325 ; BTVER2:       # %bb.0:
   3326 ; BTVER2-NEXT:    phsubd %mm1, %mm0 # sched: [1:0.50]
   3327 ; BTVER2-NEXT:    phsubd (%rdi), %mm0 # sched: [6:1.00]
   3328 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   3329 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3330 ;
   3331 ; ZNVER1-LABEL: test_phsubd:
   3332 ; ZNVER1:       # %bb.0:
   3333 ; ZNVER1-NEXT:    phsubd %mm1, %mm0 # sched: [100:0.25]
   3334 ; ZNVER1-NEXT:    phsubd (%rdi), %mm0 # sched: [100:0.25]
   3335 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3336 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Hand-written IR driving the checks above: the ssse3 phsub.d intrinsic
          ; (on x86_mmx operands) is called twice so that both operand forms of
          ; phsubd appear in the output. %1 uses the two register arguments (the
          ; `%mm1, %mm0` form); %3 uses the value loaded from %a2, which the checks
          ; expect to be folded into the `(%rdi), %mm0` memory form.
          ; NOTE(review): the znver1 checks record 100 as the first `sched` field
          ; for both forms, identical for register and memory operands and far
          ; above every other CPU here - presumably a placeholder in that
          ; scheduler model rather than a measured value; confirm before relying
          ; on it.
   3337   %1 = call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %a0, x86_mmx %a1)
   3338   %2 = load x86_mmx, x86_mmx *%a2, align 8
   3339   %3 = call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %1, x86_mmx %2)
          ; Returning the result as i64 corresponds to the trailing
          ; `movq %mm0, %rax` in every check sequence.
   3340   %4 = bitcast x86_mmx %3 to i64
   3341   ret i64 %4
   3342 }
   3343 declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone
   3344 
   3345 define i64 @test_phsubsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
   3346 ; GENERIC-LABEL: test_phsubsw:
   3347 ; GENERIC:       # %bb.0:
   3348 ; GENERIC-NEXT:    phsubsw %mm1, %mm0 # sched: [3:1.50]
   3349 ; GENERIC-NEXT:    phsubsw (%rdi), %mm0 # sched: [8:1.50]
   3350 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3351 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3352 ;
   3353 ; ATOM-LABEL: test_phsubsw:
   3354 ; ATOM:       # %bb.0:
   3355 ; ATOM-NEXT:    phsubsw %mm1, %mm0 # sched: [5:2.50]
   3356 ; ATOM-NEXT:    phsubsw (%rdi), %mm0 # sched: [6:3.00]
   3357 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   3358 ; ATOM-NEXT:    retq # sched: [79:39.50]
   3359 ;
   3360 ; SLM-LABEL: test_phsubsw:
   3361 ; SLM:       # %bb.0:
   3362 ; SLM-NEXT:    phsubsw %mm1, %mm0 # sched: [1:0.50]
   3363 ; SLM-NEXT:    phsubsw (%rdi), %mm0 # sched: [4:1.00]
   3364 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   3365 ; SLM-NEXT:    retq # sched: [4:1.00]
   3366 ;
   3367 ; SANDY-LABEL: test_phsubsw:
   3368 ; SANDY:       # %bb.0:
   3369 ; SANDY-NEXT:    phsubsw %mm1, %mm0 # sched: [3:1.50]
   3370 ; SANDY-NEXT:    phsubsw (%rdi), %mm0 # sched: [8:1.50]
   3371 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3372 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3373 ;
   3374 ; HASWELL-LABEL: test_phsubsw:
   3375 ; HASWELL:       # %bb.0:
   3376 ; HASWELL-NEXT:    phsubsw %mm1, %mm0 # sched: [3:2.00]
   3377 ; HASWELL-NEXT:    phsubsw (%rdi), %mm0 # sched: [8:2.00]
   3378 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   3379 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3380 ;
   3381 ; BROADWELL-LABEL: test_phsubsw:
   3382 ; BROADWELL:       # %bb.0:
   3383 ; BROADWELL-NEXT:    phsubsw %mm1, %mm0 # sched: [3:2.00]
   3384 ; BROADWELL-NEXT:    phsubsw (%rdi), %mm0 # sched: [8:2.00]
   3385 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   3386 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3387 ;
   3388 ; SKYLAKE-LABEL: test_phsubsw:
   3389 ; SKYLAKE:       # %bb.0:
   3390 ; SKYLAKE-NEXT:    phsubsw %mm1, %mm0 # sched: [3:2.00]
   3391 ; SKYLAKE-NEXT:    phsubsw (%rdi), %mm0 # sched: [8:2.00]
   3392 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3393 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3394 ;
   3395 ; SKX-LABEL: test_phsubsw:
   3396 ; SKX:       # %bb.0:
   3397 ; SKX-NEXT:    phsubsw %mm1, %mm0 # sched: [3:2.00]
   3398 ; SKX-NEXT:    phsubsw (%rdi), %mm0 # sched: [8:2.00]
   3399 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3400 ; SKX-NEXT:    retq # sched: [7:1.00]
   3401 ;
   3402 ; BTVER2-LABEL: test_phsubsw:
   3403 ; BTVER2:       # %bb.0:
   3404 ; BTVER2-NEXT:    phsubsw %mm1, %mm0 # sched: [1:0.50]
   3405 ; BTVER2-NEXT:    phsubsw (%rdi), %mm0 # sched: [6:1.00]
   3406 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   3407 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3408 ;
   3409 ; ZNVER1-LABEL: test_phsubsw:
   3410 ; ZNVER1:       # %bb.0:
   3411 ; ZNVER1-NEXT:    phsubsw %mm1, %mm0 # sched: [100:0.25]
   3412 ; ZNVER1-NEXT:    phsubsw (%rdi), %mm0 # sched: [100:0.25]
   3413 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3414 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Hand-written IR driving the checks above: the ssse3 phsub.sw intrinsic
          ; (on x86_mmx operands) is called twice so that both operand forms of
          ; phsubsw appear in the output. %1 uses the two register arguments (the
          ; `%mm1, %mm0` form); %3 uses the value loaded from %a2, which the checks
          ; expect to be folded into the `(%rdi), %mm0` memory form.
          ; NOTE(review): the znver1 checks record 100 as the first `sched` field
          ; for both forms, identical for register and memory operands and far
          ; above every other CPU here - presumably a placeholder in that
          ; scheduler model rather than a measured value; confirm before relying
          ; on it.
   3415   %1 = call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %a0, x86_mmx %a1)
   3416   %2 = load x86_mmx, x86_mmx *%a2, align 8
   3417   %3 = call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %1, x86_mmx %2)
          ; Returning the result as i64 corresponds to the trailing
          ; `movq %mm0, %rax` in every check sequence.
   3418   %4 = bitcast x86_mmx %3 to i64
   3419   ret i64 %4
   3420 }
   3421 declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone
   3422 
   3423 define i64 @test_phsubw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
   3424 ; GENERIC-LABEL: test_phsubw:
   3425 ; GENERIC:       # %bb.0:
   3426 ; GENERIC-NEXT:    phsubw %mm1, %mm0 # sched: [3:1.50]
   3427 ; GENERIC-NEXT:    phsubw (%rdi), %mm0 # sched: [8:1.50]
   3428 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3429 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3430 ;
   3431 ; ATOM-LABEL: test_phsubw:
   3432 ; ATOM:       # %bb.0:
   3433 ; ATOM-NEXT:    phsubw %mm1, %mm0 # sched: [5:2.50]
   3434 ; ATOM-NEXT:    phsubw (%rdi), %mm0 # sched: [6:3.00]
   3435 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   3436 ; ATOM-NEXT:    retq # sched: [79:39.50]
   3437 ;
   3438 ; SLM-LABEL: test_phsubw:
   3439 ; SLM:       # %bb.0:
   3440 ; SLM-NEXT:    phsubw %mm1, %mm0 # sched: [1:0.50]
   3441 ; SLM-NEXT:    phsubw (%rdi), %mm0 # sched: [4:1.00]
   3442 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   3443 ; SLM-NEXT:    retq # sched: [4:1.00]
   3444 ;
   3445 ; SANDY-LABEL: test_phsubw:
   3446 ; SANDY:       # %bb.0:
   3447 ; SANDY-NEXT:    phsubw %mm1, %mm0 # sched: [3:1.50]
   3448 ; SANDY-NEXT:    phsubw (%rdi), %mm0 # sched: [8:1.50]
   3449 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3450 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3451 ;
   3452 ; HASWELL-LABEL: test_phsubw:
   3453 ; HASWELL:       # %bb.0:
   3454 ; HASWELL-NEXT:    phsubw %mm1, %mm0 # sched: [3:2.00]
   3455 ; HASWELL-NEXT:    phsubw (%rdi), %mm0 # sched: [8:2.00]
   3456 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   3457 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3458 ;
   3459 ; BROADWELL-LABEL: test_phsubw:
   3460 ; BROADWELL:       # %bb.0:
   3461 ; BROADWELL-NEXT:    phsubw %mm1, %mm0 # sched: [3:2.00]
   3462 ; BROADWELL-NEXT:    phsubw (%rdi), %mm0 # sched: [8:2.00]
   3463 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   3464 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3465 ;
   3466 ; SKYLAKE-LABEL: test_phsubw:
   3467 ; SKYLAKE:       # %bb.0:
   3468 ; SKYLAKE-NEXT:    phsubw %mm1, %mm0 # sched: [3:2.00]
   3469 ; SKYLAKE-NEXT:    phsubw (%rdi), %mm0 # sched: [8:2.00]
   3470 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3471 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3472 ;
   3473 ; SKX-LABEL: test_phsubw:
   3474 ; SKX:       # %bb.0:
   3475 ; SKX-NEXT:    phsubw %mm1, %mm0 # sched: [3:2.00]
   3476 ; SKX-NEXT:    phsubw (%rdi), %mm0 # sched: [8:2.00]
   3477 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3478 ; SKX-NEXT:    retq # sched: [7:1.00]
   3479 ;
   3480 ; BTVER2-LABEL: test_phsubw:
   3481 ; BTVER2:       # %bb.0:
   3482 ; BTVER2-NEXT:    phsubw %mm1, %mm0 # sched: [1:0.50]
   3483 ; BTVER2-NEXT:    phsubw (%rdi), %mm0 # sched: [6:1.00]
   3484 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   3485 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3486 ;
   3487 ; ZNVER1-LABEL: test_phsubw:
   3488 ; ZNVER1:       # %bb.0:
   3489 ; ZNVER1-NEXT:    phsubw %mm1, %mm0 # sched: [100:0.25]
   3490 ; ZNVER1-NEXT:    phsubw (%rdi), %mm0 # sched: [100:0.25]
   3491 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3492 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   3493   %1 = call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %a0, x86_mmx %a1)
   3494   %2 = load x86_mmx, x86_mmx *%a2, align 8
   3495   %3 = call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %1, x86_mmx %2)
   3496   %4 = bitcast x86_mmx %3 to i64
   3497   ret i64 %4
   3498 }
   3499 declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone ; intrinsic called by test_phsubw above
   3500 
   3501 define i64 @test_pinsrw(x86_mmx %a0, i32 %a1, i16* %a2) optsize {
   3502 ; GENERIC-LABEL: test_pinsrw:
   3503 ; GENERIC:       # %bb.0:
   3504 ; GENERIC-NEXT:    pinsrw $0, %edi, %mm0 # sched: [2:1.00]
   3505 ; GENERIC-NEXT:    movswl (%rsi), %eax # sched: [5:0.50]
   3506 ; GENERIC-NEXT:    pinsrw $1, %eax, %mm0 # sched: [2:1.00]
   3507 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3508 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3509 ;
   3510 ; ATOM-LABEL: test_pinsrw:
   3511 ; ATOM:       # %bb.0:
   3512 ; ATOM-NEXT:    pinsrw $0, %edi, %mm0 # sched: [1:1.00]
   3513 ; ATOM-NEXT:    movswl (%rsi), %eax # sched: [1:1.00]
   3514 ; ATOM-NEXT:    pinsrw $1, %eax, %mm0 # sched: [1:1.00]
   3515 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   3516 ; ATOM-NEXT:    retq # sched: [79:39.50]
   3517 ;
   3518 ; SLM-LABEL: test_pinsrw:
   3519 ; SLM:       # %bb.0:
   3520 ; SLM-NEXT:    movswl (%rsi), %eax # sched: [4:1.00]
   3521 ; SLM-NEXT:    pinsrw $0, %edi, %mm0 # sched: [1:1.00]
   3522 ; SLM-NEXT:    pinsrw $1, %eax, %mm0 # sched: [1:1.00]
   3523 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   3524 ; SLM-NEXT:    retq # sched: [4:1.00]
   3525 ;
   3526 ; SANDY-LABEL: test_pinsrw:
   3527 ; SANDY:       # %bb.0:
   3528 ; SANDY-NEXT:    pinsrw $0, %edi, %mm0 # sched: [2:1.00]
   3529 ; SANDY-NEXT:    movswl (%rsi), %eax # sched: [5:0.50]
   3530 ; SANDY-NEXT:    pinsrw $1, %eax, %mm0 # sched: [2:1.00]
   3531 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3532 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3533 ;
   3534 ; HASWELL-LABEL: test_pinsrw:
   3535 ; HASWELL:       # %bb.0:
   3536 ; HASWELL-NEXT:    pinsrw $0, %edi, %mm0 # sched: [2:2.00]
   3537 ; HASWELL-NEXT:    movswl (%rsi), %eax # sched: [5:0.50]
   3538 ; HASWELL-NEXT:    pinsrw $1, %eax, %mm0 # sched: [2:2.00]
   3539 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   3540 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3541 ;
   3542 ; BROADWELL-LABEL: test_pinsrw:
   3543 ; BROADWELL:       # %bb.0:
   3544 ; BROADWELL-NEXT:    pinsrw $0, %edi, %mm0 # sched: [2:2.00]
   3545 ; BROADWELL-NEXT:    movswl (%rsi), %eax # sched: [5:0.50]
   3546 ; BROADWELL-NEXT:    pinsrw $1, %eax, %mm0 # sched: [2:2.00]
   3547 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   3548 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3549 ;
   3550 ; SKYLAKE-LABEL: test_pinsrw:
   3551 ; SKYLAKE:       # %bb.0:
   3552 ; SKYLAKE-NEXT:    pinsrw $0, %edi, %mm0 # sched: [2:2.00]
   3553 ; SKYLAKE-NEXT:    movswl (%rsi), %eax # sched: [5:0.50]
   3554 ; SKYLAKE-NEXT:    pinsrw $1, %eax, %mm0 # sched: [2:2.00]
   3555 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3556 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3557 ;
   3558 ; SKX-LABEL: test_pinsrw:
   3559 ; SKX:       # %bb.0:
   3560 ; SKX-NEXT:    pinsrw $0, %edi, %mm0 # sched: [2:2.00]
   3561 ; SKX-NEXT:    movswl (%rsi), %eax # sched: [5:0.50]
   3562 ; SKX-NEXT:    pinsrw $1, %eax, %mm0 # sched: [2:2.00]
   3563 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3564 ; SKX-NEXT:    retq # sched: [7:1.00]
   3565 ;
   3566 ; BTVER2-LABEL: test_pinsrw:
   3567 ; BTVER2:       # %bb.0:
   3568 ; BTVER2-NEXT:    pinsrw $0, %edi, %mm0 # sched: [7:0.50]
   3569 ; BTVER2-NEXT:    movswl (%rsi), %eax # sched: [4:1.00]
   3570 ; BTVER2-NEXT:    pinsrw $1, %eax, %mm0 # sched: [7:0.50]
   3571 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   3572 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3573 ;
   3574 ; ZNVER1-LABEL: test_pinsrw:
   3575 ; ZNVER1:       # %bb.0:
   3576 ; ZNVER1-NEXT:    movswl (%rsi), %eax # sched: [8:0.50]
   3577 ; ZNVER1-NEXT:    pinsrw $0, %edi, %mm0 # sched: [1:0.25]
   3578 ; ZNVER1-NEXT:    pinsrw $1, %eax, %mm0 # sched: [1:0.25]
   3579 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3580 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Test body: calls llvm.x86.mmx.pinsr.w twice -- first with %a1 and
          ; immediate 0, then with a sign-extended i16 loaded from %a2 and
          ; immediate 1 -- and returns the resulting x86_mmx value bitcast to i64.
          ;
          ; The checks above expect the load to stay a separate movswl that feeds
          ; a register-form pinsrw for every CPU prefix; the SLM and ZNVER1
          ; expectations place that load ahead of the first pinsrw.
          ;
          ; The CHECK lines are autogenerated (see the NOTE at the top of this
          ; file); regenerate them with utils/update_llc_test_checks.py instead
          ; of editing them by hand.
   3581   %1 = call x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx %a0, i32 %a1, i32 0)
   3582   %2 = load i16, i16 *%a2, align 2
   3583   %3 = sext i16 %2 to i32
   3584   %4 = call x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx %1, i32 %3, i32 1)
   3585   %5 = bitcast x86_mmx %4 to i64
   3586   ret i64 %5
   3587 }
        ; Intrinsic called by test_pinsrw above.
   3588 declare x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx, i32, i32) nounwind readnone
   3589 
   3590 define i64 @test_pmaddwd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
   3591 ; GENERIC-LABEL: test_pmaddwd:
   3592 ; GENERIC:       # %bb.0:
   3593 ; GENERIC-NEXT:    pmaddwd %mm1, %mm0 # sched: [5:1.00]
   3594 ; GENERIC-NEXT:    pmaddwd (%rdi), %mm0 # sched: [10:1.00]
   3595 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3596 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3597 ;
   3598 ; ATOM-LABEL: test_pmaddwd:
   3599 ; ATOM:       # %bb.0:
   3600 ; ATOM-NEXT:    pmaddwd %mm1, %mm0 # sched: [4:4.00]
   3601 ; ATOM-NEXT:    pmaddwd (%rdi), %mm0 # sched: [4:4.00]
   3602 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   3603 ; ATOM-NEXT:    retq # sched: [79:39.50]
   3604 ;
   3605 ; SLM-LABEL: test_pmaddwd:
   3606 ; SLM:       # %bb.0:
   3607 ; SLM-NEXT:    pmaddwd %mm1, %mm0 # sched: [4:1.00]
   3608 ; SLM-NEXT:    pmaddwd (%rdi), %mm0 # sched: [7:1.00]
   3609 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   3610 ; SLM-NEXT:    retq # sched: [4:1.00]
   3611 ;
   3612 ; SANDY-LABEL: test_pmaddwd:
   3613 ; SANDY:       # %bb.0:
   3614 ; SANDY-NEXT:    pmaddwd %mm1, %mm0 # sched: [5:1.00]
   3615 ; SANDY-NEXT:    pmaddwd (%rdi), %mm0 # sched: [10:1.00]
   3616 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3617 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3618 ;
   3619 ; HASWELL-LABEL: test_pmaddwd:
   3620 ; HASWELL:       # %bb.0:
   3621 ; HASWELL-NEXT:    pmaddwd %mm1, %mm0 # sched: [5:1.00]
   3622 ; HASWELL-NEXT:    pmaddwd (%rdi), %mm0 # sched: [10:1.00]
   3623 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   3624 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3625 ;
   3626 ; BROADWELL-LABEL: test_pmaddwd:
   3627 ; BROADWELL:       # %bb.0:
   3628 ; BROADWELL-NEXT:    pmaddwd %mm1, %mm0 # sched: [5:1.00]
   3629 ; BROADWELL-NEXT:    pmaddwd (%rdi), %mm0 # sched: [10:1.00]
   3630 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   3631 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3632 ;
   3633 ; SKYLAKE-LABEL: test_pmaddwd:
   3634 ; SKYLAKE:       # %bb.0:
   3635 ; SKYLAKE-NEXT:    pmaddwd %mm1, %mm0 # sched: [4:1.00]
   3636 ; SKYLAKE-NEXT:    pmaddwd (%rdi), %mm0 # sched: [9:1.00]
   3637 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3638 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3639 ;
   3640 ; SKX-LABEL: test_pmaddwd:
   3641 ; SKX:       # %bb.0:
   3642 ; SKX-NEXT:    pmaddwd %mm1, %mm0 # sched: [4:1.00]
   3643 ; SKX-NEXT:    pmaddwd (%rdi), %mm0 # sched: [9:1.00]
   3644 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3645 ; SKX-NEXT:    retq # sched: [7:1.00]
   3646 ;
   3647 ; BTVER2-LABEL: test_pmaddwd:
   3648 ; BTVER2:       # %bb.0:
   3649 ; BTVER2-NEXT:    pmaddwd %mm1, %mm0 # sched: [2:1.00]
   3650 ; BTVER2-NEXT:    pmaddwd (%rdi), %mm0 # sched: [7:1.00]
   3651 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   3652 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3653 ;
   3654 ; ZNVER1-LABEL: test_pmaddwd:
   3655 ; ZNVER1:       # %bb.0:
   3656 ; ZNVER1-NEXT:    pmaddwd %mm1, %mm0 # sched: [4:1.00]
   3657 ; ZNVER1-NEXT:    pmaddwd (%rdi), %mm0 # sched: [11:1.00]
   3658 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3659 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Test body: applies llvm.x86.mmx.pmadd.wd to the two register
          ; arguments, then to that result and an x86_mmx value loaded from %a2,
          ; so the checks above cover both the register-register and the
          ; memory-operand form of pmaddwd. The result is returned as an i64.
          ;
          ; The CHECK lines are autogenerated (see the NOTE at the top of this
          ; file); regenerate them with utils/update_llc_test_checks.py instead
          ; of editing them by hand.
   3660   %1 = call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %a0, x86_mmx %a1)
   3661   %2 = load x86_mmx, x86_mmx *%a2, align 8
   3662   %3 = call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %1, x86_mmx %2)
   3663   %4 = bitcast x86_mmx %3 to i64
   3664   ret i64 %4
   3665 }
        ; Intrinsic called by test_pmaddwd above.
   3666 declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone
   3667 
   3668 define i64 @test_pmaddubsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
   3669 ; GENERIC-LABEL: test_pmaddubsw:
   3670 ; GENERIC:       # %bb.0:
   3671 ; GENERIC-NEXT:    pmaddubsw %mm1, %mm0 # sched: [5:1.00]
   3672 ; GENERIC-NEXT:    pmaddubsw (%rdi), %mm0 # sched: [10:1.00]
   3673 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3674 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3675 ;
   3676 ; ATOM-LABEL: test_pmaddubsw:
   3677 ; ATOM:       # %bb.0:
   3678 ; ATOM-NEXT:    pmaddubsw %mm1, %mm0 # sched: [4:4.00]
   3679 ; ATOM-NEXT:    pmaddubsw (%rdi), %mm0 # sched: [4:4.00]
   3680 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   3681 ; ATOM-NEXT:    retq # sched: [79:39.50]
   3682 ;
   3683 ; SLM-LABEL: test_pmaddubsw:
   3684 ; SLM:       # %bb.0:
   3685 ; SLM-NEXT:    pmaddubsw %mm1, %mm0 # sched: [4:1.00]
   3686 ; SLM-NEXT:    pmaddubsw (%rdi), %mm0 # sched: [7:1.00]
   3687 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   3688 ; SLM-NEXT:    retq # sched: [4:1.00]
   3689 ;
   3690 ; SANDY-LABEL: test_pmaddubsw:
   3691 ; SANDY:       # %bb.0:
   3692 ; SANDY-NEXT:    pmaddubsw %mm1, %mm0 # sched: [5:1.00]
   3693 ; SANDY-NEXT:    pmaddubsw (%rdi), %mm0 # sched: [10:1.00]
   3694 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3695 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3696 ;
   3697 ; HASWELL-LABEL: test_pmaddubsw:
   3698 ; HASWELL:       # %bb.0:
   3699 ; HASWELL-NEXT:    pmaddubsw %mm1, %mm0 # sched: [5:1.00]
   3700 ; HASWELL-NEXT:    pmaddubsw (%rdi), %mm0 # sched: [10:1.00]
   3701 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   3702 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3703 ;
   3704 ; BROADWELL-LABEL: test_pmaddubsw:
   3705 ; BROADWELL:       # %bb.0:
   3706 ; BROADWELL-NEXT:    pmaddubsw %mm1, %mm0 # sched: [5:1.00]
   3707 ; BROADWELL-NEXT:    pmaddubsw (%rdi), %mm0 # sched: [10:1.00]
   3708 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   3709 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3710 ;
   3711 ; SKYLAKE-LABEL: test_pmaddubsw:
   3712 ; SKYLAKE:       # %bb.0:
   3713 ; SKYLAKE-NEXT:    pmaddubsw %mm1, %mm0 # sched: [4:1.00]
   3714 ; SKYLAKE-NEXT:    pmaddubsw (%rdi), %mm0 # sched: [9:1.00]
   3715 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3716 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3717 ;
   3718 ; SKX-LABEL: test_pmaddubsw:
   3719 ; SKX:       # %bb.0:
   3720 ; SKX-NEXT:    pmaddubsw %mm1, %mm0 # sched: [4:1.00]
   3721 ; SKX-NEXT:    pmaddubsw (%rdi), %mm0 # sched: [9:1.00]
   3722 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3723 ; SKX-NEXT:    retq # sched: [7:1.00]
   3724 ;
   3725 ; BTVER2-LABEL: test_pmaddubsw:
   3726 ; BTVER2:       # %bb.0:
   3727 ; BTVER2-NEXT:    pmaddubsw %mm1, %mm0 # sched: [2:1.00]
   3728 ; BTVER2-NEXT:    pmaddubsw (%rdi), %mm0 # sched: [7:1.00]
   3729 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   3730 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3731 ;
   3732 ; ZNVER1-LABEL: test_pmaddubsw:
   3733 ; ZNVER1:       # %bb.0:
   3734 ; ZNVER1-NEXT:    pmaddubsw %mm1, %mm0 # sched: [4:1.00]
   3735 ; ZNVER1-NEXT:    pmaddubsw (%rdi), %mm0 # sched: [11:1.00]
   3736 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3737 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Test body: applies llvm.x86.ssse3.pmadd.ub.sw to the two register
          ; arguments, then to that result and an x86_mmx value loaded from %a2,
          ; so the checks above cover both the register-register and the
          ; memory-operand form of pmaddubsw. The result is returned as an i64.
          ;
          ; The CHECK lines are autogenerated (see the NOTE at the top of this
          ; file); regenerate them with utils/update_llc_test_checks.py instead
          ; of editing them by hand.
   3738   %1 = call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %a0, x86_mmx %a1)
   3739   %2 = load x86_mmx, x86_mmx *%a2, align 8
   3740   %3 = call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %1, x86_mmx %2)
   3741   %4 = bitcast x86_mmx %3 to i64
   3742   ret i64 %4
   3743 }
        ; Intrinsic called by test_pmaddubsw above.
   3744 declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone
   3745 
   3746 define i64 @test_pmaxsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
   3747 ; GENERIC-LABEL: test_pmaxsw:
   3748 ; GENERIC:       # %bb.0:
   3749 ; GENERIC-NEXT:    pmaxsw %mm1, %mm0 # sched: [3:1.00]
   3750 ; GENERIC-NEXT:    pmaxsw (%rdi), %mm0 # sched: [8:1.00]
   3751 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3752 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3753 ;
   3754 ; ATOM-LABEL: test_pmaxsw:
   3755 ; ATOM:       # %bb.0:
   3756 ; ATOM-NEXT:    pmaxsw %mm1, %mm0 # sched: [1:0.50]
   3757 ; ATOM-NEXT:    pmaxsw (%rdi), %mm0 # sched: [1:1.00]
   3758 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   3759 ; ATOM-NEXT:    retq # sched: [79:39.50]
   3760 ;
   3761 ; SLM-LABEL: test_pmaxsw:
   3762 ; SLM:       # %bb.0:
   3763 ; SLM-NEXT:    pmaxsw %mm1, %mm0 # sched: [1:0.50]
   3764 ; SLM-NEXT:    pmaxsw (%rdi), %mm0 # sched: [4:1.00]
   3765 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   3766 ; SLM-NEXT:    retq # sched: [4:1.00]
   3767 ;
   3768 ; SANDY-LABEL: test_pmaxsw:
   3769 ; SANDY:       # %bb.0:
   3770 ; SANDY-NEXT:    pmaxsw %mm1, %mm0 # sched: [3:1.00]
   3771 ; SANDY-NEXT:    pmaxsw (%rdi), %mm0 # sched: [8:1.00]
   3772 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3773 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3774 ;
   3775 ; HASWELL-LABEL: test_pmaxsw:
   3776 ; HASWELL:       # %bb.0:
   3777 ; HASWELL-NEXT:    pmaxsw %mm1, %mm0 # sched: [1:0.50]
   3778 ; HASWELL-NEXT:    pmaxsw (%rdi), %mm0 # sched: [6:0.50]
   3779 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   3780 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3781 ;
   3782 ; BROADWELL-LABEL: test_pmaxsw:
   3783 ; BROADWELL:       # %bb.0:
   3784 ; BROADWELL-NEXT:    pmaxsw %mm1, %mm0 # sched: [1:0.50]
   3785 ; BROADWELL-NEXT:    pmaxsw (%rdi), %mm0 # sched: [6:0.50]
   3786 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   3787 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3788 ;
   3789 ; SKYLAKE-LABEL: test_pmaxsw:
   3790 ; SKYLAKE:       # %bb.0:
   3791 ; SKYLAKE-NEXT:    pmaxsw %mm1, %mm0 # sched: [1:1.00]
   3792 ; SKYLAKE-NEXT:    pmaxsw (%rdi), %mm0 # sched: [6:1.00]
   3793 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3794 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3795 ;
   3796 ; SKX-LABEL: test_pmaxsw:
   3797 ; SKX:       # %bb.0:
   3798 ; SKX-NEXT:    pmaxsw %mm1, %mm0 # sched: [1:1.00]
   3799 ; SKX-NEXT:    pmaxsw (%rdi), %mm0 # sched: [6:1.00]
   3800 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3801 ; SKX-NEXT:    retq # sched: [7:1.00]
   3802 ;
   3803 ; BTVER2-LABEL: test_pmaxsw:
   3804 ; BTVER2:       # %bb.0:
   3805 ; BTVER2-NEXT:    pmaxsw %mm1, %mm0 # sched: [1:0.50]
   3806 ; BTVER2-NEXT:    pmaxsw (%rdi), %mm0 # sched: [6:1.00]
   3807 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   3808 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3809 ;
   3810 ; ZNVER1-LABEL: test_pmaxsw:
   3811 ; ZNVER1:       # %bb.0:
   3812 ; ZNVER1-NEXT:    pmaxsw %mm1, %mm0 # sched: [1:0.25]
   3813 ; ZNVER1-NEXT:    pmaxsw (%rdi), %mm0 # sched: [8:0.50]
   3814 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3815 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Test body: applies llvm.x86.mmx.pmaxs.w to the two register
          ; arguments, then to that result and an x86_mmx value loaded from %a2,
          ; so the checks above cover both the register-register and the
          ; memory-operand form of pmaxsw. The result is returned as an i64.
          ;
          ; The CHECK lines are autogenerated (see the NOTE at the top of this
          ; file); regenerate them with utils/update_llc_test_checks.py instead
          ; of editing them by hand.
   3816   %1 = call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %a0, x86_mmx %a1)
   3817   %2 = load x86_mmx, x86_mmx *%a2, align 8
   3818   %3 = call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %1, x86_mmx %2)
   3819   %4 = bitcast x86_mmx %3 to i64
   3820   ret i64 %4
   3821 }
        ; Intrinsic called by test_pmaxsw above.
   3822 declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone
   3823 
   3824 define i64 @test_pmaxub(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
   3825 ; GENERIC-LABEL: test_pmaxub:
   3826 ; GENERIC:       # %bb.0:
   3827 ; GENERIC-NEXT:    pmaxub %mm1, %mm0 # sched: [3:1.00]
   3828 ; GENERIC-NEXT:    pmaxub (%rdi), %mm0 # sched: [8:1.00]
   3829 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3830 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3831 ;
   3832 ; ATOM-LABEL: test_pmaxub:
   3833 ; ATOM:       # %bb.0:
   3834 ; ATOM-NEXT:    pmaxub %mm1, %mm0 # sched: [1:0.50]
   3835 ; ATOM-NEXT:    pmaxub (%rdi), %mm0 # sched: [1:1.00]
   3836 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   3837 ; ATOM-NEXT:    retq # sched: [79:39.50]
   3838 ;
   3839 ; SLM-LABEL: test_pmaxub:
   3840 ; SLM:       # %bb.0:
   3841 ; SLM-NEXT:    pmaxub %mm1, %mm0 # sched: [1:0.50]
   3842 ; SLM-NEXT:    pmaxub (%rdi), %mm0 # sched: [4:1.00]
   3843 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   3844 ; SLM-NEXT:    retq # sched: [4:1.00]
   3845 ;
   3846 ; SANDY-LABEL: test_pmaxub:
   3847 ; SANDY:       # %bb.0:
   3848 ; SANDY-NEXT:    pmaxub %mm1, %mm0 # sched: [3:1.00]
   3849 ; SANDY-NEXT:    pmaxub (%rdi), %mm0 # sched: [8:1.00]
   3850 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3851 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3852 ;
   3853 ; HASWELL-LABEL: test_pmaxub:
   3854 ; HASWELL:       # %bb.0:
   3855 ; HASWELL-NEXT:    pmaxub %mm1, %mm0 # sched: [1:0.50]
   3856 ; HASWELL-NEXT:    pmaxub (%rdi), %mm0 # sched: [6:0.50]
   3857 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   3858 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3859 ;
   3860 ; BROADWELL-LABEL: test_pmaxub:
   3861 ; BROADWELL:       # %bb.0:
   3862 ; BROADWELL-NEXT:    pmaxub %mm1, %mm0 # sched: [1:0.50]
   3863 ; BROADWELL-NEXT:    pmaxub (%rdi), %mm0 # sched: [6:0.50]
   3864 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   3865 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3866 ;
   3867 ; SKYLAKE-LABEL: test_pmaxub:
   3868 ; SKYLAKE:       # %bb.0:
   3869 ; SKYLAKE-NEXT:    pmaxub %mm1, %mm0 # sched: [1:1.00]
   3870 ; SKYLAKE-NEXT:    pmaxub (%rdi), %mm0 # sched: [6:1.00]
   3871 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3872 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3873 ;
   3874 ; SKX-LABEL: test_pmaxub:
   3875 ; SKX:       # %bb.0:
   3876 ; SKX-NEXT:    pmaxub %mm1, %mm0 # sched: [1:1.00]
   3877 ; SKX-NEXT:    pmaxub (%rdi), %mm0 # sched: [6:1.00]
   3878 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3879 ; SKX-NEXT:    retq # sched: [7:1.00]
   3880 ;
   3881 ; BTVER2-LABEL: test_pmaxub:
   3882 ; BTVER2:       # %bb.0:
   3883 ; BTVER2-NEXT:    pmaxub %mm1, %mm0 # sched: [1:0.50]
   3884 ; BTVER2-NEXT:    pmaxub (%rdi), %mm0 # sched: [6:1.00]
   3885 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   3886 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3887 ;
   3888 ; ZNVER1-LABEL: test_pmaxub:
   3889 ; ZNVER1:       # %bb.0:
   3890 ; ZNVER1-NEXT:    pmaxub %mm1, %mm0 # sched: [1:0.25]
   3891 ; ZNVER1-NEXT:    pmaxub (%rdi), %mm0 # sched: [8:0.50]
   3892 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3893 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Test body: applies llvm.x86.mmx.pmaxu.b to the two register
          ; arguments, then to that result and an x86_mmx value loaded from %a2,
          ; so the checks above cover both the register-register and the
          ; memory-operand form of pmaxub. The result is returned as an i64.
          ;
          ; The CHECK lines are autogenerated (see the NOTE at the top of this
          ; file); regenerate them with utils/update_llc_test_checks.py instead
          ; of editing them by hand.
   3894   %1 = call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %a0, x86_mmx %a1)
   3895   %2 = load x86_mmx, x86_mmx *%a2, align 8
   3896   %3 = call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %1, x86_mmx %2)
   3897   %4 = bitcast x86_mmx %3 to i64
   3898   ret i64 %4
   3899 }
        ; Intrinsic called by test_pmaxub above.
   3900 declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone
   3901 
   3902 define i64 @test_pminsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
   3903 ; GENERIC-LABEL: test_pminsw:
   3904 ; GENERIC:       # %bb.0:
   3905 ; GENERIC-NEXT:    pminsw %mm1, %mm0 # sched: [3:1.00]
   3906 ; GENERIC-NEXT:    pminsw (%rdi), %mm0 # sched: [8:1.00]
   3907 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3908 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3909 ;
   3910 ; ATOM-LABEL: test_pminsw:
   3911 ; ATOM:       # %bb.0:
   3912 ; ATOM-NEXT:    pminsw %mm1, %mm0 # sched: [1:0.50]
   3913 ; ATOM-NEXT:    pminsw (%rdi), %mm0 # sched: [1:1.00]
   3914 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   3915 ; ATOM-NEXT:    retq # sched: [79:39.50]
   3916 ;
   3917 ; SLM-LABEL: test_pminsw:
   3918 ; SLM:       # %bb.0:
   3919 ; SLM-NEXT:    pminsw %mm1, %mm0 # sched: [1:0.50]
   3920 ; SLM-NEXT:    pminsw (%rdi), %mm0 # sched: [4:1.00]
   3921 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   3922 ; SLM-NEXT:    retq # sched: [4:1.00]
   3923 ;
   3924 ; SANDY-LABEL: test_pminsw:
   3925 ; SANDY:       # %bb.0:
   3926 ; SANDY-NEXT:    pminsw %mm1, %mm0 # sched: [3:1.00]
   3927 ; SANDY-NEXT:    pminsw (%rdi), %mm0 # sched: [8:1.00]
   3928 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3929 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3930 ;
   3931 ; HASWELL-LABEL: test_pminsw:
   3932 ; HASWELL:       # %bb.0:
   3933 ; HASWELL-NEXT:    pminsw %mm1, %mm0 # sched: [1:0.50]
   3934 ; HASWELL-NEXT:    pminsw (%rdi), %mm0 # sched: [6:0.50]
   3935 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   3936 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3937 ;
   3938 ; BROADWELL-LABEL: test_pminsw:
   3939 ; BROADWELL:       # %bb.0:
   3940 ; BROADWELL-NEXT:    pminsw %mm1, %mm0 # sched: [1:0.50]
   3941 ; BROADWELL-NEXT:    pminsw (%rdi), %mm0 # sched: [6:0.50]
   3942 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   3943 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3944 ;
   3945 ; SKYLAKE-LABEL: test_pminsw:
   3946 ; SKYLAKE:       # %bb.0:
   3947 ; SKYLAKE-NEXT:    pminsw %mm1, %mm0 # sched: [1:1.00]
   3948 ; SKYLAKE-NEXT:    pminsw (%rdi), %mm0 # sched: [6:1.00]
   3949 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3950 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3951 ;
   3952 ; SKX-LABEL: test_pminsw:
   3953 ; SKX:       # %bb.0:
   3954 ; SKX-NEXT:    pminsw %mm1, %mm0 # sched: [1:1.00]
   3955 ; SKX-NEXT:    pminsw (%rdi), %mm0 # sched: [6:1.00]
   3956 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3957 ; SKX-NEXT:    retq # sched: [7:1.00]
   3958 ;
   3959 ; BTVER2-LABEL: test_pminsw:
   3960 ; BTVER2:       # %bb.0:
   3961 ; BTVER2-NEXT:    pminsw %mm1, %mm0 # sched: [1:0.50]
   3962 ; BTVER2-NEXT:    pminsw (%rdi), %mm0 # sched: [6:1.00]
   3963 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   3964 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3965 ;
   3966 ; ZNVER1-LABEL: test_pminsw:
   3967 ; ZNVER1:       # %bb.0:
   3968 ; ZNVER1-NEXT:    pminsw %mm1, %mm0 # sched: [1:0.25]
   3969 ; ZNVER1-NEXT:    pminsw (%rdi), %mm0 # sched: [8:0.50]
   3970 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3971 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Test body: applies llvm.x86.mmx.pmins.w to the two register
          ; arguments, then to that result and an x86_mmx value loaded from %a2,
          ; so the checks above cover both the register-register and the
          ; memory-operand form of pminsw. The result is returned as an i64.
          ;
          ; The CHECK lines are autogenerated (see the NOTE at the top of this
          ; file); regenerate them with utils/update_llc_test_checks.py instead
          ; of editing them by hand.
   3972   %1 = call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %a0, x86_mmx %a1)
   3973   %2 = load x86_mmx, x86_mmx *%a2, align 8
   3974   %3 = call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %1, x86_mmx %2)
   3975   %4 = bitcast x86_mmx %3 to i64
   3976   ret i64 %4
   3977 }
        ; Intrinsic called by test_pminsw above.
   3978 declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone
   3979 
   3980 define i64 @test_pminub(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
   3981 ; GENERIC-LABEL: test_pminub:
   3982 ; GENERIC:       # %bb.0:
   3983 ; GENERIC-NEXT:    pminub %mm1, %mm0 # sched: [3:1.00]
   3984 ; GENERIC-NEXT:    pminub (%rdi), %mm0 # sched: [8:1.00]
   3985 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   3986 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3987 ;
   3988 ; ATOM-LABEL: test_pminub:
   3989 ; ATOM:       # %bb.0:
   3990 ; ATOM-NEXT:    pminub %mm1, %mm0 # sched: [1:0.50]
   3991 ; ATOM-NEXT:    pminub (%rdi), %mm0 # sched: [1:1.00]
   3992 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   3993 ; ATOM-NEXT:    retq # sched: [79:39.50]
   3994 ;
   3995 ; SLM-LABEL: test_pminub:
   3996 ; SLM:       # %bb.0:
   3997 ; SLM-NEXT:    pminub %mm1, %mm0 # sched: [1:0.50]
   3998 ; SLM-NEXT:    pminub (%rdi), %mm0 # sched: [4:1.00]
   3999 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   4000 ; SLM-NEXT:    retq # sched: [4:1.00]
   4001 ;
   4002 ; SANDY-LABEL: test_pminub:
   4003 ; SANDY:       # %bb.0:
   4004 ; SANDY-NEXT:    pminub %mm1, %mm0 # sched: [3:1.00]
   4005 ; SANDY-NEXT:    pminub (%rdi), %mm0 # sched: [8:1.00]
   4006 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4007 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4008 ;
   4009 ; HASWELL-LABEL: test_pminub:
   4010 ; HASWELL:       # %bb.0:
   4011 ; HASWELL-NEXT:    pminub %mm1, %mm0 # sched: [1:0.50]
   4012 ; HASWELL-NEXT:    pminub (%rdi), %mm0 # sched: [6:0.50]
   4013 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   4014 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4015 ;
   4016 ; BROADWELL-LABEL: test_pminub:
   4017 ; BROADWELL:       # %bb.0:
   4018 ; BROADWELL-NEXT:    pminub %mm1, %mm0 # sched: [1:0.50]
   4019 ; BROADWELL-NEXT:    pminub (%rdi), %mm0 # sched: [6:0.50]
   4020 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   4021 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4022 ;
   4023 ; SKYLAKE-LABEL: test_pminub:
   4024 ; SKYLAKE:       # %bb.0:
   4025 ; SKYLAKE-NEXT:    pminub %mm1, %mm0 # sched: [1:1.00]
   4026 ; SKYLAKE-NEXT:    pminub (%rdi), %mm0 # sched: [6:1.00]
   4027 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4028 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4029 ;
   4030 ; SKX-LABEL: test_pminub:
   4031 ; SKX:       # %bb.0:
   4032 ; SKX-NEXT:    pminub %mm1, %mm0 # sched: [1:1.00]
   4033 ; SKX-NEXT:    pminub (%rdi), %mm0 # sched: [6:1.00]
   4034 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4035 ; SKX-NEXT:    retq # sched: [7:1.00]
   4036 ;
   4037 ; BTVER2-LABEL: test_pminub:
   4038 ; BTVER2:       # %bb.0:
   4039 ; BTVER2-NEXT:    pminub %mm1, %mm0 # sched: [1:0.50]
   4040 ; BTVER2-NEXT:    pminub (%rdi), %mm0 # sched: [6:1.00]
   4041 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   4042 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4043 ;
   4044 ; ZNVER1-LABEL: test_pminub:
   4045 ; ZNVER1:       # %bb.0:
   4046 ; ZNVER1-NEXT:    pminub %mm1, %mm0 # sched: [1:0.25]
   4047 ; ZNVER1-NEXT:    pminub (%rdi), %mm0 # sched: [8:0.50]
   4048 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4049 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Test body: applies llvm.x86.mmx.pminu.b to the two register
          ; arguments, then to that result and an x86_mmx value loaded from %a2,
          ; so the checks above cover both the register-register and the
          ; memory-operand form of pminub. The result is returned as an i64.
          ;
          ; The CHECK lines are autogenerated (see the NOTE at the top of this
          ; file); regenerate them with utils/update_llc_test_checks.py instead
          ; of editing them by hand.
   4050   %1 = call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %a0, x86_mmx %a1)
   4051   %2 = load x86_mmx, x86_mmx *%a2, align 8
   4052   %3 = call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %1, x86_mmx %2)
   4053   %4 = bitcast x86_mmx %3 to i64
   4054   ret i64 %4
   4055 }
        ; Intrinsic called by test_pminub above.
   4056 declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone
   4057 
   4058 define i32 @test_pmovmskb(x86_mmx %a0) optsize {
   4059 ; GENERIC-LABEL: test_pmovmskb:
   4060 ; GENERIC:       # %bb.0:
   4061 ; GENERIC-NEXT:    pmovmskb %mm0, %eax # sched: [1:1.00]
   4062 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4063 ;
   4064 ; ATOM-LABEL: test_pmovmskb:
   4065 ; ATOM:       # %bb.0:
   4066 ; ATOM-NEXT:    pmovmskb %mm0, %eax # sched: [3:3.00]
   4067 ; ATOM-NEXT:    retq # sched: [79:39.50]
   4068 ;
   4069 ; SLM-LABEL: test_pmovmskb:
   4070 ; SLM:       # %bb.0:
   4071 ; SLM-NEXT:    pmovmskb %mm0, %eax # sched: [4:1.00]
   4072 ; SLM-NEXT:    retq # sched: [4:1.00]
   4073 ;
   4074 ; SANDY-LABEL: test_pmovmskb:
   4075 ; SANDY:       # %bb.0:
   4076 ; SANDY-NEXT:    pmovmskb %mm0, %eax # sched: [1:1.00]
   4077 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4078 ;
   4079 ; HASWELL-LABEL: test_pmovmskb:
   4080 ; HASWELL:       # %bb.0:
   4081 ; HASWELL-NEXT:    pmovmskb %mm0, %eax # sched: [1:1.00]
   4082 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4083 ;
   4084 ; BROADWELL-LABEL: test_pmovmskb:
   4085 ; BROADWELL:       # %bb.0:
   4086 ; BROADWELL-NEXT:    pmovmskb %mm0, %eax # sched: [1:1.00]
   4087 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4088 ;
   4089 ; SKYLAKE-LABEL: test_pmovmskb:
   4090 ; SKYLAKE:       # %bb.0:
   4091 ; SKYLAKE-NEXT:    pmovmskb %mm0, %eax # sched: [2:1.00]
   4092 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4093 ;
   4094 ; SKX-LABEL: test_pmovmskb:
   4095 ; SKX:       # %bb.0:
   4096 ; SKX-NEXT:    pmovmskb %mm0, %eax # sched: [2:1.00]
   4097 ; SKX-NEXT:    retq # sched: [7:1.00]
   4098 ;
   4099 ; BTVER2-LABEL: test_pmovmskb:
   4100 ; BTVER2:       # %bb.0:
   4101 ; BTVER2-NEXT:    pmovmskb %mm0, %eax # sched: [3:1.00]
   4102 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4103 ;
   4104 ; ZNVER1-LABEL: test_pmovmskb:
   4105 ; ZNVER1:       # %bb.0:
   4106 ; ZNVER1-NEXT:    pmovmskb %mm0, %eax # sched: [1:1.00]
   4107 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Test body: calls llvm.x86.mmx.pmovmskb on %a0 and returns its i32
          ; result directly. There is no load in this test, so the checks above
          ; only cover the register form (pmovmskb %mm0, %eax).
          ;
          ; The CHECK lines are autogenerated (see the NOTE at the top of this
          ; file); regenerate them with utils/update_llc_test_checks.py instead
          ; of editing them by hand.
   4108   %1 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %a0)
   4109   ret i32 %1
   4110 }
        ; Intrinsic called by test_pmovmskb above.
   4111 declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx) nounwind readnone
   4112 
   4113 define i64 @test_pmulhrsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
   4114 ; GENERIC-LABEL: test_pmulhrsw:
   4115 ; GENERIC:       # %bb.0:
   4116 ; GENERIC-NEXT:    pmulhrsw %mm1, %mm0 # sched: [5:1.00]
   4117 ; GENERIC-NEXT:    pmulhrsw (%rdi), %mm0 # sched: [10:1.00]
   4118 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4119 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4120 ;
   4121 ; ATOM-LABEL: test_pmulhrsw:
   4122 ; ATOM:       # %bb.0:
   4123 ; ATOM-NEXT:    pmulhrsw %mm1, %mm0 # sched: [4:4.00]
   4124 ; ATOM-NEXT:    pmulhrsw (%rdi), %mm0 # sched: [4:4.00]
   4125 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   4126 ; ATOM-NEXT:    retq # sched: [79:39.50]
   4127 ;
   4128 ; SLM-LABEL: test_pmulhrsw:
   4129 ; SLM:       # %bb.0:
   4130 ; SLM-NEXT:    pmulhrsw %mm1, %mm0 # sched: [4:1.00]
   4131 ; SLM-NEXT:    pmulhrsw (%rdi), %mm0 # sched: [7:1.00]
   4132 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   4133 ; SLM-NEXT:    retq # sched: [4:1.00]
   4134 ;
   4135 ; SANDY-LABEL: test_pmulhrsw:
   4136 ; SANDY:       # %bb.0:
   4137 ; SANDY-NEXT:    pmulhrsw %mm1, %mm0 # sched: [5:1.00]
   4138 ; SANDY-NEXT:    pmulhrsw (%rdi), %mm0 # sched: [10:1.00]
   4139 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4140 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4141 ;
   4142 ; HASWELL-LABEL: test_pmulhrsw:
   4143 ; HASWELL:       # %bb.0:
   4144 ; HASWELL-NEXT:    pmulhrsw %mm1, %mm0 # sched: [5:1.00]
   4145 ; HASWELL-NEXT:    pmulhrsw (%rdi), %mm0 # sched: [10:1.00]
   4146 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   4147 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4148 ;
   4149 ; BROADWELL-LABEL: test_pmulhrsw:
   4150 ; BROADWELL:       # %bb.0:
   4151 ; BROADWELL-NEXT:    pmulhrsw %mm1, %mm0 # sched: [5:1.00]
   4152 ; BROADWELL-NEXT:    pmulhrsw (%rdi), %mm0 # sched: [10:1.00]
   4153 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   4154 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4155 ;
   4156 ; SKYLAKE-LABEL: test_pmulhrsw:
   4157 ; SKYLAKE:       # %bb.0:
   4158 ; SKYLAKE-NEXT:    pmulhrsw %mm1, %mm0 # sched: [4:1.00]
   4159 ; SKYLAKE-NEXT:    pmulhrsw (%rdi), %mm0 # sched: [9:1.00]
   4160 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4161 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4162 ;
   4163 ; SKX-LABEL: test_pmulhrsw:
   4164 ; SKX:       # %bb.0:
   4165 ; SKX-NEXT:    pmulhrsw %mm1, %mm0 # sched: [4:1.00]
   4166 ; SKX-NEXT:    pmulhrsw (%rdi), %mm0 # sched: [9:1.00]
   4167 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4168 ; SKX-NEXT:    retq # sched: [7:1.00]
   4169 ;
   4170 ; BTVER2-LABEL: test_pmulhrsw:
   4171 ; BTVER2:       # %bb.0:
   4172 ; BTVER2-NEXT:    pmulhrsw %mm1, %mm0 # sched: [2:1.00]
   4173 ; BTVER2-NEXT:    pmulhrsw (%rdi), %mm0 # sched: [7:1.00]
   4174 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   4175 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4176 ;
   4177 ; ZNVER1-LABEL: test_pmulhrsw:
   4178 ; ZNVER1:       # %bb.0:
   4179 ; ZNVER1-NEXT:    pmulhrsw %mm1, %mm0 # sched: [4:1.00]
   4180 ; ZNVER1-NEXT:    pmulhrsw (%rdi), %mm0 # sched: [11:1.00]
   4181 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4182 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   4183   %1 = call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %a0, x86_mmx %a1)
   4184   %2 = load x86_mmx, x86_mmx *%a2, align 8
   4185   %3 = call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %1, x86_mmx %2)
   4186   %4 = bitcast x86_mmx %3 to i64
   4187   ret i64 %4
   4188 }
   4189 declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone ; intrinsic called twice by test_pmulhrsw above
   4190 
   4191 define i64 @test_pmulhw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; pmulhw scheduling check: register operand, then memory operand
   4192 ; GENERIC-LABEL: test_pmulhw:
   4193 ; GENERIC:       # %bb.0:
   4194 ; GENERIC-NEXT:    pmulhw %mm1, %mm0 # sched: [5:1.00]
   4195 ; GENERIC-NEXT:    pmulhw (%rdi), %mm0 # sched: [10:1.00]
   4196 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4197 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4198 ;
   4199 ; ATOM-LABEL: test_pmulhw:
   4200 ; ATOM:       # %bb.0:
   4201 ; ATOM-NEXT:    pmulhw %mm1, %mm0 # sched: [4:4.00]
   4202 ; ATOM-NEXT:    pmulhw (%rdi), %mm0 # sched: [4:4.00]
   4203 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   4204 ; ATOM-NEXT:    retq # sched: [79:39.50]
   4205 ;
   4206 ; SLM-LABEL: test_pmulhw:
   4207 ; SLM:       # %bb.0:
   4208 ; SLM-NEXT:    pmulhw %mm1, %mm0 # sched: [4:1.00]
   4209 ; SLM-NEXT:    pmulhw (%rdi), %mm0 # sched: [7:1.00]
   4210 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   4211 ; SLM-NEXT:    retq # sched: [4:1.00]
   4212 ;
   4213 ; SANDY-LABEL: test_pmulhw:
   4214 ; SANDY:       # %bb.0:
   4215 ; SANDY-NEXT:    pmulhw %mm1, %mm0 # sched: [5:1.00]
   4216 ; SANDY-NEXT:    pmulhw (%rdi), %mm0 # sched: [10:1.00]
   4217 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4218 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4219 ;
   4220 ; HASWELL-LABEL: test_pmulhw:
   4221 ; HASWELL:       # %bb.0:
   4222 ; HASWELL-NEXT:    pmulhw %mm1, %mm0 # sched: [5:1.00]
   4223 ; HASWELL-NEXT:    pmulhw (%rdi), %mm0 # sched: [10:1.00]
   4224 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   4225 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4226 ;
   4227 ; BROADWELL-LABEL: test_pmulhw:
   4228 ; BROADWELL:       # %bb.0:
   4229 ; BROADWELL-NEXT:    pmulhw %mm1, %mm0 # sched: [5:1.00]
   4230 ; BROADWELL-NEXT:    pmulhw (%rdi), %mm0 # sched: [10:1.00]
   4231 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   4232 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4233 ;
   4234 ; SKYLAKE-LABEL: test_pmulhw:
   4235 ; SKYLAKE:       # %bb.0:
   4236 ; SKYLAKE-NEXT:    pmulhw %mm1, %mm0 # sched: [4:1.00]
   4237 ; SKYLAKE-NEXT:    pmulhw (%rdi), %mm0 # sched: [9:1.00]
   4238 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4239 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4240 ;
   4241 ; SKX-LABEL: test_pmulhw:
   4242 ; SKX:       # %bb.0:
   4243 ; SKX-NEXT:    pmulhw %mm1, %mm0 # sched: [4:1.00]
   4244 ; SKX-NEXT:    pmulhw (%rdi), %mm0 # sched: [9:1.00]
   4245 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4246 ; SKX-NEXT:    retq # sched: [7:1.00]
   4247 ;
   4248 ; BTVER2-LABEL: test_pmulhw:
   4249 ; BTVER2:       # %bb.0:
   4250 ; BTVER2-NEXT:    pmulhw %mm1, %mm0 # sched: [2:1.00]
   4251 ; BTVER2-NEXT:    pmulhw (%rdi), %mm0 # sched: [7:1.00]
   4252 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   4253 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4254 ;
   4255 ; ZNVER1-LABEL: test_pmulhw:
   4256 ; ZNVER1:       # %bb.0:
   4257 ; ZNVER1-NEXT:    pmulhw %mm1, %mm0 # sched: [4:1.00]
   4258 ; ZNVER1-NEXT:    pmulhw (%rdi), %mm0 # sched: [11:1.00]
   4259 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4260 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   4261   %1 = call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %a0, x86_mmx %a1) ; checked as the register form: pmulhw %mm1, %mm0
   4262   %2 = load x86_mmx, x86_mmx *%a2, align 8 ; no separate load in the checks; it appears as the (%rdi) operand
   4263   %3 = call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %1, x86_mmx %2) ; checked as the memory form: pmulhw (%rdi), %mm0
   4264   %4 = bitcast x86_mmx %3 to i64 ; checked as movq %mm0, %rax
   4265   ret i64 %4
   4266 }
   4267 declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone ; intrinsic exercised by test_pmulhw
   4268 
   4269 define i64 @test_pmulhuw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; pmulhuw scheduling check: register operand, then memory operand
   4270 ; GENERIC-LABEL: test_pmulhuw:
   4271 ; GENERIC:       # %bb.0:
   4272 ; GENERIC-NEXT:    pmulhuw %mm1, %mm0 # sched: [5:1.00]
   4273 ; GENERIC-NEXT:    pmulhuw (%rdi), %mm0 # sched: [10:1.00]
   4274 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4275 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4276 ;
   4277 ; ATOM-LABEL: test_pmulhuw:
   4278 ; ATOM:       # %bb.0:
   4279 ; ATOM-NEXT:    pmulhuw %mm1, %mm0 # sched: [4:4.00]
   4280 ; ATOM-NEXT:    pmulhuw (%rdi), %mm0 # sched: [4:4.00]
   4281 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   4282 ; ATOM-NEXT:    retq # sched: [79:39.50]
   4283 ;
   4284 ; SLM-LABEL: test_pmulhuw:
   4285 ; SLM:       # %bb.0:
   4286 ; SLM-NEXT:    pmulhuw %mm1, %mm0 # sched: [4:1.00]
   4287 ; SLM-NEXT:    pmulhuw (%rdi), %mm0 # sched: [7:1.00]
   4288 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   4289 ; SLM-NEXT:    retq # sched: [4:1.00]
   4290 ;
   4291 ; SANDY-LABEL: test_pmulhuw:
   4292 ; SANDY:       # %bb.0:
   4293 ; SANDY-NEXT:    pmulhuw %mm1, %mm0 # sched: [5:1.00]
   4294 ; SANDY-NEXT:    pmulhuw (%rdi), %mm0 # sched: [10:1.00]
   4295 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4296 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4297 ;
   4298 ; HASWELL-LABEL: test_pmulhuw:
   4299 ; HASWELL:       # %bb.0:
   4300 ; HASWELL-NEXT:    pmulhuw %mm1, %mm0 # sched: [5:1.00]
   4301 ; HASWELL-NEXT:    pmulhuw (%rdi), %mm0 # sched: [10:1.00]
   4302 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   4303 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4304 ;
   4305 ; BROADWELL-LABEL: test_pmulhuw:
   4306 ; BROADWELL:       # %bb.0:
   4307 ; BROADWELL-NEXT:    pmulhuw %mm1, %mm0 # sched: [5:1.00]
   4308 ; BROADWELL-NEXT:    pmulhuw (%rdi), %mm0 # sched: [10:1.00]
   4309 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   4310 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4311 ;
   4312 ; SKYLAKE-LABEL: test_pmulhuw:
   4313 ; SKYLAKE:       # %bb.0:
   4314 ; SKYLAKE-NEXT:    pmulhuw %mm1, %mm0 # sched: [4:1.00]
   4315 ; SKYLAKE-NEXT:    pmulhuw (%rdi), %mm0 # sched: [9:1.00]
   4316 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4317 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4318 ;
   4319 ; SKX-LABEL: test_pmulhuw:
   4320 ; SKX:       # %bb.0:
   4321 ; SKX-NEXT:    pmulhuw %mm1, %mm0 # sched: [4:1.00]
   4322 ; SKX-NEXT:    pmulhuw (%rdi), %mm0 # sched: [9:1.00]
   4323 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4324 ; SKX-NEXT:    retq # sched: [7:1.00]
   4325 ;
   4326 ; BTVER2-LABEL: test_pmulhuw:
   4327 ; BTVER2:       # %bb.0:
   4328 ; BTVER2-NEXT:    pmulhuw %mm1, %mm0 # sched: [2:1.00]
   4329 ; BTVER2-NEXT:    pmulhuw (%rdi), %mm0 # sched: [7:1.00]
   4330 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   4331 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4332 ;
   4333 ; ZNVER1-LABEL: test_pmulhuw:
   4334 ; ZNVER1:       # %bb.0:
   4335 ; ZNVER1-NEXT:    pmulhuw %mm1, %mm0 # sched: [4:1.00]
   4336 ; ZNVER1-NEXT:    pmulhuw (%rdi), %mm0 # sched: [11:1.00]
   4337 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4338 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   4339   %1 = call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %a0, x86_mmx %a1) ; checked as the register form: pmulhuw %mm1, %mm0
   4340   %2 = load x86_mmx, x86_mmx *%a2, align 8 ; no separate load in the checks; it appears as the (%rdi) operand
   4341   %3 = call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %1, x86_mmx %2) ; checked as the memory form: pmulhuw (%rdi), %mm0
   4342   %4 = bitcast x86_mmx %3 to i64 ; checked as movq %mm0, %rax
   4343   ret i64 %4
   4344 }
   4345 declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone ; intrinsic exercised by test_pmulhuw
   4346 
   4347 define i64 @test_pmullw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; pmullw scheduling check: register operand, then memory operand
   4348 ; GENERIC-LABEL: test_pmullw:
   4349 ; GENERIC:       # %bb.0:
   4350 ; GENERIC-NEXT:    pmullw %mm1, %mm0 # sched: [5:1.00]
   4351 ; GENERIC-NEXT:    pmullw (%rdi), %mm0 # sched: [10:1.00]
   4352 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4353 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4354 ;
   4355 ; ATOM-LABEL: test_pmullw:
   4356 ; ATOM:       # %bb.0:
   4357 ; ATOM-NEXT:    pmullw %mm1, %mm0 # sched: [4:4.00]
   4358 ; ATOM-NEXT:    pmullw (%rdi), %mm0 # sched: [4:4.00]
   4359 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   4360 ; ATOM-NEXT:    retq # sched: [79:39.50]
   4361 ;
   4362 ; SLM-LABEL: test_pmullw:
   4363 ; SLM:       # %bb.0:
   4364 ; SLM-NEXT:    pmullw %mm1, %mm0 # sched: [4:1.00]
   4365 ; SLM-NEXT:    pmullw (%rdi), %mm0 # sched: [7:1.00]
   4366 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   4367 ; SLM-NEXT:    retq # sched: [4:1.00]
   4368 ;
   4369 ; SANDY-LABEL: test_pmullw:
   4370 ; SANDY:       # %bb.0:
   4371 ; SANDY-NEXT:    pmullw %mm1, %mm0 # sched: [5:1.00]
   4372 ; SANDY-NEXT:    pmullw (%rdi), %mm0 # sched: [10:1.00]
   4373 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4374 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4375 ;
   4376 ; HASWELL-LABEL: test_pmullw:
   4377 ; HASWELL:       # %bb.0:
   4378 ; HASWELL-NEXT:    pmullw %mm1, %mm0 # sched: [5:1.00]
   4379 ; HASWELL-NEXT:    pmullw (%rdi), %mm0 # sched: [10:1.00]
   4380 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   4381 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4382 ;
   4383 ; BROADWELL-LABEL: test_pmullw:
   4384 ; BROADWELL:       # %bb.0:
   4385 ; BROADWELL-NEXT:    pmullw %mm1, %mm0 # sched: [5:1.00]
   4386 ; BROADWELL-NEXT:    pmullw (%rdi), %mm0 # sched: [10:1.00]
   4387 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   4388 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4389 ;
   4390 ; SKYLAKE-LABEL: test_pmullw:
   4391 ; SKYLAKE:       # %bb.0:
   4392 ; SKYLAKE-NEXT:    pmullw %mm1, %mm0 # sched: [4:1.00]
   4393 ; SKYLAKE-NEXT:    pmullw (%rdi), %mm0 # sched: [9:1.00]
   4394 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4395 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4396 ;
   4397 ; SKX-LABEL: test_pmullw:
   4398 ; SKX:       # %bb.0:
   4399 ; SKX-NEXT:    pmullw %mm1, %mm0 # sched: [4:1.00]
   4400 ; SKX-NEXT:    pmullw (%rdi), %mm0 # sched: [9:1.00]
   4401 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4402 ; SKX-NEXT:    retq # sched: [7:1.00]
   4403 ;
   4404 ; BTVER2-LABEL: test_pmullw:
   4405 ; BTVER2:       # %bb.0:
   4406 ; BTVER2-NEXT:    pmullw %mm1, %mm0 # sched: [2:1.00]
   4407 ; BTVER2-NEXT:    pmullw (%rdi), %mm0 # sched: [7:1.00]
   4408 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   4409 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4410 ;
   4411 ; ZNVER1-LABEL: test_pmullw:
   4412 ; ZNVER1:       # %bb.0:
   4413 ; ZNVER1-NEXT:    pmullw %mm1, %mm0 # sched: [4:1.00]
   4414 ; ZNVER1-NEXT:    pmullw (%rdi), %mm0 # sched: [11:1.00]
   4415 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4416 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   4417   %1 = call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %a0, x86_mmx %a1) ; checked as the register form: pmullw %mm1, %mm0
   4418   %2 = load x86_mmx, x86_mmx *%a2, align 8 ; no separate load in the checks; it appears as the (%rdi) operand
   4419   %3 = call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %1, x86_mmx %2) ; checked as the memory form: pmullw (%rdi), %mm0
   4420   %4 = bitcast x86_mmx %3 to i64 ; checked as movq %mm0, %rax
   4421   ret i64 %4
   4422 }
   4423 declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone ; intrinsic exercised by test_pmullw
   4424 
   4425 define i64 @test_pmuludq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; pmuludq scheduling check: register operand, then memory operand
   4426 ; GENERIC-LABEL: test_pmuludq:
   4427 ; GENERIC:       # %bb.0:
   4428 ; GENERIC-NEXT:    pmuludq %mm1, %mm0 # sched: [5:1.00]
   4429 ; GENERIC-NEXT:    pmuludq (%rdi), %mm0 # sched: [10:1.00]
   4430 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4431 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4432 ;
   4433 ; ATOM-LABEL: test_pmuludq:
   4434 ; ATOM:       # %bb.0:
   4435 ; ATOM-NEXT:    pmuludq %mm1, %mm0 # sched: [4:4.00]
   4436 ; ATOM-NEXT:    pmuludq (%rdi), %mm0 # sched: [4:4.00]
   4437 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   4438 ; ATOM-NEXT:    retq # sched: [79:39.50]
   4439 ;
   4440 ; SLM-LABEL: test_pmuludq:
   4441 ; SLM:       # %bb.0:
   4442 ; SLM-NEXT:    pmuludq %mm1, %mm0 # sched: [4:1.00]
   4443 ; SLM-NEXT:    pmuludq (%rdi), %mm0 # sched: [7:1.00]
   4444 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   4445 ; SLM-NEXT:    retq # sched: [4:1.00]
   4446 ;
   4447 ; SANDY-LABEL: test_pmuludq:
   4448 ; SANDY:       # %bb.0:
   4449 ; SANDY-NEXT:    pmuludq %mm1, %mm0 # sched: [5:1.00]
   4450 ; SANDY-NEXT:    pmuludq (%rdi), %mm0 # sched: [10:1.00]
   4451 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4452 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4453 ;
   4454 ; HASWELL-LABEL: test_pmuludq:
   4455 ; HASWELL:       # %bb.0:
   4456 ; HASWELL-NEXT:    pmuludq %mm1, %mm0 # sched: [5:1.00]
   4457 ; HASWELL-NEXT:    pmuludq (%rdi), %mm0 # sched: [10:1.00]
   4458 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   4459 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4460 ;
   4461 ; BROADWELL-LABEL: test_pmuludq:
   4462 ; BROADWELL:       # %bb.0:
   4463 ; BROADWELL-NEXT:    pmuludq %mm1, %mm0 # sched: [5:1.00]
   4464 ; BROADWELL-NEXT:    pmuludq (%rdi), %mm0 # sched: [10:1.00]
   4465 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   4466 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4467 ;
   4468 ; SKYLAKE-LABEL: test_pmuludq:
   4469 ; SKYLAKE:       # %bb.0:
   4470 ; SKYLAKE-NEXT:    pmuludq %mm1, %mm0 # sched: [4:1.00]
   4471 ; SKYLAKE-NEXT:    pmuludq (%rdi), %mm0 # sched: [9:1.00]
   4472 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4473 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4474 ;
   4475 ; SKX-LABEL: test_pmuludq:
   4476 ; SKX:       # %bb.0:
   4477 ; SKX-NEXT:    pmuludq %mm1, %mm0 # sched: [4:1.00]
   4478 ; SKX-NEXT:    pmuludq (%rdi), %mm0 # sched: [9:1.00]
   4479 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4480 ; SKX-NEXT:    retq # sched: [7:1.00]
   4481 ;
   4482 ; BTVER2-LABEL: test_pmuludq:
   4483 ; BTVER2:       # %bb.0:
   4484 ; BTVER2-NEXT:    pmuludq %mm1, %mm0 # sched: [2:1.00]
   4485 ; BTVER2-NEXT:    pmuludq (%rdi), %mm0 # sched: [7:1.00]
   4486 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   4487 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4488 ;
   4489 ; ZNVER1-LABEL: test_pmuludq:
   4490 ; ZNVER1:       # %bb.0:
   4491 ; ZNVER1-NEXT:    pmuludq %mm1, %mm0 # sched: [4:1.00]
   4492 ; ZNVER1-NEXT:    pmuludq (%rdi), %mm0 # sched: [11:1.00]
   4493 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4494 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   4495   %1 = call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %a0, x86_mmx %a1) ; checked as the register form: pmuludq %mm1, %mm0
   4496   %2 = load x86_mmx, x86_mmx *%a2, align 8 ; no separate load in the checks; it appears as the (%rdi) operand
   4497   %3 = call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %1, x86_mmx %2) ; checked as the memory form: pmuludq (%rdi), %mm0
   4498   %4 = bitcast x86_mmx %3 to i64 ; checked as movq %mm0, %rax
   4499   ret i64 %4
   4500 }
   4501 declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone ; intrinsic exercised by test_pmuludq
   4502 
   4503 define i64 @test_por(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; por scheduling check: register operand, then memory operand
   4504 ; GENERIC-LABEL: test_por:
   4505 ; GENERIC:       # %bb.0:
   4506 ; GENERIC-NEXT:    por %mm1, %mm0 # sched: [1:0.33]
   4507 ; GENERIC-NEXT:    por (%rdi), %mm0 # sched: [6:0.50]
   4508 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4509 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4510 ;
   4511 ; ATOM-LABEL: test_por:
   4512 ; ATOM:       # %bb.0:
   4513 ; ATOM-NEXT:    por %mm1, %mm0 # sched: [1:0.50]
   4514 ; ATOM-NEXT:    por (%rdi), %mm0 # sched: [1:1.00]
   4515 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   4516 ; ATOM-NEXT:    retq # sched: [79:39.50]
   4517 ;
   4518 ; SLM-LABEL: test_por:
   4519 ; SLM:       # %bb.0:
   4520 ; SLM-NEXT:    por %mm1, %mm0 # sched: [1:0.50]
   4521 ; SLM-NEXT:    por (%rdi), %mm0 # sched: [4:1.00]
   4522 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   4523 ; SLM-NEXT:    retq # sched: [4:1.00]
   4524 ;
   4525 ; SANDY-LABEL: test_por:
   4526 ; SANDY:       # %bb.0:
   4527 ; SANDY-NEXT:    por %mm1, %mm0 # sched: [1:0.33]
   4528 ; SANDY-NEXT:    por (%rdi), %mm0 # sched: [6:0.50]
   4529 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4530 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4531 ;
   4532 ; HASWELL-LABEL: test_por:
   4533 ; HASWELL:       # %bb.0:
   4534 ; HASWELL-NEXT:    por %mm1, %mm0 # sched: [1:0.33]
   4535 ; HASWELL-NEXT:    por (%rdi), %mm0 # sched: [6:0.50]
   4536 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   4537 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4538 ;
   4539 ; BROADWELL-LABEL: test_por:
   4540 ; BROADWELL:       # %bb.0:
   4541 ; BROADWELL-NEXT:    por %mm1, %mm0 # sched: [1:0.33]
   4542 ; BROADWELL-NEXT:    por (%rdi), %mm0 # sched: [6:0.50]
   4543 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   4544 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4545 ;
   4546 ; SKYLAKE-LABEL: test_por:
   4547 ; SKYLAKE:       # %bb.0:
   4548 ; SKYLAKE-NEXT:    por %mm1, %mm0 # sched: [1:0.50]
   4549 ; SKYLAKE-NEXT:    por (%rdi), %mm0 # sched: [6:0.50]
   4550 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4551 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4552 ;
   4553 ; SKX-LABEL: test_por:
   4554 ; SKX:       # %bb.0:
   4555 ; SKX-NEXT:    por %mm1, %mm0 # sched: [1:0.50]
   4556 ; SKX-NEXT:    por (%rdi), %mm0 # sched: [6:0.50]
   4557 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4558 ; SKX-NEXT:    retq # sched: [7:1.00]
   4559 ;
   4560 ; BTVER2-LABEL: test_por:
   4561 ; BTVER2:       # %bb.0:
   4562 ; BTVER2-NEXT:    por %mm1, %mm0 # sched: [1:0.50]
   4563 ; BTVER2-NEXT:    por (%rdi), %mm0 # sched: [6:1.00]
   4564 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   4565 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4566 ;
   4567 ; ZNVER1-LABEL: test_por:
   4568 ; ZNVER1:       # %bb.0:
   4569 ; ZNVER1-NEXT:    por %mm1, %mm0 # sched: [1:0.25]
   4570 ; ZNVER1-NEXT:    por (%rdi), %mm0 # sched: [8:0.50]
   4571 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4572 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   4573   %1 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %a0, x86_mmx %a1) ; checked as the register form: por %mm1, %mm0
   4574   %2 = load x86_mmx, x86_mmx *%a2, align 8 ; no separate load in the checks; it appears as the (%rdi) operand
   4575   %3 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %1, x86_mmx %2) ; checked as the memory form: por (%rdi), %mm0
   4576   %4 = bitcast x86_mmx %3 to i64 ; checked as movq %mm0, %rax
   4577   ret i64 %4
   4578 }
   4579 declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone ; intrinsic exercised by test_por
   4580 
   4581 define i64 @test_psadbw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; psadbw scheduling check: register operand, then memory operand
   4582 ; GENERIC-LABEL: test_psadbw:
   4583 ; GENERIC:       # %bb.0:
   4584 ; GENERIC-NEXT:    psadbw %mm1, %mm0 # sched: [5:1.00]
   4585 ; GENERIC-NEXT:    psadbw (%rdi), %mm0 # sched: [10:1.00]
   4586 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4587 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4588 ;
   4589 ; ATOM-LABEL: test_psadbw:
   4590 ; ATOM:       # %bb.0:
   4591 ; ATOM-NEXT:    psadbw %mm1, %mm0 # sched: [4:2.00]
   4592 ; ATOM-NEXT:    psadbw (%rdi), %mm0 # sched: [4:2.00]
   4593 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   4594 ; ATOM-NEXT:    retq # sched: [79:39.50]
   4595 ;
   4596 ; SLM-LABEL: test_psadbw:
   4597 ; SLM:       # %bb.0:
   4598 ; SLM-NEXT:    psadbw %mm1, %mm0 # sched: [4:1.00]
   4599 ; SLM-NEXT:    psadbw (%rdi), %mm0 # sched: [7:1.00]
   4600 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   4601 ; SLM-NEXT:    retq # sched: [4:1.00]
   4602 ;
   4603 ; SANDY-LABEL: test_psadbw:
   4604 ; SANDY:       # %bb.0:
   4605 ; SANDY-NEXT:    psadbw %mm1, %mm0 # sched: [5:1.00]
   4606 ; SANDY-NEXT:    psadbw (%rdi), %mm0 # sched: [10:1.00]
   4607 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4608 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4609 ;
   4610 ; HASWELL-LABEL: test_psadbw:
   4611 ; HASWELL:       # %bb.0:
   4612 ; HASWELL-NEXT:    psadbw %mm1, %mm0 # sched: [5:1.00]
   4613 ; HASWELL-NEXT:    psadbw (%rdi), %mm0 # sched: [10:1.00]
   4614 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   4615 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4616 ;
   4617 ; BROADWELL-LABEL: test_psadbw:
   4618 ; BROADWELL:       # %bb.0:
   4619 ; BROADWELL-NEXT:    psadbw %mm1, %mm0 # sched: [5:1.00]
   4620 ; BROADWELL-NEXT:    psadbw (%rdi), %mm0 # sched: [10:1.00]
   4621 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   4622 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4623 ;
   4624 ; SKYLAKE-LABEL: test_psadbw:
   4625 ; SKYLAKE:       # %bb.0:
   4626 ; SKYLAKE-NEXT:    psadbw %mm1, %mm0 # sched: [3:1.00]
   4627 ; SKYLAKE-NEXT:    psadbw (%rdi), %mm0 # sched: [8:1.00]
   4628 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4629 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4630 ;
   4631 ; SKX-LABEL: test_psadbw:
   4632 ; SKX:       # %bb.0:
   4633 ; SKX-NEXT:    psadbw %mm1, %mm0 # sched: [3:1.00]
   4634 ; SKX-NEXT:    psadbw (%rdi), %mm0 # sched: [8:1.00]
   4635 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4636 ; SKX-NEXT:    retq # sched: [7:1.00]
   4637 ;
   4638 ; BTVER2-LABEL: test_psadbw:
   4639 ; BTVER2:       # %bb.0:
   4640 ; BTVER2-NEXT:    psadbw %mm1, %mm0 # sched: [2:0.50]
   4641 ; BTVER2-NEXT:    psadbw (%rdi), %mm0 # sched: [7:1.00]
   4642 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   4643 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4644 ;
   4645 ; ZNVER1-LABEL: test_psadbw:
   4646 ; ZNVER1:       # %bb.0:
   4647 ; ZNVER1-NEXT:    psadbw %mm1, %mm0 # sched: [3:1.00]
   4648 ; ZNVER1-NEXT:    psadbw (%rdi), %mm0 # sched: [10:1.00]
   4649 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4650 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   4651   %1 = call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %a0, x86_mmx %a1) ; checked as the register form: psadbw %mm1, %mm0
   4652   %2 = load x86_mmx, x86_mmx *%a2, align 8 ; no separate load in the checks; it appears as the (%rdi) operand
   4653   %3 = call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %1, x86_mmx %2) ; checked as the memory form: psadbw (%rdi), %mm0
   4654   %4 = bitcast x86_mmx %3 to i64 ; checked as movq %mm0, %rax
   4655   ret i64 %4
   4656 }
   4657 declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone ; intrinsic exercised by test_psadbw
   4658 
   4659 define i64 @test_pshufb(x86_mmx %a0, x86_mmx %a1, x86_mmx *%a2) optsize { ; pshufb (MMX form) scheduling check: register operand, then memory operand
   4660 ; GENERIC-LABEL: test_pshufb:
   4661 ; GENERIC:       # %bb.0:
   4662 ; GENERIC-NEXT:    pshufb %mm1, %mm0 # sched: [1:0.50]
   4663 ; GENERIC-NEXT:    pshufb (%rdi), %mm0 # sched: [6:0.50]
   4664 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4665 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4666 ;
   4667 ; ATOM-LABEL: test_pshufb:
   4668 ; ATOM:       # %bb.0:
   4669 ; ATOM-NEXT:    pshufb %mm1, %mm0 # sched: [1:1.00]
   4670 ; ATOM-NEXT:    pshufb (%rdi), %mm0 # sched: [1:1.00]
   4671 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   4672 ; ATOM-NEXT:    retq # sched: [79:39.50]
   4673 ;
   4674 ; SLM-LABEL: test_pshufb:
   4675 ; SLM:       # %bb.0:
   4676 ; SLM-NEXT:    pshufb %mm1, %mm0 # sched: [1:1.00]
   4677 ; SLM-NEXT:    pshufb (%rdi), %mm0 # sched: [4:1.00]
   4678 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   4679 ; SLM-NEXT:    retq # sched: [4:1.00]
   4680 ;
   4681 ; SANDY-LABEL: test_pshufb:
   4682 ; SANDY:       # %bb.0:
   4683 ; SANDY-NEXT:    pshufb %mm1, %mm0 # sched: [1:0.50]
   4684 ; SANDY-NEXT:    pshufb (%rdi), %mm0 # sched: [6:0.50]
   4685 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4686 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4687 ;
   4688 ; HASWELL-LABEL: test_pshufb:
   4689 ; HASWELL:       # %bb.0:
   4690 ; HASWELL-NEXT:    pshufb %mm1, %mm0 # sched: [1:1.00]
   4691 ; HASWELL-NEXT:    pshufb (%rdi), %mm0 # sched: [6:1.00]
   4692 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   4693 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4694 ;
   4695 ; BROADWELL-LABEL: test_pshufb:
   4696 ; BROADWELL:       # %bb.0:
   4697 ; BROADWELL-NEXT:    pshufb %mm1, %mm0 # sched: [1:1.00]
   4698 ; BROADWELL-NEXT:    pshufb (%rdi), %mm0 # sched: [6:1.00]
   4699 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   4700 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4701 ;
   4702 ; SKYLAKE-LABEL: test_pshufb:
   4703 ; SKYLAKE:       # %bb.0:
   4704 ; SKYLAKE-NEXT:    pshufb %mm1, %mm0 # sched: [1:1.00]
   4705 ; SKYLAKE-NEXT:    pshufb (%rdi), %mm0 # sched: [6:1.00]
   4706 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4707 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4708 ;
   4709 ; SKX-LABEL: test_pshufb:
   4710 ; SKX:       # %bb.0:
   4711 ; SKX-NEXT:    pshufb %mm1, %mm0 # sched: [1:1.00]
   4712 ; SKX-NEXT:    pshufb (%rdi), %mm0 # sched: [6:1.00]
   4713 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4714 ; SKX-NEXT:    retq # sched: [7:1.00]
   4715 ;
   4716 ; BTVER2-LABEL: test_pshufb:
   4717 ; BTVER2:       # %bb.0:
   4718 ; BTVER2-NEXT:    pshufb %mm1, %mm0 # sched: [2:2.00]
   4719 ; BTVER2-NEXT:    pshufb (%rdi), %mm0 # sched: [7:2.00]
   4720 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   4721 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4722 ;
   4723 ; ZNVER1-LABEL: test_pshufb:
   4724 ; ZNVER1:       # %bb.0:
   4725 ; ZNVER1-NEXT:    pshufb %mm1, %mm0 # sched: [1:0.25]
   4726 ; ZNVER1-NEXT:    pshufb (%rdi), %mm0 # sched: [8:0.50]
   4727 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4728 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   4729   %1 = call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %a0, x86_mmx %a1) ; checked as the register form: pshufb %mm1, %mm0
   4730   %2 = load x86_mmx, x86_mmx *%a2, align 8 ; no separate load in the checks; it appears as the (%rdi) operand
   4731   %3 = call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %1, x86_mmx %2) ; checked as the memory form: pshufb (%rdi), %mm0
   4732   %4 = bitcast x86_mmx %3 to i64 ; checked as movq %mm0, %rax
   4733   ret i64 %4
   4734 }
   4735 declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone ; intrinsic exercised by test_pshufb
   4736 
   4737 define i64 @test_pshufw(x86_mmx *%a0) optsize { ; pshufw scheduling check: memory source first, then register source (immediate 0 both times)
   4738 ; GENERIC-LABEL: test_pshufw:
   4739 ; GENERIC:       # %bb.0:
   4740 ; GENERIC-NEXT:    pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00]
   4741 ; GENERIC-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00]
   4742 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4743 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4744 ;
   4745 ; ATOM-LABEL: test_pshufw:
   4746 ; ATOM:       # %bb.0:
   4747 ; ATOM-NEXT:    pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [1:1.00]
   4748 ; ATOM-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00]
   4749 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   4750 ; ATOM-NEXT:    retq # sched: [79:39.50]
   4751 ;
   4752 ; SLM-LABEL: test_pshufw:
   4753 ; SLM:       # %bb.0:
   4754 ; SLM-NEXT:    pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [4:1.00]
   4755 ; SLM-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00]
   4756 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   4757 ; SLM-NEXT:    retq # sched: [4:1.00]
   4758 ;
   4759 ; SANDY-LABEL: test_pshufw:
   4760 ; SANDY:       # %bb.0:
   4761 ; SANDY-NEXT:    pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00]
   4762 ; SANDY-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00]
   4763 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4764 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4765 ;
   4766 ; HASWELL-LABEL: test_pshufw:
   4767 ; HASWELL:       # %bb.0:
   4768 ; HASWELL-NEXT:    pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00]
   4769 ; HASWELL-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00]
   4770 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   4771 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4772 ;
   4773 ; BROADWELL-LABEL: test_pshufw:
   4774 ; BROADWELL:       # %bb.0:
   4775 ; BROADWELL-NEXT:    pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00]
   4776 ; BROADWELL-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00]
   4777 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   4778 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4779 ;
   4780 ; SKYLAKE-LABEL: test_pshufw:
   4781 ; SKYLAKE:       # %bb.0:
   4782 ; SKYLAKE-NEXT:    pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00]
   4783 ; SKYLAKE-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00]
   4784 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4785 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4786 ;
   4787 ; SKX-LABEL: test_pshufw:
   4788 ; SKX:       # %bb.0:
   4789 ; SKX-NEXT:    pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00]
   4790 ; SKX-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00]
   4791 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4792 ; SKX-NEXT:    retq # sched: [7:1.00]
   4793 ;
   4794 ; BTVER2-LABEL: test_pshufw:
   4795 ; BTVER2:       # %bb.0:
   4796 ; BTVER2-NEXT:    pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00]
   4797 ; BTVER2-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:0.50]
   4798 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   4799 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4800 ;
   4801 ; ZNVER1-LABEL: test_pshufw:
   4802 ; ZNVER1:       # %bb.0:
   4803 ; ZNVER1-NEXT:    pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [8:0.50]
   4804 ; ZNVER1-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:0.25]
   4805 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4806 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   4807   %1 = load x86_mmx, x86_mmx *%a0, align 8 ; no separate load in the checks; it appears as the (%rdi) source
   4808   %2 = call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 0) ; checked as the memory form: pshufw $0, (%rdi), %mm0
   4809   %3 = call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %2, i8 0) ; checked as the register form: pshufw $0, %mm0, %mm0
   4810   %4 = bitcast x86_mmx %3 to i64 ; checked as movq %mm0, %rax
   4811   ret i64 %4
   4812 }
   4813 declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone ; intrinsic exercised by test_pshufw
   4814 
   4815 define i64 @test_psignb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; psignb (MMX form) scheduling check: register operand, then memory operand
   4816 ; GENERIC-LABEL: test_psignb:
   4817 ; GENERIC:       # %bb.0:
   4818 ; GENERIC-NEXT:    psignb %mm1, %mm0 # sched: [1:0.50]
   4819 ; GENERIC-NEXT:    psignb (%rdi), %mm0 # sched: [6:0.50]
   4820 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4821 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4822 ;
   4823 ; ATOM-LABEL: test_psignb:
   4824 ; ATOM:       # %bb.0:
   4825 ; ATOM-NEXT:    psignb %mm1, %mm0 # sched: [1:0.50]
   4826 ; ATOM-NEXT:    psignb (%rdi), %mm0 # sched: [1:1.00]
   4827 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   4828 ; ATOM-NEXT:    retq # sched: [79:39.50]
   4829 ;
   4830 ; SLM-LABEL: test_psignb:
   4831 ; SLM:       # %bb.0:
   4832 ; SLM-NEXT:    psignb %mm1, %mm0 # sched: [1:0.50]
   4833 ; SLM-NEXT:    psignb (%rdi), %mm0 # sched: [4:1.00]
   4834 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   4835 ; SLM-NEXT:    retq # sched: [4:1.00]
   4836 ;
   4837 ; SANDY-LABEL: test_psignb:
   4838 ; SANDY:       # %bb.0:
   4839 ; SANDY-NEXT:    psignb %mm1, %mm0 # sched: [1:0.50]
   4840 ; SANDY-NEXT:    psignb (%rdi), %mm0 # sched: [6:0.50]
   4841 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4842 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4843 ;
   4844 ; HASWELL-LABEL: test_psignb:
   4845 ; HASWELL:       # %bb.0:
   4846 ; HASWELL-NEXT:    psignb %mm1, %mm0 # sched: [1:0.50]
   4847 ; HASWELL-NEXT:    psignb (%rdi), %mm0 # sched: [6:0.50]
   4848 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   4849 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4850 ;
   4851 ; BROADWELL-LABEL: test_psignb:
   4852 ; BROADWELL:       # %bb.0:
   4853 ; BROADWELL-NEXT:    psignb %mm1, %mm0 # sched: [1:0.50]
   4854 ; BROADWELL-NEXT:    psignb (%rdi), %mm0 # sched: [6:0.50]
   4855 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   4856 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4857 ;
   4858 ; SKYLAKE-LABEL: test_psignb:
   4859 ; SKYLAKE:       # %bb.0:
   4860 ; SKYLAKE-NEXT:    psignb %mm1, %mm0 # sched: [1:0.50]
   4861 ; SKYLAKE-NEXT:    psignb (%rdi), %mm0 # sched: [6:0.50]
   4862 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4863 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4864 ;
   4865 ; SKX-LABEL: test_psignb:
   4866 ; SKX:       # %bb.0:
   4867 ; SKX-NEXT:    psignb %mm1, %mm0 # sched: [1:0.50]
   4868 ; SKX-NEXT:    psignb (%rdi), %mm0 # sched: [6:0.50]
   4869 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4870 ; SKX-NEXT:    retq # sched: [7:1.00]
   4871 ;
   4872 ; BTVER2-LABEL: test_psignb:
   4873 ; BTVER2:       # %bb.0:
   4874 ; BTVER2-NEXT:    psignb %mm1, %mm0 # sched: [1:0.50]
   4875 ; BTVER2-NEXT:    psignb (%rdi), %mm0 # sched: [6:1.00]
   4876 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   4877 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4878 ;
   4879 ; ZNVER1-LABEL: test_psignb:
   4880 ; ZNVER1:       # %bb.0:
   4881 ; ZNVER1-NEXT:    psignb %mm1, %mm0 # sched: [1:0.25]
   4882 ; ZNVER1-NEXT:    psignb (%rdi), %mm0 # sched: [8:0.50]
   4883 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4884 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   4885   %1 = call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %a0, x86_mmx %a1) ; checked as the register form: psignb %mm1, %mm0
   4886   %2 = load x86_mmx, x86_mmx *%a2, align 8 ; no separate load in the checks; it appears as the (%rdi) operand
   4887   %3 = call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %1, x86_mmx %2) ; checked as the memory form: psignb (%rdi), %mm0
   4888   %4 = bitcast x86_mmx %3 to i64 ; checked as movq %mm0, %rax
   4889   ret i64 %4
   4890 }
   4891 declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone ; intrinsic exercised by test_psignb
   4892 
   4893 define i64 @test_psignd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
        ; Scheduling test for MMX psignd (intrinsic llvm.x86.ssse3.psign.d).
        ; The check lines in this function are autogenerated by
        ; utils/update_llc_test_checks.py (see the note at the top of the file) -
        ; regenerate them with that script rather than editing them by hand.
        ; The check prefixes are the ones passed to FileCheck on the RUN lines in
        ; the file header; SANDY is shared by the sandybridge and ivybridge runs.
        ; NOTE(review) the bracketed pair after "# sched" is presumably
        ; [latency, reciprocal throughput] from -print-schedule - confirm.
   4894 ; GENERIC-LABEL: test_psignd:
   4895 ; GENERIC:       # %bb.0:
   4896 ; GENERIC-NEXT:    psignd %mm1, %mm0 # sched: [1:0.50]
   4897 ; GENERIC-NEXT:    psignd (%rdi), %mm0 # sched: [6:0.50]
   4898 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4899 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4900 ;
   4901 ; ATOM-LABEL: test_psignd:
   4902 ; ATOM:       # %bb.0:
   4903 ; ATOM-NEXT:    psignd %mm1, %mm0 # sched: [1:0.50]
   4904 ; ATOM-NEXT:    psignd (%rdi), %mm0 # sched: [1:1.00]
   4905 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   4906 ; ATOM-NEXT:    retq # sched: [79:39.50]
   4907 ;
   4908 ; SLM-LABEL: test_psignd:
   4909 ; SLM:       # %bb.0:
   4910 ; SLM-NEXT:    psignd %mm1, %mm0 # sched: [1:0.50]
   4911 ; SLM-NEXT:    psignd (%rdi), %mm0 # sched: [4:1.00]
   4912 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   4913 ; SLM-NEXT:    retq # sched: [4:1.00]
   4914 ;
   4915 ; SANDY-LABEL: test_psignd:
   4916 ; SANDY:       # %bb.0:
   4917 ; SANDY-NEXT:    psignd %mm1, %mm0 # sched: [1:0.50]
   4918 ; SANDY-NEXT:    psignd (%rdi), %mm0 # sched: [6:0.50]
   4919 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4920 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4921 ;
   4922 ; HASWELL-LABEL: test_psignd:
   4923 ; HASWELL:       # %bb.0:
   4924 ; HASWELL-NEXT:    psignd %mm1, %mm0 # sched: [1:0.50]
   4925 ; HASWELL-NEXT:    psignd (%rdi), %mm0 # sched: [6:0.50]
   4926 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   4927 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4928 ;
   4929 ; BROADWELL-LABEL: test_psignd:
   4930 ; BROADWELL:       # %bb.0:
   4931 ; BROADWELL-NEXT:    psignd %mm1, %mm0 # sched: [1:0.50]
   4932 ; BROADWELL-NEXT:    psignd (%rdi), %mm0 # sched: [6:0.50]
   4933 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   4934 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4935 ;
   4936 ; SKYLAKE-LABEL: test_psignd:
   4937 ; SKYLAKE:       # %bb.0:
   4938 ; SKYLAKE-NEXT:    psignd %mm1, %mm0 # sched: [1:0.50]
   4939 ; SKYLAKE-NEXT:    psignd (%rdi), %mm0 # sched: [6:0.50]
   4940 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4941 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4942 ;
   4943 ; SKX-LABEL: test_psignd:
   4944 ; SKX:       # %bb.0:
   4945 ; SKX-NEXT:    psignd %mm1, %mm0 # sched: [1:0.50]
   4946 ; SKX-NEXT:    psignd (%rdi), %mm0 # sched: [6:0.50]
   4947 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4948 ; SKX-NEXT:    retq # sched: [7:1.00]
   4949 ;
   4950 ; BTVER2-LABEL: test_psignd:
   4951 ; BTVER2:       # %bb.0:
   4952 ; BTVER2-NEXT:    psignd %mm1, %mm0 # sched: [1:0.50]
   4953 ; BTVER2-NEXT:    psignd (%rdi), %mm0 # sched: [6:1.00]
   4954 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   4955 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4956 ;
   4957 ; ZNVER1-LABEL: test_psignd:
   4958 ; ZNVER1:       # %bb.0:
   4959 ; ZNVER1-NEXT:    psignd %mm1, %mm0 # sched: [1:0.25]
   4960 ; ZNVER1-NEXT:    psignd (%rdi), %mm0 # sched: [8:0.50]
   4961 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4962 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Register form - both psign.d operands are SSA values.
   4963   %1 = call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %a0, x86_mmx %a1)
          ; Memory form - the second operand comes from an 8-byte-aligned load of %a2;
          ; the expected output has psignd with a (%rdi) operand and no separate load.
   4964   %2 = load x86_mmx, x86_mmx *%a2, align 8
   4965   %3 = call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %1, x86_mmx %2)
          ; Return the MMX result as i64 (movq %mm0, %rax in the expected output).
   4966   %4 = bitcast x86_mmx %3 to i64
   4967   ret i64 %4
   4968 }
        ; Intrinsic under test - two x86_mmx operands, x86_mmx result.
   4969 declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone
   4970 
   4971 define i64 @test_psignw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
        ; Scheduling test for MMX psignw (intrinsic llvm.x86.ssse3.psign.w).
        ; The check lines in this function are autogenerated by
        ; utils/update_llc_test_checks.py (see the note at the top of the file) -
        ; regenerate them with that script rather than editing them by hand.
        ; The check prefixes are the ones passed to FileCheck on the RUN lines in
        ; the file header; SANDY is shared by the sandybridge and ivybridge runs.
        ; NOTE(review) the bracketed pair after "# sched" is presumably
        ; [latency, reciprocal throughput] from -print-schedule - confirm.
   4972 ; GENERIC-LABEL: test_psignw:
   4973 ; GENERIC:       # %bb.0:
   4974 ; GENERIC-NEXT:    psignw %mm1, %mm0 # sched: [1:0.50]
   4975 ; GENERIC-NEXT:    psignw (%rdi), %mm0 # sched: [6:0.50]
   4976 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4977 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4978 ;
   4979 ; ATOM-LABEL: test_psignw:
   4980 ; ATOM:       # %bb.0:
   4981 ; ATOM-NEXT:    psignw %mm1, %mm0 # sched: [1:0.50]
   4982 ; ATOM-NEXT:    psignw (%rdi), %mm0 # sched: [1:1.00]
   4983 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   4984 ; ATOM-NEXT:    retq # sched: [79:39.50]
   4985 ;
   4986 ; SLM-LABEL: test_psignw:
   4987 ; SLM:       # %bb.0:
   4988 ; SLM-NEXT:    psignw %mm1, %mm0 # sched: [1:0.50]
   4989 ; SLM-NEXT:    psignw (%rdi), %mm0 # sched: [4:1.00]
   4990 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   4991 ; SLM-NEXT:    retq # sched: [4:1.00]
   4992 ;
   4993 ; SANDY-LABEL: test_psignw:
   4994 ; SANDY:       # %bb.0:
   4995 ; SANDY-NEXT:    psignw %mm1, %mm0 # sched: [1:0.50]
   4996 ; SANDY-NEXT:    psignw (%rdi), %mm0 # sched: [6:0.50]
   4997 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   4998 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4999 ;
   5000 ; HASWELL-LABEL: test_psignw:
   5001 ; HASWELL:       # %bb.0:
   5002 ; HASWELL-NEXT:    psignw %mm1, %mm0 # sched: [1:0.50]
   5003 ; HASWELL-NEXT:    psignw (%rdi), %mm0 # sched: [6:0.50]
   5004 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   5005 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   5006 ;
   5007 ; BROADWELL-LABEL: test_psignw:
   5008 ; BROADWELL:       # %bb.0:
   5009 ; BROADWELL-NEXT:    psignw %mm1, %mm0 # sched: [1:0.50]
   5010 ; BROADWELL-NEXT:    psignw (%rdi), %mm0 # sched: [6:0.50]
   5011 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   5012 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   5013 ;
   5014 ; SKYLAKE-LABEL: test_psignw:
   5015 ; SKYLAKE:       # %bb.0:
   5016 ; SKYLAKE-NEXT:    psignw %mm1, %mm0 # sched: [1:0.50]
   5017 ; SKYLAKE-NEXT:    psignw (%rdi), %mm0 # sched: [6:0.50]
   5018 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5019 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   5020 ;
   5021 ; SKX-LABEL: test_psignw:
   5022 ; SKX:       # %bb.0:
   5023 ; SKX-NEXT:    psignw %mm1, %mm0 # sched: [1:0.50]
   5024 ; SKX-NEXT:    psignw (%rdi), %mm0 # sched: [6:0.50]
   5025 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5026 ; SKX-NEXT:    retq # sched: [7:1.00]
   5027 ;
   5028 ; BTVER2-LABEL: test_psignw:
   5029 ; BTVER2:       # %bb.0:
   5030 ; BTVER2-NEXT:    psignw %mm1, %mm0 # sched: [1:0.50]
   5031 ; BTVER2-NEXT:    psignw (%rdi), %mm0 # sched: [6:1.00]
   5032 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   5033 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   5034 ;
   5035 ; ZNVER1-LABEL: test_psignw:
   5036 ; ZNVER1:       # %bb.0:
   5037 ; ZNVER1-NEXT:    psignw %mm1, %mm0 # sched: [1:0.25]
   5038 ; ZNVER1-NEXT:    psignw (%rdi), %mm0 # sched: [8:0.50]
   5039 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5040 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Register form - both psign.w operands are SSA values.
   5041   %1 = call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %a0, x86_mmx %a1)
          ; Memory form - the second operand comes from an 8-byte-aligned load of %a2;
          ; the expected output has psignw with a (%rdi) operand and no separate load.
   5042   %2 = load x86_mmx, x86_mmx *%a2, align 8
   5043   %3 = call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %1, x86_mmx %2)
          ; Return the MMX result as i64 (movq %mm0, %rax in the expected output).
   5044   %4 = bitcast x86_mmx %3 to i64
   5045   ret i64 %4
   5046 }
        ; Intrinsic under test - two x86_mmx operands, x86_mmx result.
   5047 declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone
   5048 
   5049 define i64 @test_pslld(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
        ; Scheduling test for MMX pslld in its register, memory and immediate
        ; operand forms (intrinsics llvm.x86.mmx.psll.d and llvm.x86.mmx.pslli.d).
        ; The check lines in this function are autogenerated by
        ; utils/update_llc_test_checks.py (see the note at the top of the file) -
        ; regenerate them with that script rather than editing them by hand.
        ; The check prefixes are the ones passed to FileCheck on the RUN lines in
        ; the file header; SANDY is shared by the sandybridge and ivybridge runs.
        ; NOTE(review) the bracketed pair after "# sched" is presumably
        ; [latency, reciprocal throughput] from -print-schedule - confirm.
   5050 ; GENERIC-LABEL: test_pslld:
   5051 ; GENERIC:       # %bb.0:
   5052 ; GENERIC-NEXT:    pslld %mm1, %mm0 # sched: [1:1.00]
   5053 ; GENERIC-NEXT:    pslld (%rdi), %mm0 # sched: [6:1.00]
   5054 ; GENERIC-NEXT:    pslld $7, %mm0 # sched: [1:1.00]
   5055 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5056 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5057 ;
   5058 ; ATOM-LABEL: test_pslld:
   5059 ; ATOM:       # %bb.0:
   5060 ; ATOM-NEXT:    pslld %mm1, %mm0 # sched: [2:1.00]
   5061 ; ATOM-NEXT:    pslld (%rdi), %mm0 # sched: [3:1.50]
   5062 ; ATOM-NEXT:    pslld $7, %mm0 # sched: [1:0.50]
   5063 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   5064 ; ATOM-NEXT:    retq # sched: [79:39.50]
   5065 ;
   5066 ; SLM-LABEL: test_pslld:
   5067 ; SLM:       # %bb.0:
   5068 ; SLM-NEXT:    pslld %mm1, %mm0 # sched: [1:1.00]
   5069 ; SLM-NEXT:    pslld (%rdi), %mm0 # sched: [4:1.00]
   5070 ; SLM-NEXT:    pslld $7, %mm0 # sched: [1:1.00]
   5071 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   5072 ; SLM-NEXT:    retq # sched: [4:1.00]
   5073 ;
   5074 ; SANDY-LABEL: test_pslld:
   5075 ; SANDY:       # %bb.0:
   5076 ; SANDY-NEXT:    pslld %mm1, %mm0 # sched: [1:1.00]
   5077 ; SANDY-NEXT:    pslld (%rdi), %mm0 # sched: [6:1.00]
   5078 ; SANDY-NEXT:    pslld $7, %mm0 # sched: [1:1.00]
   5079 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5080 ; SANDY-NEXT:    retq # sched: [1:1.00]
   5081 ;
   5082 ; HASWELL-LABEL: test_pslld:
   5083 ; HASWELL:       # %bb.0:
   5084 ; HASWELL-NEXT:    pslld %mm1, %mm0 # sched: [1:1.00]
   5085 ; HASWELL-NEXT:    pslld (%rdi), %mm0 # sched: [6:1.00]
   5086 ; HASWELL-NEXT:    pslld $7, %mm0 # sched: [1:1.00]
   5087 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   5088 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   5089 ;
   5090 ; BROADWELL-LABEL: test_pslld:
   5091 ; BROADWELL:       # %bb.0:
   5092 ; BROADWELL-NEXT:    pslld %mm1, %mm0 # sched: [1:1.00]
   5093 ; BROADWELL-NEXT:    pslld (%rdi), %mm0 # sched: [6:1.00]
   5094 ; BROADWELL-NEXT:    pslld $7, %mm0 # sched: [1:1.00]
   5095 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   5096 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   5097 ;
   5098 ; SKYLAKE-LABEL: test_pslld:
   5099 ; SKYLAKE:       # %bb.0:
   5100 ; SKYLAKE-NEXT:    pslld %mm1, %mm0 # sched: [1:1.00]
   5101 ; SKYLAKE-NEXT:    pslld (%rdi), %mm0 # sched: [6:1.00]
   5102 ; SKYLAKE-NEXT:    pslld $7, %mm0 # sched: [1:1.00]
   5103 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5104 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   5105 ;
   5106 ; SKX-LABEL: test_pslld:
   5107 ; SKX:       # %bb.0:
   5108 ; SKX-NEXT:    pslld %mm1, %mm0 # sched: [1:1.00]
   5109 ; SKX-NEXT:    pslld (%rdi), %mm0 # sched: [6:1.00]
   5110 ; SKX-NEXT:    pslld $7, %mm0 # sched: [1:1.00]
   5111 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5112 ; SKX-NEXT:    retq # sched: [7:1.00]
   5113 ;
   5114 ; BTVER2-LABEL: test_pslld:
   5115 ; BTVER2:       # %bb.0:
   5116 ; BTVER2-NEXT:    pslld %mm1, %mm0 # sched: [1:0.50]
   5117 ; BTVER2-NEXT:    pslld (%rdi), %mm0 # sched: [6:1.00]
   5118 ; BTVER2-NEXT:    pslld $7, %mm0 # sched: [1:0.50]
   5119 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   5120 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   5121 ;
   5122 ; ZNVER1-LABEL: test_pslld:
   5123 ; ZNVER1:       # %bb.0:
   5124 ; ZNVER1-NEXT:    pslld %mm1, %mm0 # sched: [1:0.25]
   5125 ; ZNVER1-NEXT:    pslld (%rdi), %mm0 # sched: [8:0.50]
   5126 ; ZNVER1-NEXT:    pslld $7, %mm0 # sched: [1:0.25]
   5127 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5128 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Register form - both psll.d operands are SSA values.
   5129   %1 = call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %a0, x86_mmx %a1)
          ; Memory form - the second operand comes from an 8-byte-aligned load of %a2;
          ; the expected output has pslld with a (%rdi) operand and no separate load.
   5130   %2 = load x86_mmx, x86_mmx *%a2, align 8
   5131   %3 = call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %1, x86_mmx %2)
          ; Immediate form - pslli.d with the constant i32 7 ($7 in the expected output).
   5132   %4 = call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx %3, i32 7)
          ; Return the MMX result as i64 (movq %mm0, %rax in the expected output).
   5133   %5 = bitcast x86_mmx %4 to i64
   5134   ret i64 %5
   5135 }
        ; Intrinsics under test - psll.d takes two x86_mmx values, pslli.d takes an
        ; x86_mmx value and an i32; both return x86_mmx.
   5136 declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone
   5137 declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32) nounwind readnone
   5138 
   5139 define i64 @test_psllq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
        ; Scheduling test for MMX psllq in its register, memory and immediate
        ; operand forms (intrinsics llvm.x86.mmx.psll.q and llvm.x86.mmx.pslli.q).
        ; The check lines in this function are autogenerated by
        ; utils/update_llc_test_checks.py (see the note at the top of the file) -
        ; regenerate them with that script rather than editing them by hand.
        ; The check prefixes are the ones passed to FileCheck on the RUN lines in
        ; the file header; SANDY is shared by the sandybridge and ivybridge runs.
        ; NOTE(review) the bracketed pair after "# sched" is presumably
        ; [latency, reciprocal throughput] from -print-schedule - confirm.
   5140 ; GENERIC-LABEL: test_psllq:
   5141 ; GENERIC:       # %bb.0:
   5142 ; GENERIC-NEXT:    psllq %mm1, %mm0 # sched: [1:1.00]
   5143 ; GENERIC-NEXT:    psllq (%rdi), %mm0 # sched: [6:1.00]
   5144 ; GENERIC-NEXT:    psllq $7, %mm0 # sched: [1:1.00]
   5145 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5146 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5147 ;
   5148 ; ATOM-LABEL: test_psllq:
   5149 ; ATOM:       # %bb.0:
   5150 ; ATOM-NEXT:    psllq %mm1, %mm0 # sched: [2:1.00]
   5151 ; ATOM-NEXT:    psllq (%rdi), %mm0 # sched: [3:1.50]
   5152 ; ATOM-NEXT:    psllq $7, %mm0 # sched: [1:0.50]
   5153 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   5154 ; ATOM-NEXT:    retq # sched: [79:39.50]
   5155 ;
   5156 ; SLM-LABEL: test_psllq:
   5157 ; SLM:       # %bb.0:
   5158 ; SLM-NEXT:    psllq %mm1, %mm0 # sched: [1:1.00]
   5159 ; SLM-NEXT:    psllq (%rdi), %mm0 # sched: [4:1.00]
   5160 ; SLM-NEXT:    psllq $7, %mm0 # sched: [1:1.00]
   5161 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   5162 ; SLM-NEXT:    retq # sched: [4:1.00]
   5163 ;
   5164 ; SANDY-LABEL: test_psllq:
   5165 ; SANDY:       # %bb.0:
   5166 ; SANDY-NEXT:    psllq %mm1, %mm0 # sched: [1:1.00]
   5167 ; SANDY-NEXT:    psllq (%rdi), %mm0 # sched: [6:1.00]
   5168 ; SANDY-NEXT:    psllq $7, %mm0 # sched: [1:1.00]
   5169 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5170 ; SANDY-NEXT:    retq # sched: [1:1.00]
   5171 ;
   5172 ; HASWELL-LABEL: test_psllq:
   5173 ; HASWELL:       # %bb.0:
   5174 ; HASWELL-NEXT:    psllq %mm1, %mm0 # sched: [1:1.00]
   5175 ; HASWELL-NEXT:    psllq (%rdi), %mm0 # sched: [6:1.00]
   5176 ; HASWELL-NEXT:    psllq $7, %mm0 # sched: [1:1.00]
   5177 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   5178 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   5179 ;
   5180 ; BROADWELL-LABEL: test_psllq:
   5181 ; BROADWELL:       # %bb.0:
   5182 ; BROADWELL-NEXT:    psllq %mm1, %mm0 # sched: [1:1.00]
   5183 ; BROADWELL-NEXT:    psllq (%rdi), %mm0 # sched: [6:1.00]
   5184 ; BROADWELL-NEXT:    psllq $7, %mm0 # sched: [1:1.00]
   5185 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   5186 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   5187 ;
   5188 ; SKYLAKE-LABEL: test_psllq:
   5189 ; SKYLAKE:       # %bb.0:
   5190 ; SKYLAKE-NEXT:    psllq %mm1, %mm0 # sched: [1:1.00]
   5191 ; SKYLAKE-NEXT:    psllq (%rdi), %mm0 # sched: [6:1.00]
   5192 ; SKYLAKE-NEXT:    psllq $7, %mm0 # sched: [1:1.00]
   5193 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5194 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   5195 ;
   5196 ; SKX-LABEL: test_psllq:
   5197 ; SKX:       # %bb.0:
   5198 ; SKX-NEXT:    psllq %mm1, %mm0 # sched: [1:1.00]
   5199 ; SKX-NEXT:    psllq (%rdi), %mm0 # sched: [6:1.00]
   5200 ; SKX-NEXT:    psllq $7, %mm0 # sched: [1:1.00]
   5201 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5202 ; SKX-NEXT:    retq # sched: [7:1.00]
   5203 ;
   5204 ; BTVER2-LABEL: test_psllq:
   5205 ; BTVER2:       # %bb.0:
   5206 ; BTVER2-NEXT:    psllq %mm1, %mm0 # sched: [1:0.50]
   5207 ; BTVER2-NEXT:    psllq (%rdi), %mm0 # sched: [6:1.00]
   5208 ; BTVER2-NEXT:    psllq $7, %mm0 # sched: [1:0.50]
   5209 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   5210 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   5211 ;
   5212 ; ZNVER1-LABEL: test_psllq:
   5213 ; ZNVER1:       # %bb.0:
   5214 ; ZNVER1-NEXT:    psllq %mm1, %mm0 # sched: [1:0.25]
   5215 ; ZNVER1-NEXT:    psllq (%rdi), %mm0 # sched: [8:0.50]
   5216 ; ZNVER1-NEXT:    psllq $7, %mm0 # sched: [1:0.25]
   5217 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5218 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Register form - both psll.q operands are SSA values.
   5219   %1 = call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %a0, x86_mmx %a1)
          ; Memory form - the second operand comes from an 8-byte-aligned load of %a2;
          ; the expected output has psllq with a (%rdi) operand and no separate load.
   5220   %2 = load x86_mmx, x86_mmx *%a2, align 8
   5221   %3 = call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %1, x86_mmx %2)
          ; Immediate form - pslli.q with the constant i32 7 ($7 in the expected output).
   5222   %4 = call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %3, i32 7)
          ; Return the MMX result as i64 (movq %mm0, %rax in the expected output).
   5223   %5 = bitcast x86_mmx %4 to i64
   5224   ret i64 %5
   5225 }
        ; Intrinsics under test - psll.q takes two x86_mmx values, pslli.q takes an
        ; x86_mmx value and an i32; both return x86_mmx.
   5226 declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone
   5227 declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone
   5228 
   5229 define i64 @test_psllw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
        ; Scheduling test for MMX psllw in its register, memory and immediate
        ; operand forms (intrinsics llvm.x86.mmx.psll.w and llvm.x86.mmx.pslli.w).
        ; The check lines in this function are autogenerated by
        ; utils/update_llc_test_checks.py (see the note at the top of the file) -
        ; regenerate them with that script rather than editing them by hand.
        ; The check prefixes are the ones passed to FileCheck on the RUN lines in
        ; the file header; SANDY is shared by the sandybridge and ivybridge runs.
        ; NOTE(review) the bracketed pair after "# sched" is presumably
        ; [latency, reciprocal throughput] from -print-schedule - confirm.
   5230 ; GENERIC-LABEL: test_psllw:
   5231 ; GENERIC:       # %bb.0:
   5232 ; GENERIC-NEXT:    psllw %mm1, %mm0 # sched: [1:1.00]
   5233 ; GENERIC-NEXT:    psllw (%rdi), %mm0 # sched: [6:1.00]
   5234 ; GENERIC-NEXT:    psllw $7, %mm0 # sched: [1:1.00]
   5235 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5236 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5237 ;
   5238 ; ATOM-LABEL: test_psllw:
   5239 ; ATOM:       # %bb.0:
   5240 ; ATOM-NEXT:    psllw %mm1, %mm0 # sched: [2:1.00]
   5241 ; ATOM-NEXT:    psllw (%rdi), %mm0 # sched: [3:1.50]
   5242 ; ATOM-NEXT:    psllw $7, %mm0 # sched: [1:0.50]
   5243 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   5244 ; ATOM-NEXT:    retq # sched: [79:39.50]
   5245 ;
   5246 ; SLM-LABEL: test_psllw:
   5247 ; SLM:       # %bb.0:
   5248 ; SLM-NEXT:    psllw %mm1, %mm0 # sched: [1:1.00]
   5249 ; SLM-NEXT:    psllw (%rdi), %mm0 # sched: [4:1.00]
   5250 ; SLM-NEXT:    psllw $7, %mm0 # sched: [1:1.00]
   5251 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   5252 ; SLM-NEXT:    retq # sched: [4:1.00]
   5253 ;
   5254 ; SANDY-LABEL: test_psllw:
   5255 ; SANDY:       # %bb.0:
   5256 ; SANDY-NEXT:    psllw %mm1, %mm0 # sched: [1:1.00]
   5257 ; SANDY-NEXT:    psllw (%rdi), %mm0 # sched: [6:1.00]
   5258 ; SANDY-NEXT:    psllw $7, %mm0 # sched: [1:1.00]
   5259 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5260 ; SANDY-NEXT:    retq # sched: [1:1.00]
   5261 ;
   5262 ; HASWELL-LABEL: test_psllw:
   5263 ; HASWELL:       # %bb.0:
   5264 ; HASWELL-NEXT:    psllw %mm1, %mm0 # sched: [1:1.00]
   5265 ; HASWELL-NEXT:    psllw (%rdi), %mm0 # sched: [6:1.00]
   5266 ; HASWELL-NEXT:    psllw $7, %mm0 # sched: [1:1.00]
   5267 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   5268 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   5269 ;
   5270 ; BROADWELL-LABEL: test_psllw:
   5271 ; BROADWELL:       # %bb.0:
   5272 ; BROADWELL-NEXT:    psllw %mm1, %mm0 # sched: [1:1.00]
   5273 ; BROADWELL-NEXT:    psllw (%rdi), %mm0 # sched: [6:1.00]
   5274 ; BROADWELL-NEXT:    psllw $7, %mm0 # sched: [1:1.00]
   5275 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   5276 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   5277 ;
   5278 ; SKYLAKE-LABEL: test_psllw:
   5279 ; SKYLAKE:       # %bb.0:
   5280 ; SKYLAKE-NEXT:    psllw %mm1, %mm0 # sched: [1:1.00]
   5281 ; SKYLAKE-NEXT:    psllw (%rdi), %mm0 # sched: [6:1.00]
   5282 ; SKYLAKE-NEXT:    psllw $7, %mm0 # sched: [1:1.00]
   5283 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5284 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   5285 ;
   5286 ; SKX-LABEL: test_psllw:
   5287 ; SKX:       # %bb.0:
   5288 ; SKX-NEXT:    psllw %mm1, %mm0 # sched: [1:1.00]
   5289 ; SKX-NEXT:    psllw (%rdi), %mm0 # sched: [6:1.00]
   5290 ; SKX-NEXT:    psllw $7, %mm0 # sched: [1:1.00]
   5291 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5292 ; SKX-NEXT:    retq # sched: [7:1.00]
   5293 ;
   5294 ; BTVER2-LABEL: test_psllw:
   5295 ; BTVER2:       # %bb.0:
   5296 ; BTVER2-NEXT:    psllw %mm1, %mm0 # sched: [1:0.50]
   5297 ; BTVER2-NEXT:    psllw (%rdi), %mm0 # sched: [6:1.00]
   5298 ; BTVER2-NEXT:    psllw $7, %mm0 # sched: [1:0.50]
   5299 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   5300 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   5301 ;
   5302 ; ZNVER1-LABEL: test_psllw:
   5303 ; ZNVER1:       # %bb.0:
   5304 ; ZNVER1-NEXT:    psllw %mm1, %mm0 # sched: [1:0.25]
   5305 ; ZNVER1-NEXT:    psllw (%rdi), %mm0 # sched: [8:0.50]
   5306 ; ZNVER1-NEXT:    psllw $7, %mm0 # sched: [1:0.25]
   5307 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5308 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Register form - both psll.w operands are SSA values.
   5309   %1 = call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %a0, x86_mmx %a1)
          ; Memory form - the second operand comes from an 8-byte-aligned load of %a2;
          ; the expected output has psllw with a (%rdi) operand and no separate load.
   5310   %2 = load x86_mmx, x86_mmx *%a2, align 8
   5311   %3 = call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %1, x86_mmx %2)
          ; Immediate form - pslli.w with the constant i32 7 ($7 in the expected output).
   5312   %4 = call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %3, i32 7)
          ; Return the MMX result as i64 (movq %mm0, %rax in the expected output).
   5313   %5 = bitcast x86_mmx %4 to i64
   5314   ret i64 %5
   5315 }
        ; Intrinsics under test - psll.w takes two x86_mmx values, pslli.w takes an
        ; x86_mmx value and an i32; both return x86_mmx.
   5316 declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone
   5317 declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32) nounwind readnone
   5318 
   5319 define i64 @test_psrad(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
        ; Scheduling test for MMX psrad in its register, memory and immediate
        ; operand forms (intrinsics llvm.x86.mmx.psra.d and llvm.x86.mmx.psrai.d).
        ; The check lines in this function are autogenerated by
        ; utils/update_llc_test_checks.py (see the note at the top of the file) -
        ; regenerate them with that script rather than editing them by hand.
        ; The check prefixes are the ones passed to FileCheck on the RUN lines in
        ; the file header; SANDY is shared by the sandybridge and ivybridge runs.
        ; NOTE(review) the bracketed pair after "# sched" is presumably
        ; [latency, reciprocal throughput] from -print-schedule - confirm.
   5320 ; GENERIC-LABEL: test_psrad:
   5321 ; GENERIC:       # %bb.0:
   5322 ; GENERIC-NEXT:    psrad %mm1, %mm0 # sched: [1:1.00]
   5323 ; GENERIC-NEXT:    psrad (%rdi), %mm0 # sched: [6:1.00]
   5324 ; GENERIC-NEXT:    psrad $7, %mm0 # sched: [1:1.00]
   5325 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5326 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5327 ;
   5328 ; ATOM-LABEL: test_psrad:
   5329 ; ATOM:       # %bb.0:
   5330 ; ATOM-NEXT:    psrad %mm1, %mm0 # sched: [2:1.00]
   5331 ; ATOM-NEXT:    psrad (%rdi), %mm0 # sched: [3:1.50]
   5332 ; ATOM-NEXT:    psrad $7, %mm0 # sched: [1:0.50]
   5333 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   5334 ; ATOM-NEXT:    retq # sched: [79:39.50]
   5335 ;
   5336 ; SLM-LABEL: test_psrad:
   5337 ; SLM:       # %bb.0:
   5338 ; SLM-NEXT:    psrad %mm1, %mm0 # sched: [1:1.00]
   5339 ; SLM-NEXT:    psrad (%rdi), %mm0 # sched: [4:1.00]
   5340 ; SLM-NEXT:    psrad $7, %mm0 # sched: [1:1.00]
   5341 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   5342 ; SLM-NEXT:    retq # sched: [4:1.00]
   5343 ;
   5344 ; SANDY-LABEL: test_psrad:
   5345 ; SANDY:       # %bb.0:
   5346 ; SANDY-NEXT:    psrad %mm1, %mm0 # sched: [1:1.00]
   5347 ; SANDY-NEXT:    psrad (%rdi), %mm0 # sched: [6:1.00]
   5348 ; SANDY-NEXT:    psrad $7, %mm0 # sched: [1:1.00]
   5349 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5350 ; SANDY-NEXT:    retq # sched: [1:1.00]
   5351 ;
   5352 ; HASWELL-LABEL: test_psrad:
   5353 ; HASWELL:       # %bb.0:
   5354 ; HASWELL-NEXT:    psrad %mm1, %mm0 # sched: [1:1.00]
   5355 ; HASWELL-NEXT:    psrad (%rdi), %mm0 # sched: [6:1.00]
   5356 ; HASWELL-NEXT:    psrad $7, %mm0 # sched: [1:1.00]
   5357 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   5358 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   5359 ;
   5360 ; BROADWELL-LABEL: test_psrad:
   5361 ; BROADWELL:       # %bb.0:
   5362 ; BROADWELL-NEXT:    psrad %mm1, %mm0 # sched: [1:1.00]
   5363 ; BROADWELL-NEXT:    psrad (%rdi), %mm0 # sched: [6:1.00]
   5364 ; BROADWELL-NEXT:    psrad $7, %mm0 # sched: [1:1.00]
   5365 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   5366 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   5367 ;
   5368 ; SKYLAKE-LABEL: test_psrad:
   5369 ; SKYLAKE:       # %bb.0:
   5370 ; SKYLAKE-NEXT:    psrad %mm1, %mm0 # sched: [1:1.00]
   5371 ; SKYLAKE-NEXT:    psrad (%rdi), %mm0 # sched: [6:1.00]
   5372 ; SKYLAKE-NEXT:    psrad $7, %mm0 # sched: [1:1.00]
   5373 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5374 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   5375 ;
   5376 ; SKX-LABEL: test_psrad:
   5377 ; SKX:       # %bb.0:
   5378 ; SKX-NEXT:    psrad %mm1, %mm0 # sched: [1:1.00]
   5379 ; SKX-NEXT:    psrad (%rdi), %mm0 # sched: [6:1.00]
   5380 ; SKX-NEXT:    psrad $7, %mm0 # sched: [1:1.00]
   5381 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5382 ; SKX-NEXT:    retq # sched: [7:1.00]
   5383 ;
   5384 ; BTVER2-LABEL: test_psrad:
   5385 ; BTVER2:       # %bb.0:
   5386 ; BTVER2-NEXT:    psrad %mm1, %mm0 # sched: [1:0.50]
   5387 ; BTVER2-NEXT:    psrad (%rdi), %mm0 # sched: [6:1.00]
   5388 ; BTVER2-NEXT:    psrad $7, %mm0 # sched: [1:0.50]
   5389 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   5390 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   5391 ;
   5392 ; ZNVER1-LABEL: test_psrad:
   5393 ; ZNVER1:       # %bb.0:
   5394 ; ZNVER1-NEXT:    psrad %mm1, %mm0 # sched: [1:0.25]
   5395 ; ZNVER1-NEXT:    psrad (%rdi), %mm0 # sched: [8:0.50]
   5396 ; ZNVER1-NEXT:    psrad $7, %mm0 # sched: [1:0.25]
   5397 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5398 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Register form - both psra.d operands are SSA values.
   5399   %1 = call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %a0, x86_mmx %a1)
          ; Memory form - the second operand comes from an 8-byte-aligned load of %a2;
          ; the expected output has psrad with a (%rdi) operand and no separate load.
   5400   %2 = load x86_mmx, x86_mmx *%a2, align 8
   5401   %3 = call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %1, x86_mmx %2)
          ; Immediate form - psrai.d with the constant i32 7 ($7 in the expected output).
   5402   %4 = call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx %3, i32 7)
          ; Return the MMX result as i64 (movq %mm0, %rax in the expected output).
   5403   %5 = bitcast x86_mmx %4 to i64
   5404   ret i64 %5
   5405 }
        ; Intrinsics under test - psra.d takes two x86_mmx values, psrai.d takes an
        ; x86_mmx value and an i32; both return x86_mmx.
   5406 declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone
   5407 declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32) nounwind readnone
   5408 
   5409 define i64 @test_psraw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
        ; Scheduling test for MMX psraw in its register, memory and immediate
        ; operand forms (intrinsics llvm.x86.mmx.psra.w and llvm.x86.mmx.psrai.w).
        ; The check lines in this function are autogenerated by
        ; utils/update_llc_test_checks.py (see the note at the top of the file) -
        ; regenerate them with that script rather than editing them by hand.
        ; The check prefixes are the ones passed to FileCheck on the RUN lines in
        ; the file header; SANDY is shared by the sandybridge and ivybridge runs.
        ; NOTE(review) the bracketed pair after "# sched" is presumably
        ; [latency, reciprocal throughput] from -print-schedule - confirm.
   5410 ; GENERIC-LABEL: test_psraw:
   5411 ; GENERIC:       # %bb.0:
   5412 ; GENERIC-NEXT:    psraw %mm1, %mm0 # sched: [1:1.00]
   5413 ; GENERIC-NEXT:    psraw (%rdi), %mm0 # sched: [6:1.00]
   5414 ; GENERIC-NEXT:    psraw $7, %mm0 # sched: [1:1.00]
   5415 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5416 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5417 ;
   5418 ; ATOM-LABEL: test_psraw:
   5419 ; ATOM:       # %bb.0:
   5420 ; ATOM-NEXT:    psraw %mm1, %mm0 # sched: [2:1.00]
   5421 ; ATOM-NEXT:    psraw (%rdi), %mm0 # sched: [3:1.50]
   5422 ; ATOM-NEXT:    psraw $7, %mm0 # sched: [1:0.50]
   5423 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   5424 ; ATOM-NEXT:    retq # sched: [79:39.50]
   5425 ;
   5426 ; SLM-LABEL: test_psraw:
   5427 ; SLM:       # %bb.0:
   5428 ; SLM-NEXT:    psraw %mm1, %mm0 # sched: [1:1.00]
   5429 ; SLM-NEXT:    psraw (%rdi), %mm0 # sched: [4:1.00]
   5430 ; SLM-NEXT:    psraw $7, %mm0 # sched: [1:1.00]
   5431 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   5432 ; SLM-NEXT:    retq # sched: [4:1.00]
   5433 ;
   5434 ; SANDY-LABEL: test_psraw:
   5435 ; SANDY:       # %bb.0:
   5436 ; SANDY-NEXT:    psraw %mm1, %mm0 # sched: [1:1.00]
   5437 ; SANDY-NEXT:    psraw (%rdi), %mm0 # sched: [6:1.00]
   5438 ; SANDY-NEXT:    psraw $7, %mm0 # sched: [1:1.00]
   5439 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5440 ; SANDY-NEXT:    retq # sched: [1:1.00]
   5441 ;
   5442 ; HASWELL-LABEL: test_psraw:
   5443 ; HASWELL:       # %bb.0:
   5444 ; HASWELL-NEXT:    psraw %mm1, %mm0 # sched: [1:1.00]
   5445 ; HASWELL-NEXT:    psraw (%rdi), %mm0 # sched: [6:1.00]
   5446 ; HASWELL-NEXT:    psraw $7, %mm0 # sched: [1:1.00]
   5447 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   5448 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   5449 ;
   5450 ; BROADWELL-LABEL: test_psraw:
   5451 ; BROADWELL:       # %bb.0:
   5452 ; BROADWELL-NEXT:    psraw %mm1, %mm0 # sched: [1:1.00]
   5453 ; BROADWELL-NEXT:    psraw (%rdi), %mm0 # sched: [6:1.00]
   5454 ; BROADWELL-NEXT:    psraw $7, %mm0 # sched: [1:1.00]
   5455 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   5456 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   5457 ;
   5458 ; SKYLAKE-LABEL: test_psraw:
   5459 ; SKYLAKE:       # %bb.0:
   5460 ; SKYLAKE-NEXT:    psraw %mm1, %mm0 # sched: [1:1.00]
   5461 ; SKYLAKE-NEXT:    psraw (%rdi), %mm0 # sched: [6:1.00]
   5462 ; SKYLAKE-NEXT:    psraw $7, %mm0 # sched: [1:1.00]
   5463 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5464 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   5465 ;
   5466 ; SKX-LABEL: test_psraw:
   5467 ; SKX:       # %bb.0:
   5468 ; SKX-NEXT:    psraw %mm1, %mm0 # sched: [1:1.00]
   5469 ; SKX-NEXT:    psraw (%rdi), %mm0 # sched: [6:1.00]
   5470 ; SKX-NEXT:    psraw $7, %mm0 # sched: [1:1.00]
   5471 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5472 ; SKX-NEXT:    retq # sched: [7:1.00]
   5473 ;
   5474 ; BTVER2-LABEL: test_psraw:
   5475 ; BTVER2:       # %bb.0:
   5476 ; BTVER2-NEXT:    psraw %mm1, %mm0 # sched: [1:0.50]
   5477 ; BTVER2-NEXT:    psraw (%rdi), %mm0 # sched: [6:1.00]
   5478 ; BTVER2-NEXT:    psraw $7, %mm0 # sched: [1:0.50]
   5479 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   5480 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   5481 ;
   5482 ; ZNVER1-LABEL: test_psraw:
   5483 ; ZNVER1:       # %bb.0:
   5484 ; ZNVER1-NEXT:    psraw %mm1, %mm0 # sched: [1:0.25]
   5485 ; ZNVER1-NEXT:    psraw (%rdi), %mm0 # sched: [8:0.50]
   5486 ; ZNVER1-NEXT:    psraw $7, %mm0 # sched: [1:0.25]
   5487 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5488 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Register form - both psra.w operands are SSA values.
   5489   %1 = call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %a0, x86_mmx %a1)
          ; Memory form - the second operand comes from an 8-byte-aligned load of %a2;
          ; the expected output has psraw with a (%rdi) operand and no separate load.
   5490   %2 = load x86_mmx, x86_mmx *%a2, align 8
   5491   %3 = call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %1, x86_mmx %2)
          ; Immediate form - psrai.w with the constant i32 7 ($7 in the expected output).
   5492   %4 = call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %3, i32 7)
          ; Return the MMX result as i64 (movq %mm0, %rax in the expected output).
   5493   %5 = bitcast x86_mmx %4 to i64
   5494   ret i64 %5
   5495 }
        ; Intrinsics under test - psra.w takes two x86_mmx values, psrai.w takes an
        ; x86_mmx value and an i32; both return x86_mmx.
   5496 declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone
   5497 declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32) nounwind readnone
   5498 
   5499 define i64 @test_psrld(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
   5500 ; GENERIC-LABEL: test_psrld:
   5501 ; GENERIC:       # %bb.0:
   5502 ; GENERIC-NEXT:    psrld %mm1, %mm0 # sched: [1:1.00]
   5503 ; GENERIC-NEXT:    psrld (%rdi), %mm0 # sched: [6:1.00]
   5504 ; GENERIC-NEXT:    psrld $7, %mm0 # sched: [1:1.00]
   5505 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5506 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5507 ;
   5508 ; ATOM-LABEL: test_psrld:
   5509 ; ATOM:       # %bb.0:
   5510 ; ATOM-NEXT:    psrld %mm1, %mm0 # sched: [2:1.00]
   5511 ; ATOM-NEXT:    psrld (%rdi), %mm0 # sched: [3:1.50]
   5512 ; ATOM-NEXT:    psrld $7, %mm0 # sched: [1:0.50]
   5513 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   5514 ; ATOM-NEXT:    retq # sched: [79:39.50]
   5515 ;
   5516 ; SLM-LABEL: test_psrld:
   5517 ; SLM:       # %bb.0:
   5518 ; SLM-NEXT:    psrld %mm1, %mm0 # sched: [1:1.00]
   5519 ; SLM-NEXT:    psrld (%rdi), %mm0 # sched: [4:1.00]
   5520 ; SLM-NEXT:    psrld $7, %mm0 # sched: [1:1.00]
   5521 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   5522 ; SLM-NEXT:    retq # sched: [4:1.00]
   5523 ;
   5524 ; SANDY-LABEL: test_psrld:
   5525 ; SANDY:       # %bb.0:
   5526 ; SANDY-NEXT:    psrld %mm1, %mm0 # sched: [1:1.00]
   5527 ; SANDY-NEXT:    psrld (%rdi), %mm0 # sched: [6:1.00]
   5528 ; SANDY-NEXT:    psrld $7, %mm0 # sched: [1:1.00]
   5529 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5530 ; SANDY-NEXT:    retq # sched: [1:1.00]
   5531 ;
   5532 ; HASWELL-LABEL: test_psrld:
   5533 ; HASWELL:       # %bb.0:
   5534 ; HASWELL-NEXT:    psrld %mm1, %mm0 # sched: [1:1.00]
   5535 ; HASWELL-NEXT:    psrld (%rdi), %mm0 # sched: [6:1.00]
   5536 ; HASWELL-NEXT:    psrld $7, %mm0 # sched: [1:1.00]
   5537 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   5538 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   5539 ;
   5540 ; BROADWELL-LABEL: test_psrld:
   5541 ; BROADWELL:       # %bb.0:
   5542 ; BROADWELL-NEXT:    psrld %mm1, %mm0 # sched: [1:1.00]
   5543 ; BROADWELL-NEXT:    psrld (%rdi), %mm0 # sched: [6:1.00]
   5544 ; BROADWELL-NEXT:    psrld $7, %mm0 # sched: [1:1.00]
   5545 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   5546 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   5547 ;
   5548 ; SKYLAKE-LABEL: test_psrld:
   5549 ; SKYLAKE:       # %bb.0:
   5550 ; SKYLAKE-NEXT:    psrld %mm1, %mm0 # sched: [1:1.00]
   5551 ; SKYLAKE-NEXT:    psrld (%rdi), %mm0 # sched: [6:1.00]
   5552 ; SKYLAKE-NEXT:    psrld $7, %mm0 # sched: [1:1.00]
   5553 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5554 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   5555 ;
   5556 ; SKX-LABEL: test_psrld:
   5557 ; SKX:       # %bb.0:
   5558 ; SKX-NEXT:    psrld %mm1, %mm0 # sched: [1:1.00]
   5559 ; SKX-NEXT:    psrld (%rdi), %mm0 # sched: [6:1.00]
   5560 ; SKX-NEXT:    psrld $7, %mm0 # sched: [1:1.00]
   5561 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5562 ; SKX-NEXT:    retq # sched: [7:1.00]
   5563 ;
   5564 ; BTVER2-LABEL: test_psrld:
   5565 ; BTVER2:       # %bb.0:
   5566 ; BTVER2-NEXT:    psrld %mm1, %mm0 # sched: [1:0.50]
   5567 ; BTVER2-NEXT:    psrld (%rdi), %mm0 # sched: [6:1.00]
   5568 ; BTVER2-NEXT:    psrld $7, %mm0 # sched: [1:0.50]
   5569 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   5570 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   5571 ;
   5572 ; ZNVER1-LABEL: test_psrld:
   5573 ; ZNVER1:       # %bb.0:
   5574 ; ZNVER1-NEXT:    psrld %mm1, %mm0 # sched: [1:0.25]
   5575 ; ZNVER1-NEXT:    psrld (%rdi), %mm0 # sched: [8:0.50]
   5576 ; ZNVER1-NEXT:    psrld $7, %mm0 # sched: [1:0.25]
   5577 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5578 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   5579   %1 = call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %a0, x86_mmx %a1)
   5580   %2 = load x86_mmx, x86_mmx *%a2, align 8
   5581   %3 = call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %1, x86_mmx %2)
   5582   %4 = call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %3, i32 7)
   5583   %5 = bitcast x86_mmx %4 to i64
   5584   ret i64 %5
   5585 }
   5586 declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone ; second operand is an x86_mmx value
   5587 declare x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx, i32) nounwind readnone ; second operand is an i32
   5588 
   5589 define i64 @test_psrlq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
        ; Scheduling test for the MMX psrlq instruction.
        ; The IR at the end of this function calls llvm.x86.mmx.psrl.q on %a0 with
        ; %a1, then on that result with a value loaded from %a2, and finally calls
        ; llvm.x86.mmx.psrli.q with the constant 7. The autogenerated assertions
        ; below pin, per check prefix, the expected instruction sequence and the
        ; "# sched" annotation that llc prints under -print-schedule.
   5590 ; GENERIC-LABEL: test_psrlq:
   5591 ; GENERIC:       # %bb.0:
   5592 ; GENERIC-NEXT:    psrlq %mm1, %mm0 # sched: [1:1.00]
   5593 ; GENERIC-NEXT:    psrlq (%rdi), %mm0 # sched: [6:1.00]
   5594 ; GENERIC-NEXT:    psrlq $7, %mm0 # sched: [1:1.00]
   5595 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5596 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5597 ;
   5598 ; ATOM-LABEL: test_psrlq:
   5599 ; ATOM:       # %bb.0:
   5600 ; ATOM-NEXT:    psrlq %mm1, %mm0 # sched: [2:1.00]
   5601 ; ATOM-NEXT:    psrlq (%rdi), %mm0 # sched: [3:1.50]
   5602 ; ATOM-NEXT:    psrlq $7, %mm0 # sched: [1:0.50]
   5603 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   5604 ; ATOM-NEXT:    retq # sched: [79:39.50]
   5605 ;
   5606 ; SLM-LABEL: test_psrlq:
   5607 ; SLM:       # %bb.0:
   5608 ; SLM-NEXT:    psrlq %mm1, %mm0 # sched: [1:1.00]
   5609 ; SLM-NEXT:    psrlq (%rdi), %mm0 # sched: [4:1.00]
   5610 ; SLM-NEXT:    psrlq $7, %mm0 # sched: [1:1.00]
   5611 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   5612 ; SLM-NEXT:    retq # sched: [4:1.00]
   5613 ;
   5614 ; SANDY-LABEL: test_psrlq:
   5615 ; SANDY:       # %bb.0:
   5616 ; SANDY-NEXT:    psrlq %mm1, %mm0 # sched: [1:1.00]
   5617 ; SANDY-NEXT:    psrlq (%rdi), %mm0 # sched: [6:1.00]
   5618 ; SANDY-NEXT:    psrlq $7, %mm0 # sched: [1:1.00]
   5619 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5620 ; SANDY-NEXT:    retq # sched: [1:1.00]
   5621 ;
   5622 ; HASWELL-LABEL: test_psrlq:
   5623 ; HASWELL:       # %bb.0:
   5624 ; HASWELL-NEXT:    psrlq %mm1, %mm0 # sched: [1:1.00]
   5625 ; HASWELL-NEXT:    psrlq (%rdi), %mm0 # sched: [6:1.00]
   5626 ; HASWELL-NEXT:    psrlq $7, %mm0 # sched: [1:1.00]
   5627 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   5628 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   5629 ;
   5630 ; BROADWELL-LABEL: test_psrlq:
   5631 ; BROADWELL:       # %bb.0:
   5632 ; BROADWELL-NEXT:    psrlq %mm1, %mm0 # sched: [1:1.00]
   5633 ; BROADWELL-NEXT:    psrlq (%rdi), %mm0 # sched: [6:1.00]
   5634 ; BROADWELL-NEXT:    psrlq $7, %mm0 # sched: [1:1.00]
   5635 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   5636 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   5637 ;
   5638 ; SKYLAKE-LABEL: test_psrlq:
   5639 ; SKYLAKE:       # %bb.0:
   5640 ; SKYLAKE-NEXT:    psrlq %mm1, %mm0 # sched: [1:1.00]
   5641 ; SKYLAKE-NEXT:    psrlq (%rdi), %mm0 # sched: [6:1.00]
   5642 ; SKYLAKE-NEXT:    psrlq $7, %mm0 # sched: [1:1.00]
   5643 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5644 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   5645 ;
   5646 ; SKX-LABEL: test_psrlq:
   5647 ; SKX:       # %bb.0:
   5648 ; SKX-NEXT:    psrlq %mm1, %mm0 # sched: [1:1.00]
   5649 ; SKX-NEXT:    psrlq (%rdi), %mm0 # sched: [6:1.00]
   5650 ; SKX-NEXT:    psrlq $7, %mm0 # sched: [1:1.00]
   5651 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5652 ; SKX-NEXT:    retq # sched: [7:1.00]
   5653 ;
   5654 ; BTVER2-LABEL: test_psrlq:
   5655 ; BTVER2:       # %bb.0:
   5656 ; BTVER2-NEXT:    psrlq %mm1, %mm0 # sched: [1:0.50]
   5657 ; BTVER2-NEXT:    psrlq (%rdi), %mm0 # sched: [6:1.00]
   5658 ; BTVER2-NEXT:    psrlq $7, %mm0 # sched: [1:0.50]
   5659 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   5660 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   5661 ;
   5662 ; ZNVER1-LABEL: test_psrlq:
   5663 ; ZNVER1:       # %bb.0:
   5664 ; ZNVER1-NEXT:    psrlq %mm1, %mm0 # sched: [1:0.25]
   5665 ; ZNVER1-NEXT:    psrlq (%rdi), %mm0 # sched: [8:0.50]
   5666 ; ZNVER1-NEXT:    psrlq $7, %mm0 # sched: [1:0.25]
   5667 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5668 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Second operand first comes from %a1, then from an 8-byte-aligned load of %a2.
   5669   %1 = call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %a0, x86_mmx %a1)
   5670   %2 = load x86_mmx, x86_mmx *%a2, align 8
   5671   %3 = call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %1, x86_mmx %2)
        ; Immediate form with the constant 7; the result is returned as an i64.
   5672   %4 = call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %3, i32 7)
   5673   %5 = bitcast x86_mmx %4 to i64
   5674   ret i64 %5
   5675 }
        ; Intrinsics called by test_psrlq above.
   5676 declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone
   5677 declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone
   5678 
   5679 define i64 @test_psrlw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
        ; Scheduling test for the MMX psrlw instruction.
        ; The IR at the end of this function calls llvm.x86.mmx.psrl.w on %a0 with
        ; %a1, then on that result with a value loaded from %a2, and finally calls
        ; llvm.x86.mmx.psrli.w with the constant 7. The autogenerated assertions
        ; below pin, per check prefix, the expected instruction sequence and the
        ; "# sched" annotation that llc prints under -print-schedule.
   5680 ; GENERIC-LABEL: test_psrlw:
   5681 ; GENERIC:       # %bb.0:
   5682 ; GENERIC-NEXT:    psrlw %mm1, %mm0 # sched: [1:1.00]
   5683 ; GENERIC-NEXT:    psrlw (%rdi), %mm0 # sched: [6:1.00]
   5684 ; GENERIC-NEXT:    psrlw $7, %mm0 # sched: [1:1.00]
   5685 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5686 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5687 ;
   5688 ; ATOM-LABEL: test_psrlw:
   5689 ; ATOM:       # %bb.0:
   5690 ; ATOM-NEXT:    psrlw %mm1, %mm0 # sched: [2:1.00]
   5691 ; ATOM-NEXT:    psrlw (%rdi), %mm0 # sched: [3:1.50]
   5692 ; ATOM-NEXT:    psrlw $7, %mm0 # sched: [1:0.50]
   5693 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   5694 ; ATOM-NEXT:    retq # sched: [79:39.50]
   5695 ;
   5696 ; SLM-LABEL: test_psrlw:
   5697 ; SLM:       # %bb.0:
   5698 ; SLM-NEXT:    psrlw %mm1, %mm0 # sched: [1:1.00]
   5699 ; SLM-NEXT:    psrlw (%rdi), %mm0 # sched: [4:1.00]
   5700 ; SLM-NEXT:    psrlw $7, %mm0 # sched: [1:1.00]
   5701 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   5702 ; SLM-NEXT:    retq # sched: [4:1.00]
   5703 ;
   5704 ; SANDY-LABEL: test_psrlw:
   5705 ; SANDY:       # %bb.0:
   5706 ; SANDY-NEXT:    psrlw %mm1, %mm0 # sched: [1:1.00]
   5707 ; SANDY-NEXT:    psrlw (%rdi), %mm0 # sched: [6:1.00]
   5708 ; SANDY-NEXT:    psrlw $7, %mm0 # sched: [1:1.00]
   5709 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5710 ; SANDY-NEXT:    retq # sched: [1:1.00]
   5711 ;
   5712 ; HASWELL-LABEL: test_psrlw:
   5713 ; HASWELL:       # %bb.0:
   5714 ; HASWELL-NEXT:    psrlw %mm1, %mm0 # sched: [1:1.00]
   5715 ; HASWELL-NEXT:    psrlw (%rdi), %mm0 # sched: [6:1.00]
   5716 ; HASWELL-NEXT:    psrlw $7, %mm0 # sched: [1:1.00]
   5717 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   5718 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   5719 ;
   5720 ; BROADWELL-LABEL: test_psrlw:
   5721 ; BROADWELL:       # %bb.0:
   5722 ; BROADWELL-NEXT:    psrlw %mm1, %mm0 # sched: [1:1.00]
   5723 ; BROADWELL-NEXT:    psrlw (%rdi), %mm0 # sched: [6:1.00]
   5724 ; BROADWELL-NEXT:    psrlw $7, %mm0 # sched: [1:1.00]
   5725 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   5726 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   5727 ;
   5728 ; SKYLAKE-LABEL: test_psrlw:
   5729 ; SKYLAKE:       # %bb.0:
   5730 ; SKYLAKE-NEXT:    psrlw %mm1, %mm0 # sched: [1:1.00]
   5731 ; SKYLAKE-NEXT:    psrlw (%rdi), %mm0 # sched: [6:1.00]
   5732 ; SKYLAKE-NEXT:    psrlw $7, %mm0 # sched: [1:1.00]
   5733 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5734 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   5735 ;
   5736 ; SKX-LABEL: test_psrlw:
   5737 ; SKX:       # %bb.0:
   5738 ; SKX-NEXT:    psrlw %mm1, %mm0 # sched: [1:1.00]
   5739 ; SKX-NEXT:    psrlw (%rdi), %mm0 # sched: [6:1.00]
   5740 ; SKX-NEXT:    psrlw $7, %mm0 # sched: [1:1.00]
   5741 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5742 ; SKX-NEXT:    retq # sched: [7:1.00]
   5743 ;
   5744 ; BTVER2-LABEL: test_psrlw:
   5745 ; BTVER2:       # %bb.0:
   5746 ; BTVER2-NEXT:    psrlw %mm1, %mm0 # sched: [1:0.50]
   5747 ; BTVER2-NEXT:    psrlw (%rdi), %mm0 # sched: [6:1.00]
   5748 ; BTVER2-NEXT:    psrlw $7, %mm0 # sched: [1:0.50]
   5749 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   5750 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   5751 ;
   5752 ; ZNVER1-LABEL: test_psrlw:
   5753 ; ZNVER1:       # %bb.0:
   5754 ; ZNVER1-NEXT:    psrlw %mm1, %mm0 # sched: [1:0.25]
   5755 ; ZNVER1-NEXT:    psrlw (%rdi), %mm0 # sched: [8:0.50]
   5756 ; ZNVER1-NEXT:    psrlw $7, %mm0 # sched: [1:0.25]
   5757 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5758 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Second operand first comes from %a1, then from an 8-byte-aligned load of %a2.
   5759   %1 = call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %a0, x86_mmx %a1)
   5760   %2 = load x86_mmx, x86_mmx *%a2, align 8
   5761   %3 = call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %1, x86_mmx %2)
        ; Immediate form with the constant 7; the result is returned as an i64.
   5762   %4 = call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx %3, i32 7)
   5763   %5 = bitcast x86_mmx %4 to i64
   5764   ret i64 %5
   5765 }
        ; Intrinsics called by test_psrlw above.
   5766 declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone
   5767 declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone
   5768 
   5769 define i64 @test_psubb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
        ; Scheduling test for the MMX psubb instruction.
        ; The IR at the end of this function calls llvm.x86.mmx.psub.b on %a0 with
        ; %a1, then on that result with a value loaded from %a2. The autogenerated
        ; assertions below pin, per check prefix, the expected instruction sequence
        ; and the "# sched" annotation that llc prints under -print-schedule.
   5770 ; GENERIC-LABEL: test_psubb:
   5771 ; GENERIC:       # %bb.0:
   5772 ; GENERIC-NEXT:    psubb %mm1, %mm0 # sched: [3:1.00]
   5773 ; GENERIC-NEXT:    psubb (%rdi), %mm0 # sched: [8:1.00]
   5774 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5775 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5776 ;
   5777 ; ATOM-LABEL: test_psubb:
   5778 ; ATOM:       # %bb.0:
   5779 ; ATOM-NEXT:    psubb %mm1, %mm0 # sched: [1:0.50]
   5780 ; ATOM-NEXT:    psubb (%rdi), %mm0 # sched: [1:1.00]
   5781 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   5782 ; ATOM-NEXT:    retq # sched: [79:39.50]
   5783 ;
   5784 ; SLM-LABEL: test_psubb:
   5785 ; SLM:       # %bb.0:
   5786 ; SLM-NEXT:    psubb %mm1, %mm0 # sched: [1:0.50]
   5787 ; SLM-NEXT:    psubb (%rdi), %mm0 # sched: [4:1.00]
   5788 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   5789 ; SLM-NEXT:    retq # sched: [4:1.00]
   5790 ;
   5791 ; SANDY-LABEL: test_psubb:
   5792 ; SANDY:       # %bb.0:
   5793 ; SANDY-NEXT:    psubb %mm1, %mm0 # sched: [3:1.00]
   5794 ; SANDY-NEXT:    psubb (%rdi), %mm0 # sched: [8:1.00]
   5795 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5796 ; SANDY-NEXT:    retq # sched: [1:1.00]
   5797 ;
   5798 ; HASWELL-LABEL: test_psubb:
   5799 ; HASWELL:       # %bb.0:
   5800 ; HASWELL-NEXT:    psubb %mm1, %mm0 # sched: [1:0.50]
   5801 ; HASWELL-NEXT:    psubb (%rdi), %mm0 # sched: [6:0.50]
   5802 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   5803 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   5804 ;
   5805 ; BROADWELL-LABEL: test_psubb:
   5806 ; BROADWELL:       # %bb.0:
   5807 ; BROADWELL-NEXT:    psubb %mm1, %mm0 # sched: [1:0.50]
   5808 ; BROADWELL-NEXT:    psubb (%rdi), %mm0 # sched: [6:0.50]
   5809 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   5810 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   5811 ;
   5812 ; SKYLAKE-LABEL: test_psubb:
   5813 ; SKYLAKE:       # %bb.0:
   5814 ; SKYLAKE-NEXT:    psubb %mm1, %mm0 # sched: [1:0.50]
   5815 ; SKYLAKE-NEXT:    psubb (%rdi), %mm0 # sched: [6:0.50]
   5816 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5817 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   5818 ;
   5819 ; SKX-LABEL: test_psubb:
   5820 ; SKX:       # %bb.0:
   5821 ; SKX-NEXT:    psubb %mm1, %mm0 # sched: [1:0.50]
   5822 ; SKX-NEXT:    psubb (%rdi), %mm0 # sched: [6:0.50]
   5823 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5824 ; SKX-NEXT:    retq # sched: [7:1.00]
   5825 ;
   5826 ; BTVER2-LABEL: test_psubb:
   5827 ; BTVER2:       # %bb.0:
   5828 ; BTVER2-NEXT:    psubb %mm1, %mm0 # sched: [1:0.50]
   5829 ; BTVER2-NEXT:    psubb (%rdi), %mm0 # sched: [6:1.00]
   5830 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   5831 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   5832 ;
   5833 ; ZNVER1-LABEL: test_psubb:
   5834 ; ZNVER1:       # %bb.0:
   5835 ; ZNVER1-NEXT:    psubb %mm1, %mm0 # sched: [1:0.25]
   5836 ; ZNVER1-NEXT:    psubb (%rdi), %mm0 # sched: [8:0.50]
   5837 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5838 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Second operand first comes from %a1, then from an 8-byte-aligned load of %a2;
        ; the final result is returned as an i64.
   5839   %1 = call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %a0, x86_mmx %a1)
   5840   %2 = load x86_mmx, x86_mmx *%a2, align 8
   5841   %3 = call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %1, x86_mmx %2)
   5842   %4 = bitcast x86_mmx %3 to i64
   5843   ret i64 %4
   5844 }
        ; Intrinsic called by test_psubb above.
   5845 declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone
   5846 
   5847 define i64 @test_psubd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
        ; Scheduling test for the MMX psubd instruction.
        ; The IR at the end of this function calls llvm.x86.mmx.psub.d on %a0 with
        ; %a1, then on that result with a value loaded from %a2. The autogenerated
        ; assertions below pin, per check prefix, the expected instruction sequence
        ; and the "# sched" annotation that llc prints under -print-schedule.
   5848 ; GENERIC-LABEL: test_psubd:
   5849 ; GENERIC:       # %bb.0:
   5850 ; GENERIC-NEXT:    psubd %mm1, %mm0 # sched: [3:1.00]
   5851 ; GENERIC-NEXT:    psubd (%rdi), %mm0 # sched: [8:1.00]
   5852 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5853 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5854 ;
   5855 ; ATOM-LABEL: test_psubd:
   5856 ; ATOM:       # %bb.0:
   5857 ; ATOM-NEXT:    psubd %mm1, %mm0 # sched: [1:0.50]
   5858 ; ATOM-NEXT:    psubd (%rdi), %mm0 # sched: [1:1.00]
   5859 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   5860 ; ATOM-NEXT:    retq # sched: [79:39.50]
   5861 ;
   5862 ; SLM-LABEL: test_psubd:
   5863 ; SLM:       # %bb.0:
   5864 ; SLM-NEXT:    psubd %mm1, %mm0 # sched: [1:0.50]
   5865 ; SLM-NEXT:    psubd (%rdi), %mm0 # sched: [4:1.00]
   5866 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   5867 ; SLM-NEXT:    retq # sched: [4:1.00]
   5868 ;
   5869 ; SANDY-LABEL: test_psubd:
   5870 ; SANDY:       # %bb.0:
   5871 ; SANDY-NEXT:    psubd %mm1, %mm0 # sched: [3:1.00]
   5872 ; SANDY-NEXT:    psubd (%rdi), %mm0 # sched: [8:1.00]
   5873 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5874 ; SANDY-NEXT:    retq # sched: [1:1.00]
   5875 ;
   5876 ; HASWELL-LABEL: test_psubd:
   5877 ; HASWELL:       # %bb.0:
   5878 ; HASWELL-NEXT:    psubd %mm1, %mm0 # sched: [1:0.50]
   5879 ; HASWELL-NEXT:    psubd (%rdi), %mm0 # sched: [6:0.50]
   5880 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   5881 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   5882 ;
   5883 ; BROADWELL-LABEL: test_psubd:
   5884 ; BROADWELL:       # %bb.0:
   5885 ; BROADWELL-NEXT:    psubd %mm1, %mm0 # sched: [1:0.50]
   5886 ; BROADWELL-NEXT:    psubd (%rdi), %mm0 # sched: [6:0.50]
   5887 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   5888 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   5889 ;
   5890 ; SKYLAKE-LABEL: test_psubd:
   5891 ; SKYLAKE:       # %bb.0:
   5892 ; SKYLAKE-NEXT:    psubd %mm1, %mm0 # sched: [1:0.50]
   5893 ; SKYLAKE-NEXT:    psubd (%rdi), %mm0 # sched: [6:0.50]
   5894 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5895 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   5896 ;
   5897 ; SKX-LABEL: test_psubd:
   5898 ; SKX:       # %bb.0:
   5899 ; SKX-NEXT:    psubd %mm1, %mm0 # sched: [1:0.50]
   5900 ; SKX-NEXT:    psubd (%rdi), %mm0 # sched: [6:0.50]
   5901 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5902 ; SKX-NEXT:    retq # sched: [7:1.00]
   5903 ;
   5904 ; BTVER2-LABEL: test_psubd:
   5905 ; BTVER2:       # %bb.0:
   5906 ; BTVER2-NEXT:    psubd %mm1, %mm0 # sched: [1:0.50]
   5907 ; BTVER2-NEXT:    psubd (%rdi), %mm0 # sched: [6:1.00]
   5908 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   5909 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   5910 ;
   5911 ; ZNVER1-LABEL: test_psubd:
   5912 ; ZNVER1:       # %bb.0:
   5913 ; ZNVER1-NEXT:    psubd %mm1, %mm0 # sched: [1:0.25]
   5914 ; ZNVER1-NEXT:    psubd (%rdi), %mm0 # sched: [8:0.50]
   5915 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5916 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Second operand first comes from %a1, then from an 8-byte-aligned load of %a2;
        ; the final result is returned as an i64.
   5917   %1 = call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %a0, x86_mmx %a1)
   5918   %2 = load x86_mmx, x86_mmx *%a2, align 8
   5919   %3 = call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %1, x86_mmx %2)
   5920   %4 = bitcast x86_mmx %3 to i64
   5921   ret i64 %4
   5922 }
        ; Intrinsic called by test_psubd above.
   5923 declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone
   5924 
   5925 define i64 @test_psubq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
        ; Scheduling test for the MMX psubq instruction.
        ; The IR at the end of this function calls llvm.x86.mmx.psub.q on %a0 with
        ; %a1, then on that result with a value loaded from %a2. The autogenerated
        ; assertions below pin, per check prefix, the expected instruction sequence
        ; and the "# sched" annotation that llc prints under -print-schedule.
   5926 ; GENERIC-LABEL: test_psubq:
   5927 ; GENERIC:       # %bb.0:
   5928 ; GENERIC-NEXT:    psubq %mm1, %mm0 # sched: [3:1.00]
   5929 ; GENERIC-NEXT:    psubq (%rdi), %mm0 # sched: [8:1.00]
   5930 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5931 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5932 ;
   5933 ; ATOM-LABEL: test_psubq:
   5934 ; ATOM:       # %bb.0:
   5935 ; ATOM-NEXT:    psubq %mm1, %mm0 # sched: [2:1.00]
   5936 ; ATOM-NEXT:    psubq (%rdi), %mm0 # sched: [3:1.50]
   5937 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   5938 ; ATOM-NEXT:    retq # sched: [79:39.50]
   5939 ;
   5940 ; SLM-LABEL: test_psubq:
   5941 ; SLM:       # %bb.0:
   5942 ; SLM-NEXT:    psubq %mm1, %mm0 # sched: [1:0.50]
   5943 ; SLM-NEXT:    psubq (%rdi), %mm0 # sched: [4:1.00]
   5944 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   5945 ; SLM-NEXT:    retq # sched: [4:1.00]
   5946 ;
   5947 ; SANDY-LABEL: test_psubq:
   5948 ; SANDY:       # %bb.0:
   5949 ; SANDY-NEXT:    psubq %mm1, %mm0 # sched: [3:1.00]
   5950 ; SANDY-NEXT:    psubq (%rdi), %mm0 # sched: [8:1.00]
   5951 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5952 ; SANDY-NEXT:    retq # sched: [1:1.00]
   5953 ;
   5954 ; HASWELL-LABEL: test_psubq:
   5955 ; HASWELL:       # %bb.0:
   5956 ; HASWELL-NEXT:    psubq %mm1, %mm0 # sched: [1:0.50]
   5957 ; HASWELL-NEXT:    psubq (%rdi), %mm0 # sched: [6:0.50]
   5958 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   5959 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   5960 ;
   5961 ; BROADWELL-LABEL: test_psubq:
   5962 ; BROADWELL:       # %bb.0:
   5963 ; BROADWELL-NEXT:    psubq %mm1, %mm0 # sched: [1:0.50]
   5964 ; BROADWELL-NEXT:    psubq (%rdi), %mm0 # sched: [6:0.50]
   5965 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   5966 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   5967 ;
   5968 ; SKYLAKE-LABEL: test_psubq:
   5969 ; SKYLAKE:       # %bb.0:
   5970 ; SKYLAKE-NEXT:    psubq %mm1, %mm0 # sched: [1:0.50]
   5971 ; SKYLAKE-NEXT:    psubq (%rdi), %mm0 # sched: [6:0.50]
   5972 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5973 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   5974 ;
   5975 ; SKX-LABEL: test_psubq:
   5976 ; SKX:       # %bb.0:
   5977 ; SKX-NEXT:    psubq %mm1, %mm0 # sched: [1:0.50]
   5978 ; SKX-NEXT:    psubq (%rdi), %mm0 # sched: [6:0.50]
   5979 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5980 ; SKX-NEXT:    retq # sched: [7:1.00]
   5981 ;
   5982 ; BTVER2-LABEL: test_psubq:
   5983 ; BTVER2:       # %bb.0:
   5984 ; BTVER2-NEXT:    psubq %mm1, %mm0 # sched: [1:0.50]
   5985 ; BTVER2-NEXT:    psubq (%rdi), %mm0 # sched: [6:1.00]
   5986 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   5987 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   5988 ;
   5989 ; ZNVER1-LABEL: test_psubq:
   5990 ; ZNVER1:       # %bb.0:
   5991 ; ZNVER1-NEXT:    psubq %mm1, %mm0 # sched: [1:0.25]
   5992 ; ZNVER1-NEXT:    psubq (%rdi), %mm0 # sched: [8:0.50]
   5993 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   5994 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Second operand first comes from %a1, then from an 8-byte-aligned load of %a2;
        ; the final result is returned as an i64.
   5995   %1 = call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %a0, x86_mmx %a1)
   5996   %2 = load x86_mmx, x86_mmx *%a2, align 8
   5997   %3 = call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %1, x86_mmx %2)
   5998   %4 = bitcast x86_mmx %3 to i64
   5999   ret i64 %4
   6000 }
        ; Intrinsic called by test_psubq above.
   6001 declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone
   6002 
   6003 define i64 @test_psubsb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
        ; Scheduling test for the MMX psubsb instruction.
        ; The IR at the end of this function calls llvm.x86.mmx.psubs.b on %a0 with
        ; %a1, then on that result with a value loaded from %a2. The autogenerated
        ; assertions below pin, per check prefix, the expected instruction sequence
        ; and the "# sched" annotation that llc prints under -print-schedule.
   6004 ; GENERIC-LABEL: test_psubsb:
   6005 ; GENERIC:       # %bb.0:
   6006 ; GENERIC-NEXT:    psubsb %mm1, %mm0 # sched: [3:1.00]
   6007 ; GENERIC-NEXT:    psubsb (%rdi), %mm0 # sched: [8:1.00]
   6008 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6009 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6010 ;
   6011 ; ATOM-LABEL: test_psubsb:
   6012 ; ATOM:       # %bb.0:
   6013 ; ATOM-NEXT:    psubsb %mm1, %mm0 # sched: [1:0.50]
   6014 ; ATOM-NEXT:    psubsb (%rdi), %mm0 # sched: [1:1.00]
   6015 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   6016 ; ATOM-NEXT:    retq # sched: [79:39.50]
   6017 ;
   6018 ; SLM-LABEL: test_psubsb:
   6019 ; SLM:       # %bb.0:
   6020 ; SLM-NEXT:    psubsb %mm1, %mm0 # sched: [1:0.50]
   6021 ; SLM-NEXT:    psubsb (%rdi), %mm0 # sched: [4:1.00]
   6022 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   6023 ; SLM-NEXT:    retq # sched: [4:1.00]
   6024 ;
   6025 ; SANDY-LABEL: test_psubsb:
   6026 ; SANDY:       # %bb.0:
   6027 ; SANDY-NEXT:    psubsb %mm1, %mm0 # sched: [3:1.00]
   6028 ; SANDY-NEXT:    psubsb (%rdi), %mm0 # sched: [8:1.00]
   6029 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6030 ; SANDY-NEXT:    retq # sched: [1:1.00]
   6031 ;
   6032 ; HASWELL-LABEL: test_psubsb:
   6033 ; HASWELL:       # %bb.0:
   6034 ; HASWELL-NEXT:    psubsb %mm1, %mm0 # sched: [1:0.50]
   6035 ; HASWELL-NEXT:    psubsb (%rdi), %mm0 # sched: [6:0.50]
   6036 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   6037 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   6038 ;
   6039 ; BROADWELL-LABEL: test_psubsb:
   6040 ; BROADWELL:       # %bb.0:
   6041 ; BROADWELL-NEXT:    psubsb %mm1, %mm0 # sched: [1:0.50]
   6042 ; BROADWELL-NEXT:    psubsb (%rdi), %mm0 # sched: [6:0.50]
   6043 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   6044 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   6045 ;
   6046 ; SKYLAKE-LABEL: test_psubsb:
   6047 ; SKYLAKE:       # %bb.0:
   6048 ; SKYLAKE-NEXT:    psubsb %mm1, %mm0 # sched: [1:1.00]
   6049 ; SKYLAKE-NEXT:    psubsb (%rdi), %mm0 # sched: [6:1.00]
   6050 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6051 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   6052 ;
   6053 ; SKX-LABEL: test_psubsb:
   6054 ; SKX:       # %bb.0:
   6055 ; SKX-NEXT:    psubsb %mm1, %mm0 # sched: [1:1.00]
   6056 ; SKX-NEXT:    psubsb (%rdi), %mm0 # sched: [6:1.00]
   6057 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6058 ; SKX-NEXT:    retq # sched: [7:1.00]
   6059 ;
   6060 ; BTVER2-LABEL: test_psubsb:
   6061 ; BTVER2:       # %bb.0:
   6062 ; BTVER2-NEXT:    psubsb %mm1, %mm0 # sched: [1:0.50]
   6063 ; BTVER2-NEXT:    psubsb (%rdi), %mm0 # sched: [6:1.00]
   6064 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   6065 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   6066 ;
   6067 ; ZNVER1-LABEL: test_psubsb:
   6068 ; ZNVER1:       # %bb.0:
   6069 ; ZNVER1-NEXT:    psubsb %mm1, %mm0 # sched: [1:0.25]
   6070 ; ZNVER1-NEXT:    psubsb (%rdi), %mm0 # sched: [8:0.50]
   6071 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6072 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Second operand first comes from %a1, then from an 8-byte-aligned load of %a2;
        ; the final result is returned as an i64.
   6073   %1 = call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %a0, x86_mmx %a1)
   6074   %2 = load x86_mmx, x86_mmx *%a2, align 8
   6075   %3 = call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %1, x86_mmx %2)
   6076   %4 = bitcast x86_mmx %3 to i64
   6077   ret i64 %4
   6078 }
        ; Intrinsic called by test_psubsb above.
   6079 declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone
   6080 
   6081 define i64 @test_psubsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
        ; Scheduling test for the MMX psubsw instruction.
        ; The IR at the end of this function calls llvm.x86.mmx.psubs.w on %a0 with
        ; %a1, then on that result with a value loaded from %a2. The autogenerated
        ; assertions below pin, per check prefix, the expected instruction sequence
        ; and the "# sched" annotation that llc prints under -print-schedule.
   6082 ; GENERIC-LABEL: test_psubsw:
   6083 ; GENERIC:       # %bb.0:
   6084 ; GENERIC-NEXT:    psubsw %mm1, %mm0 # sched: [3:1.00]
   6085 ; GENERIC-NEXT:    psubsw (%rdi), %mm0 # sched: [8:1.00]
   6086 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6087 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6088 ;
   6089 ; ATOM-LABEL: test_psubsw:
   6090 ; ATOM:       # %bb.0:
   6091 ; ATOM-NEXT:    psubsw %mm1, %mm0 # sched: [1:0.50]
   6092 ; ATOM-NEXT:    psubsw (%rdi), %mm0 # sched: [1:1.00]
   6093 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   6094 ; ATOM-NEXT:    retq # sched: [79:39.50]
   6095 ;
   6096 ; SLM-LABEL: test_psubsw:
   6097 ; SLM:       # %bb.0:
   6098 ; SLM-NEXT:    psubsw %mm1, %mm0 # sched: [1:0.50]
   6099 ; SLM-NEXT:    psubsw (%rdi), %mm0 # sched: [4:1.00]
   6100 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   6101 ; SLM-NEXT:    retq # sched: [4:1.00]
   6102 ;
   6103 ; SANDY-LABEL: test_psubsw:
   6104 ; SANDY:       # %bb.0:
   6105 ; SANDY-NEXT:    psubsw %mm1, %mm0 # sched: [3:1.00]
   6106 ; SANDY-NEXT:    psubsw (%rdi), %mm0 # sched: [8:1.00]
   6107 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6108 ; SANDY-NEXT:    retq # sched: [1:1.00]
   6109 ;
   6110 ; HASWELL-LABEL: test_psubsw:
   6111 ; HASWELL:       # %bb.0:
   6112 ; HASWELL-NEXT:    psubsw %mm1, %mm0 # sched: [1:0.50]
   6113 ; HASWELL-NEXT:    psubsw (%rdi), %mm0 # sched: [6:0.50]
   6114 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   6115 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   6116 ;
   6117 ; BROADWELL-LABEL: test_psubsw:
   6118 ; BROADWELL:       # %bb.0:
   6119 ; BROADWELL-NEXT:    psubsw %mm1, %mm0 # sched: [1:0.50]
   6120 ; BROADWELL-NEXT:    psubsw (%rdi), %mm0 # sched: [6:0.50]
   6121 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   6122 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   6123 ;
   6124 ; SKYLAKE-LABEL: test_psubsw:
   6125 ; SKYLAKE:       # %bb.0:
   6126 ; SKYLAKE-NEXT:    psubsw %mm1, %mm0 # sched: [1:1.00]
   6127 ; SKYLAKE-NEXT:    psubsw (%rdi), %mm0 # sched: [6:1.00]
   6128 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6129 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   6130 ;
   6131 ; SKX-LABEL: test_psubsw:
   6132 ; SKX:       # %bb.0:
   6133 ; SKX-NEXT:    psubsw %mm1, %mm0 # sched: [1:1.00]
   6134 ; SKX-NEXT:    psubsw (%rdi), %mm0 # sched: [6:1.00]
   6135 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6136 ; SKX-NEXT:    retq # sched: [7:1.00]
   6137 ;
   6138 ; BTVER2-LABEL: test_psubsw:
   6139 ; BTVER2:       # %bb.0:
   6140 ; BTVER2-NEXT:    psubsw %mm1, %mm0 # sched: [1:0.50]
   6141 ; BTVER2-NEXT:    psubsw (%rdi), %mm0 # sched: [6:1.00]
   6142 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   6143 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   6144 ;
   6145 ; ZNVER1-LABEL: test_psubsw:
   6146 ; ZNVER1:       # %bb.0:
   6147 ; ZNVER1-NEXT:    psubsw %mm1, %mm0 # sched: [1:0.25]
   6148 ; ZNVER1-NEXT:    psubsw (%rdi), %mm0 # sched: [8:0.50]
   6149 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6150 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Second operand first comes from %a1, then from an 8-byte-aligned load of %a2;
        ; the final result is returned as an i64.
   6151   %1 = call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %a0, x86_mmx %a1)
   6152   %2 = load x86_mmx, x86_mmx *%a2, align 8
   6153   %3 = call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %1, x86_mmx %2)
   6154   %4 = bitcast x86_mmx %3 to i64
   6155   ret i64 %4
   6156 }
        ; Intrinsic called by test_psubsw above.
   6157 declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone
   6158 
   6159 define i64 @test_psubusb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
        ; Scheduling test for the MMX psubusb instruction.
        ; The IR at the end of this function calls llvm.x86.mmx.psubus.b on %a0 with
        ; %a1, then on that result with a value loaded from %a2. The autogenerated
        ; assertions below pin, per check prefix, the expected instruction sequence
        ; and the "# sched" annotation that llc prints under -print-schedule.
   6160 ; GENERIC-LABEL: test_psubusb:
   6161 ; GENERIC:       # %bb.0:
   6162 ; GENERIC-NEXT:    psubusb %mm1, %mm0 # sched: [3:1.00]
   6163 ; GENERIC-NEXT:    psubusb (%rdi), %mm0 # sched: [8:1.00]
   6164 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6165 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6166 ;
   6167 ; ATOM-LABEL: test_psubusb:
   6168 ; ATOM:       # %bb.0:
   6169 ; ATOM-NEXT:    psubusb %mm1, %mm0 # sched: [1:0.50]
   6170 ; ATOM-NEXT:    psubusb (%rdi), %mm0 # sched: [1:1.00]
   6171 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   6172 ; ATOM-NEXT:    retq # sched: [79:39.50]
   6173 ;
   6174 ; SLM-LABEL: test_psubusb:
   6175 ; SLM:       # %bb.0:
   6176 ; SLM-NEXT:    psubusb %mm1, %mm0 # sched: [1:0.50]
   6177 ; SLM-NEXT:    psubusb (%rdi), %mm0 # sched: [4:1.00]
   6178 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   6179 ; SLM-NEXT:    retq # sched: [4:1.00]
   6180 ;
   6181 ; SANDY-LABEL: test_psubusb:
   6182 ; SANDY:       # %bb.0:
   6183 ; SANDY-NEXT:    psubusb %mm1, %mm0 # sched: [3:1.00]
   6184 ; SANDY-NEXT:    psubusb (%rdi), %mm0 # sched: [8:1.00]
   6185 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6186 ; SANDY-NEXT:    retq # sched: [1:1.00]
   6187 ;
   6188 ; HASWELL-LABEL: test_psubusb:
   6189 ; HASWELL:       # %bb.0:
   6190 ; HASWELL-NEXT:    psubusb %mm1, %mm0 # sched: [1:0.50]
   6191 ; HASWELL-NEXT:    psubusb (%rdi), %mm0 # sched: [6:0.50]
   6192 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   6193 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   6194 ;
   6195 ; BROADWELL-LABEL: test_psubusb:
   6196 ; BROADWELL:       # %bb.0:
   6197 ; BROADWELL-NEXT:    psubusb %mm1, %mm0 # sched: [1:0.50]
   6198 ; BROADWELL-NEXT:    psubusb (%rdi), %mm0 # sched: [6:0.50]
   6199 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   6200 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   6201 ;
   6202 ; SKYLAKE-LABEL: test_psubusb:
   6203 ; SKYLAKE:       # %bb.0:
   6204 ; SKYLAKE-NEXT:    psubusb %mm1, %mm0 # sched: [1:1.00]
   6205 ; SKYLAKE-NEXT:    psubusb (%rdi), %mm0 # sched: [6:1.00]
   6206 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6207 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   6208 ;
   6209 ; SKX-LABEL: test_psubusb:
   6210 ; SKX:       # %bb.0:
   6211 ; SKX-NEXT:    psubusb %mm1, %mm0 # sched: [1:1.00]
   6212 ; SKX-NEXT:    psubusb (%rdi), %mm0 # sched: [6:1.00]
   6213 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6214 ; SKX-NEXT:    retq # sched: [7:1.00]
   6215 ;
   6216 ; BTVER2-LABEL: test_psubusb:
   6217 ; BTVER2:       # %bb.0:
   6218 ; BTVER2-NEXT:    psubusb %mm1, %mm0 # sched: [1:0.50]
   6219 ; BTVER2-NEXT:    psubusb (%rdi), %mm0 # sched: [6:1.00]
   6220 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   6221 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   6222 ;
   6223 ; ZNVER1-LABEL: test_psubusb:
   6224 ; ZNVER1:       # %bb.0:
   6225 ; ZNVER1-NEXT:    psubusb %mm1, %mm0 # sched: [1:0.25]
   6226 ; ZNVER1-NEXT:    psubusb (%rdi), %mm0 # sched: [8:0.50]
   6227 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6228 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Second operand first comes from %a1, then from an 8-byte-aligned load of %a2;
        ; the final result is returned as an i64.
   6229   %1 = call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %a0, x86_mmx %a1)
   6230   %2 = load x86_mmx, x86_mmx *%a2, align 8
   6231   %3 = call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %1, x86_mmx %2)
   6232   %4 = bitcast x86_mmx %3 to i64
   6233   ret i64 %4
   6234 }
        ; Intrinsic called by test_psubusb above.
   6235 declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone
   6236 
   6237 define i64 @test_psubusw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
        ; Checks the scheduling annotations llc prints for MMX psubusw.
        ; The IR at the bottom applies llvm.x86.mmx.psubus.w twice, so every
        ; group of assertions expects a register-register psubusw, a psubusw
        ; with a (%rdi) memory operand, a movq from %mm0 to %rax, and retq,
        ; each followed by a "# sched" annotation.
        ; One group exists per check prefix used on the RUN lines at the top
        ; of the file. The file header says these assertions are autogenerated
        ; by utils/update_llc_test_checks.py; regenerate rather than hand-edit.
   6238 ; GENERIC-LABEL: test_psubusw:
   6239 ; GENERIC:       # %bb.0:
   6240 ; GENERIC-NEXT:    psubusw %mm1, %mm0 # sched: [3:1.00]
   6241 ; GENERIC-NEXT:    psubusw (%rdi), %mm0 # sched: [8:1.00]
   6242 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6243 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6244 ;
   6245 ; ATOM-LABEL: test_psubusw:
   6246 ; ATOM:       # %bb.0:
   6247 ; ATOM-NEXT:    psubusw %mm1, %mm0 # sched: [1:0.50]
   6248 ; ATOM-NEXT:    psubusw (%rdi), %mm0 # sched: [1:1.00]
   6249 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   6250 ; ATOM-NEXT:    retq # sched: [79:39.50]
   6251 ;
   6252 ; SLM-LABEL: test_psubusw:
   6253 ; SLM:       # %bb.0:
   6254 ; SLM-NEXT:    psubusw %mm1, %mm0 # sched: [1:0.50]
   6255 ; SLM-NEXT:    psubusw (%rdi), %mm0 # sched: [4:1.00]
   6256 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   6257 ; SLM-NEXT:    retq # sched: [4:1.00]
   6258 ;
   6259 ; SANDY-LABEL: test_psubusw:
   6260 ; SANDY:       # %bb.0:
   6261 ; SANDY-NEXT:    psubusw %mm1, %mm0 # sched: [3:1.00]
   6262 ; SANDY-NEXT:    psubusw (%rdi), %mm0 # sched: [8:1.00]
   6263 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6264 ; SANDY-NEXT:    retq # sched: [1:1.00]
   6265 ;
   6266 ; HASWELL-LABEL: test_psubusw:
   6267 ; HASWELL:       # %bb.0:
   6268 ; HASWELL-NEXT:    psubusw %mm1, %mm0 # sched: [1:0.50]
   6269 ; HASWELL-NEXT:    psubusw (%rdi), %mm0 # sched: [6:0.50]
   6270 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   6271 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   6272 ;
   6273 ; BROADWELL-LABEL: test_psubusw:
   6274 ; BROADWELL:       # %bb.0:
   6275 ; BROADWELL-NEXT:    psubusw %mm1, %mm0 # sched: [1:0.50]
   6276 ; BROADWELL-NEXT:    psubusw (%rdi), %mm0 # sched: [6:0.50]
   6277 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   6278 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   6279 ;
   6280 ; SKYLAKE-LABEL: test_psubusw:
   6281 ; SKYLAKE:       # %bb.0:
   6282 ; SKYLAKE-NEXT:    psubusw %mm1, %mm0 # sched: [1:1.00]
   6283 ; SKYLAKE-NEXT:    psubusw (%rdi), %mm0 # sched: [6:1.00]
   6284 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6285 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   6286 ;
   6287 ; SKX-LABEL: test_psubusw:
   6288 ; SKX:       # %bb.0:
   6289 ; SKX-NEXT:    psubusw %mm1, %mm0 # sched: [1:1.00]
   6290 ; SKX-NEXT:    psubusw (%rdi), %mm0 # sched: [6:1.00]
   6291 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6292 ; SKX-NEXT:    retq # sched: [7:1.00]
   6293 ;
   6294 ; BTVER2-LABEL: test_psubusw:
   6295 ; BTVER2:       # %bb.0:
   6296 ; BTVER2-NEXT:    psubusw %mm1, %mm0 # sched: [1:0.50]
   6297 ; BTVER2-NEXT:    psubusw (%rdi), %mm0 # sched: [6:1.00]
   6298 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   6299 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   6300 ;
   6301 ; ZNVER1-LABEL: test_psubusw:
   6302 ; ZNVER1:       # %bb.0:
   6303 ; ZNVER1-NEXT:    psubusw %mm1, %mm0 # sched: [1:0.25]
   6304 ; ZNVER1-NEXT:    psubusw (%rdi), %mm0 # sched: [8:0.50]
   6305 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6306 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; First call: both operands are the by-value x86_mmx arguments.
   6307   %1 = call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %a0, x86_mmx %a1)
          ; Second call: combines the first result with a value loaded from %a2.
          ; NOTE(review): this load appears to be what the assertions show as the
          ; (%rdi) memory operand - confirm.
   6308   %2 = load x86_mmx, x86_mmx *%a2, align 8
   6309   %3 = call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %1, x86_mmx %2)
          ; The x86_mmx result is bitcast to i64 and returned.
   6310   %4 = bitcast x86_mmx %3 to i64
   6311   ret i64 %4
   6312 }
        ; Intrinsic used above; declared here as (x86_mmx, x86_mmx) -> x86_mmx.
   6313 declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone
   6314 
   6315 define i64 @test_psubw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
        ; Checks the scheduling annotations llc prints for MMX psubw.
        ; The IR at the bottom applies llvm.x86.mmx.psub.w twice, so every
        ; group of assertions expects a register-register psubw, a psubw with
        ; a (%rdi) memory operand, a movq from %mm0 to %rax, and retq, each
        ; followed by a "# sched" annotation.
        ; One group exists per check prefix used on the RUN lines at the top
        ; of the file. The file header says these assertions are autogenerated
        ; by utils/update_llc_test_checks.py; regenerate rather than hand-edit.
   6316 ; GENERIC-LABEL: test_psubw:
   6317 ; GENERIC:       # %bb.0:
   6318 ; GENERIC-NEXT:    psubw %mm1, %mm0 # sched: [3:1.00]
   6319 ; GENERIC-NEXT:    psubw (%rdi), %mm0 # sched: [8:1.00]
   6320 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6321 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6322 ;
   6323 ; ATOM-LABEL: test_psubw:
   6324 ; ATOM:       # %bb.0:
   6325 ; ATOM-NEXT:    psubw %mm1, %mm0 # sched: [1:0.50]
   6326 ; ATOM-NEXT:    psubw (%rdi), %mm0 # sched: [1:1.00]
   6327 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   6328 ; ATOM-NEXT:    retq # sched: [79:39.50]
   6329 ;
   6330 ; SLM-LABEL: test_psubw:
   6331 ; SLM:       # %bb.0:
   6332 ; SLM-NEXT:    psubw %mm1, %mm0 # sched: [1:0.50]
   6333 ; SLM-NEXT:    psubw (%rdi), %mm0 # sched: [4:1.00]
   6334 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   6335 ; SLM-NEXT:    retq # sched: [4:1.00]
   6336 ;
   6337 ; SANDY-LABEL: test_psubw:
   6338 ; SANDY:       # %bb.0:
   6339 ; SANDY-NEXT:    psubw %mm1, %mm0 # sched: [3:1.00]
   6340 ; SANDY-NEXT:    psubw (%rdi), %mm0 # sched: [8:1.00]
   6341 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6342 ; SANDY-NEXT:    retq # sched: [1:1.00]
   6343 ;
   6344 ; HASWELL-LABEL: test_psubw:
   6345 ; HASWELL:       # %bb.0:
   6346 ; HASWELL-NEXT:    psubw %mm1, %mm0 # sched: [1:0.50]
   6347 ; HASWELL-NEXT:    psubw (%rdi), %mm0 # sched: [6:0.50]
   6348 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   6349 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   6350 ;
   6351 ; BROADWELL-LABEL: test_psubw:
   6352 ; BROADWELL:       # %bb.0:
   6353 ; BROADWELL-NEXT:    psubw %mm1, %mm0 # sched: [1:0.50]
   6354 ; BROADWELL-NEXT:    psubw (%rdi), %mm0 # sched: [6:0.50]
   6355 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   6356 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   6357 ;
   6358 ; SKYLAKE-LABEL: test_psubw:
   6359 ; SKYLAKE:       # %bb.0:
   6360 ; SKYLAKE-NEXT:    psubw %mm1, %mm0 # sched: [1:0.50]
   6361 ; SKYLAKE-NEXT:    psubw (%rdi), %mm0 # sched: [6:0.50]
   6362 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6363 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   6364 ;
   6365 ; SKX-LABEL: test_psubw:
   6366 ; SKX:       # %bb.0:
   6367 ; SKX-NEXT:    psubw %mm1, %mm0 # sched: [1:0.50]
   6368 ; SKX-NEXT:    psubw (%rdi), %mm0 # sched: [6:0.50]
   6369 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6370 ; SKX-NEXT:    retq # sched: [7:1.00]
   6371 ;
   6372 ; BTVER2-LABEL: test_psubw:
   6373 ; BTVER2:       # %bb.0:
   6374 ; BTVER2-NEXT:    psubw %mm1, %mm0 # sched: [1:0.50]
   6375 ; BTVER2-NEXT:    psubw (%rdi), %mm0 # sched: [6:1.00]
   6376 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   6377 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   6378 ;
   6379 ; ZNVER1-LABEL: test_psubw:
   6380 ; ZNVER1:       # %bb.0:
   6381 ; ZNVER1-NEXT:    psubw %mm1, %mm0 # sched: [1:0.25]
   6382 ; ZNVER1-NEXT:    psubw (%rdi), %mm0 # sched: [8:0.50]
   6383 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6384 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; First call: both operands are the by-value x86_mmx arguments.
   6385   %1 = call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %a0, x86_mmx %a1)
          ; Second call: combines the first result with a value loaded from %a2.
          ; NOTE(review): this load appears to be what the assertions show as the
          ; (%rdi) memory operand - confirm.
   6386   %2 = load x86_mmx, x86_mmx *%a2, align 8
   6387   %3 = call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %1, x86_mmx %2)
          ; The x86_mmx result is bitcast to i64 and returned.
   6388   %4 = bitcast x86_mmx %3 to i64
   6389   ret i64 %4
   6390 }
        ; Intrinsic used above; declared here as (x86_mmx, x86_mmx) -> x86_mmx.
   6391 declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone
   6392 
   6393 define i64 @test_punpckhbw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
        ; Checks the scheduling annotations llc prints for MMX punpckhbw.
        ; The IR at the bottom applies llvm.x86.mmx.punpckhbw twice, so every
        ; group of assertions expects a register-register punpckhbw, a
        ; punpckhbw with a (%rdi) memory operand, a movq from %mm0 to %rax,
        ; and retq, each followed by a "sched" annotation.
        ; The expected punpckhbw lines also carry an "mm0 = ..." comment that
        ; lists elements 4 through 7 of each source, placed before the sched
        ; annotation.
        ; One group exists per check prefix used on the RUN lines at the top
        ; of the file. The file header says these assertions are autogenerated
        ; by utils/update_llc_test_checks.py; regenerate rather than hand-edit.
   6394 ; GENERIC-LABEL: test_punpckhbw:
   6395 ; GENERIC:       # %bb.0:
   6396 ; GENERIC-NEXT:    punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00]
   6397 ; GENERIC-NEXT:    punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00]
   6398 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6399 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6400 ;
   6401 ; ATOM-LABEL: test_punpckhbw:
   6402 ; ATOM:       # %bb.0:
   6403 ; ATOM-NEXT:    punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:0.50]
   6404 ; ATOM-NEXT:    punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [1:1.00]
   6405 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   6406 ; ATOM-NEXT:    retq # sched: [79:39.50]
   6407 ;
   6408 ; SLM-LABEL: test_punpckhbw:
   6409 ; SLM:       # %bb.0:
   6410 ; SLM-NEXT:    punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00]
   6411 ; SLM-NEXT:    punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [4:1.00]
   6412 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   6413 ; SLM-NEXT:    retq # sched: [4:1.00]
   6414 ;
   6415 ; SANDY-LABEL: test_punpckhbw:
   6416 ; SANDY:       # %bb.0:
   6417 ; SANDY-NEXT:    punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00]
   6418 ; SANDY-NEXT:    punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00]
   6419 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6420 ; SANDY-NEXT:    retq # sched: [1:1.00]
   6421 ;
   6422 ; HASWELL-LABEL: test_punpckhbw:
   6423 ; HASWELL:       # %bb.0:
   6424 ; HASWELL-NEXT:    punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00]
   6425 ; HASWELL-NEXT:    punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00]
   6426 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   6427 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   6428 ;
   6429 ; BROADWELL-LABEL: test_punpckhbw:
   6430 ; BROADWELL:       # %bb.0:
   6431 ; BROADWELL-NEXT:    punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00]
   6432 ; BROADWELL-NEXT:    punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00]
   6433 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   6434 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   6435 ;
   6436 ; SKYLAKE-LABEL: test_punpckhbw:
   6437 ; SKYLAKE:       # %bb.0:
   6438 ; SKYLAKE-NEXT:    punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00]
   6439 ; SKYLAKE-NEXT:    punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00]
   6440 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6441 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   6442 ;
   6443 ; SKX-LABEL: test_punpckhbw:
   6444 ; SKX:       # %bb.0:
   6445 ; SKX-NEXT:    punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00]
   6446 ; SKX-NEXT:    punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00]
   6447 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6448 ; SKX-NEXT:    retq # sched: [7:1.00]
   6449 ;
   6450 ; BTVER2-LABEL: test_punpckhbw:
   6451 ; BTVER2:       # %bb.0:
   6452 ; BTVER2-NEXT:    punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:0.50]
   6453 ; BTVER2-NEXT:    punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00]
   6454 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   6455 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   6456 ;
   6457 ; ZNVER1-LABEL: test_punpckhbw:
   6458 ; ZNVER1:       # %bb.0:
   6459 ; ZNVER1-NEXT:    punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:0.25]
   6460 ; ZNVER1-NEXT:    punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [8:0.50]
   6461 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6462 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; First call: both operands are the by-value x86_mmx arguments.
   6463   %1 = call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %a0, x86_mmx %a1)
          ; Second call: combines the first result with a value loaded from %a2.
          ; NOTE(review): this load appears to be what the assertions show as the
          ; (%rdi) / "mem" operand - confirm.
   6464   %2 = load x86_mmx, x86_mmx *%a2, align 8
   6465   %3 = call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %1, x86_mmx %2)
          ; The x86_mmx result is bitcast to i64 and returned.
   6466   %4 = bitcast x86_mmx %3 to i64
   6467   ret i64 %4
   6468 }
        ; Intrinsic used above; declared here as (x86_mmx, x86_mmx) -> x86_mmx.
   6469 declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone
   6470 
   6471 define i64 @test_punpckhdq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
        ; Checks the scheduling annotations llc prints for MMX punpckhdq.
        ; The IR at the bottom applies llvm.x86.mmx.punpckhdq twice, so every
        ; group of assertions expects a register-register punpckhdq, a
        ; punpckhdq with a (%rdi) memory operand, a movq from %mm0 to %rax,
        ; and retq, each followed by a "sched" annotation.
        ; The expected punpckhdq lines also carry an "mm0 = ..." comment that
        ; lists element 1 of each source, placed before the sched annotation.
        ; One group exists per check prefix used on the RUN lines at the top
        ; of the file. The file header says these assertions are autogenerated
        ; by utils/update_llc_test_checks.py; regenerate rather than hand-edit.
   6472 ; GENERIC-LABEL: test_punpckhdq:
   6473 ; GENERIC:       # %bb.0:
   6474 ; GENERIC-NEXT:    punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00]
   6475 ; GENERIC-NEXT:    punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00]
   6476 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6477 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6478 ;
   6479 ; ATOM-LABEL: test_punpckhdq:
   6480 ; ATOM:       # %bb.0:
   6481 ; ATOM-NEXT:    punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:0.50]
   6482 ; ATOM-NEXT:    punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [1:1.00]
   6483 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   6484 ; ATOM-NEXT:    retq # sched: [79:39.50]
   6485 ;
   6486 ; SLM-LABEL: test_punpckhdq:
   6487 ; SLM:       # %bb.0:
   6488 ; SLM-NEXT:    punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00]
   6489 ; SLM-NEXT:    punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [4:1.00]
   6490 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   6491 ; SLM-NEXT:    retq # sched: [4:1.00]
   6492 ;
   6493 ; SANDY-LABEL: test_punpckhdq:
   6494 ; SANDY:       # %bb.0:
   6495 ; SANDY-NEXT:    punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00]
   6496 ; SANDY-NEXT:    punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00]
   6497 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6498 ; SANDY-NEXT:    retq # sched: [1:1.00]
   6499 ;
   6500 ; HASWELL-LABEL: test_punpckhdq:
   6501 ; HASWELL:       # %bb.0:
   6502 ; HASWELL-NEXT:    punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00]
   6503 ; HASWELL-NEXT:    punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00]
   6504 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   6505 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   6506 ;
   6507 ; BROADWELL-LABEL: test_punpckhdq:
   6508 ; BROADWELL:       # %bb.0:
   6509 ; BROADWELL-NEXT:    punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00]
   6510 ; BROADWELL-NEXT:    punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00]
   6511 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   6512 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   6513 ;
   6514 ; SKYLAKE-LABEL: test_punpckhdq:
   6515 ; SKYLAKE:       # %bb.0:
   6516 ; SKYLAKE-NEXT:    punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00]
   6517 ; SKYLAKE-NEXT:    punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00]
   6518 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6519 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   6520 ;
   6521 ; SKX-LABEL: test_punpckhdq:
   6522 ; SKX:       # %bb.0:
   6523 ; SKX-NEXT:    punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00]
   6524 ; SKX-NEXT:    punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00]
   6525 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6526 ; SKX-NEXT:    retq # sched: [7:1.00]
   6527 ;
   6528 ; BTVER2-LABEL: test_punpckhdq:
   6529 ; BTVER2:       # %bb.0:
   6530 ; BTVER2-NEXT:    punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:0.50]
   6531 ; BTVER2-NEXT:    punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00]
   6532 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   6533 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   6534 ;
   6535 ; ZNVER1-LABEL: test_punpckhdq:
   6536 ; ZNVER1:       # %bb.0:
   6537 ; ZNVER1-NEXT:    punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:0.25]
   6538 ; ZNVER1-NEXT:    punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [8:0.50]
   6539 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6540 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; First call: both operands are the by-value x86_mmx arguments.
   6541   %1 = call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %a0, x86_mmx %a1)
          ; Second call: combines the first result with a value loaded from %a2.
          ; NOTE(review): this load appears to be what the assertions show as the
          ; (%rdi) / "mem" operand - confirm.
   6542   %2 = load x86_mmx, x86_mmx *%a2, align 8
   6543   %3 = call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %1, x86_mmx %2)
          ; The x86_mmx result is bitcast to i64 and returned.
   6544   %4 = bitcast x86_mmx %3 to i64
   6545   ret i64 %4
   6546 }
        ; Intrinsic used above; declared here as (x86_mmx, x86_mmx) -> x86_mmx.
   6547 declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone
   6548 
   6549 define i64 @test_punpckhwd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
        ; Checks the scheduling annotations llc prints for MMX punpckhwd.
        ; The IR at the bottom applies llvm.x86.mmx.punpckhwd twice, so every
        ; group of assertions expects a register-register punpckhwd, a
        ; punpckhwd with a (%rdi) memory operand, a movq from %mm0 to %rax,
        ; and retq, each followed by a "sched" annotation.
        ; The expected punpckhwd lines also carry an "mm0 = ..." comment that
        ; lists elements 2 and 3 of each source, placed before the sched
        ; annotation.
        ; One group exists per check prefix used on the RUN lines at the top
        ; of the file. The file header says these assertions are autogenerated
        ; by utils/update_llc_test_checks.py; regenerate rather than hand-edit.
   6550 ; GENERIC-LABEL: test_punpckhwd:
   6551 ; GENERIC:       # %bb.0:
   6552 ; GENERIC-NEXT:    punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
   6553 ; GENERIC-NEXT:    punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
   6554 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6555 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6556 ;
   6557 ; ATOM-LABEL: test_punpckhwd:
   6558 ; ATOM:       # %bb.0:
   6559 ; ATOM-NEXT:    punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.50]
   6560 ; ATOM-NEXT:    punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [1:1.00]
   6561 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   6562 ; ATOM-NEXT:    retq # sched: [79:39.50]
   6563 ;
   6564 ; SLM-LABEL: test_punpckhwd:
   6565 ; SLM:       # %bb.0:
   6566 ; SLM-NEXT:    punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
   6567 ; SLM-NEXT:    punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [4:1.00]
   6568 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   6569 ; SLM-NEXT:    retq # sched: [4:1.00]
   6570 ;
   6571 ; SANDY-LABEL: test_punpckhwd:
   6572 ; SANDY:       # %bb.0:
   6573 ; SANDY-NEXT:    punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
   6574 ; SANDY-NEXT:    punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
   6575 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6576 ; SANDY-NEXT:    retq # sched: [1:1.00]
   6577 ;
   6578 ; HASWELL-LABEL: test_punpckhwd:
   6579 ; HASWELL:       # %bb.0:
   6580 ; HASWELL-NEXT:    punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
   6581 ; HASWELL-NEXT:    punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
   6582 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   6583 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   6584 ;
   6585 ; BROADWELL-LABEL: test_punpckhwd:
   6586 ; BROADWELL:       # %bb.0:
   6587 ; BROADWELL-NEXT:    punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
   6588 ; BROADWELL-NEXT:    punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
   6589 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   6590 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   6591 ;
   6592 ; SKYLAKE-LABEL: test_punpckhwd:
   6593 ; SKYLAKE:       # %bb.0:
   6594 ; SKYLAKE-NEXT:    punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
   6595 ; SKYLAKE-NEXT:    punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
   6596 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6597 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   6598 ;
   6599 ; SKX-LABEL: test_punpckhwd:
   6600 ; SKX:       # %bb.0:
   6601 ; SKX-NEXT:    punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
   6602 ; SKX-NEXT:    punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
   6603 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6604 ; SKX-NEXT:    retq # sched: [7:1.00]
   6605 ;
   6606 ; BTVER2-LABEL: test_punpckhwd:
   6607 ; BTVER2:       # %bb.0:
   6608 ; BTVER2-NEXT:    punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.50]
   6609 ; BTVER2-NEXT:    punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
   6610 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   6611 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   6612 ;
   6613 ; ZNVER1-LABEL: test_punpckhwd:
   6614 ; ZNVER1:       # %bb.0:
   6615 ; ZNVER1-NEXT:    punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.25]
   6616 ; ZNVER1-NEXT:    punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [8:0.50]
   6617 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6618 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; First call: both operands are the by-value x86_mmx arguments.
   6619   %1 = call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %a0, x86_mmx %a1)
          ; Second call: combines the first result with a value loaded from %a2.
          ; NOTE(review): this load appears to be what the assertions show as the
          ; (%rdi) / "mem" operand - confirm.
   6620   %2 = load x86_mmx, x86_mmx *%a2, align 8
   6621   %3 = call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %1, x86_mmx %2)
          ; The x86_mmx result is bitcast to i64 and returned.
   6622   %4 = bitcast x86_mmx %3 to i64
   6623   ret i64 %4
   6624 }
        ; Intrinsic used above; declared here as (x86_mmx, x86_mmx) -> x86_mmx.
   6625 declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone
   6626 
   6627 define i64 @test_punpcklbw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
        ; Checks the scheduling annotations llc prints for MMX punpcklbw.
        ; The IR at the bottom applies llvm.x86.mmx.punpcklbw twice, so every
        ; group of assertions expects a register-register punpcklbw, a
        ; punpcklbw with a (%rdi) memory operand, a movq from %mm0 to %rax,
        ; and retq, each followed by a "sched" annotation.
        ; The expected punpcklbw lines also carry an "mm0 = ..." comment that
        ; lists elements 0 through 3 of each source, placed before the sched
        ; annotation.
        ; One group exists per check prefix used on the RUN lines at the top
        ; of the file. The file header says these assertions are autogenerated
        ; by utils/update_llc_test_checks.py; regenerate rather than hand-edit.
        ; NOTE(review): the ATOM register form here expects [1:1.00], while the
        ; punpckh* tests in this file expect [1:0.50] for ATOM. Since the
        ; values are generated from llc output, any fix would presumably
        ; belong in the scheduling model rather than in this file - confirm.
   6628 ; GENERIC-LABEL: test_punpcklbw:
   6629 ; GENERIC:       # %bb.0:
   6630 ; GENERIC-NEXT:    punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
   6631 ; GENERIC-NEXT:    punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
   6632 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6633 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6634 ;
   6635 ; ATOM-LABEL: test_punpcklbw:
   6636 ; ATOM:       # %bb.0:
   6637 ; ATOM-NEXT:    punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
   6638 ; ATOM-NEXT:    punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [1:1.00]
   6639 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   6640 ; ATOM-NEXT:    retq # sched: [79:39.50]
   6641 ;
   6642 ; SLM-LABEL: test_punpcklbw:
   6643 ; SLM:       # %bb.0:
   6644 ; SLM-NEXT:    punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
   6645 ; SLM-NEXT:    punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [4:1.00]
   6646 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   6647 ; SLM-NEXT:    retq # sched: [4:1.00]
   6648 ;
   6649 ; SANDY-LABEL: test_punpcklbw:
   6650 ; SANDY:       # %bb.0:
   6651 ; SANDY-NEXT:    punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
   6652 ; SANDY-NEXT:    punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
   6653 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6654 ; SANDY-NEXT:    retq # sched: [1:1.00]
   6655 ;
   6656 ; HASWELL-LABEL: test_punpcklbw:
   6657 ; HASWELL:       # %bb.0:
   6658 ; HASWELL-NEXT:    punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
   6659 ; HASWELL-NEXT:    punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
   6660 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   6661 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   6662 ;
   6663 ; BROADWELL-LABEL: test_punpcklbw:
   6664 ; BROADWELL:       # %bb.0:
   6665 ; BROADWELL-NEXT:    punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
   6666 ; BROADWELL-NEXT:    punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
   6667 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   6668 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   6669 ;
   6670 ; SKYLAKE-LABEL: test_punpcklbw:
   6671 ; SKYLAKE:       # %bb.0:
   6672 ; SKYLAKE-NEXT:    punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
   6673 ; SKYLAKE-NEXT:    punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
   6674 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6675 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   6676 ;
   6677 ; SKX-LABEL: test_punpcklbw:
   6678 ; SKX:       # %bb.0:
   6679 ; SKX-NEXT:    punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
   6680 ; SKX-NEXT:    punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
   6681 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6682 ; SKX-NEXT:    retq # sched: [7:1.00]
   6683 ;
   6684 ; BTVER2-LABEL: test_punpcklbw:
   6685 ; BTVER2:       # %bb.0:
   6686 ; BTVER2-NEXT:    punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.50]
   6687 ; BTVER2-NEXT:    punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
   6688 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   6689 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   6690 ;
   6691 ; ZNVER1-LABEL: test_punpcklbw:
   6692 ; ZNVER1:       # %bb.0:
   6693 ; ZNVER1-NEXT:    punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.25]
   6694 ; ZNVER1-NEXT:    punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [8:0.50]
   6695 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6696 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; First call: both operands are the by-value x86_mmx arguments.
   6697   %1 = call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %a0, x86_mmx %a1)
          ; Second call: combines the first result with a value loaded from %a2.
          ; NOTE(review): this load appears to be what the assertions show as the
          ; (%rdi) / "mem" operand - confirm.
   6698   %2 = load x86_mmx, x86_mmx *%a2, align 8
   6699   %3 = call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %1, x86_mmx %2)
          ; The x86_mmx result is bitcast to i64 and returned.
   6700   %4 = bitcast x86_mmx %3 to i64
   6701   ret i64 %4
   6702 }
        ; Intrinsic used above; declared here as (x86_mmx, x86_mmx) -> x86_mmx.
   6703 declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone
   6704 
   6705 define i64 @test_punpckldq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
        ; Checks the scheduling annotations llc prints for MMX punpckldq.
        ; The IR at the bottom applies llvm.x86.mmx.punpckldq twice, so every
        ; group of assertions expects a register-register punpckldq, a
        ; punpckldq with a (%rdi) memory operand, a movq from %mm0 to %rax,
        ; and retq, each followed by a "sched" annotation.
        ; The expected punpckldq lines also carry an "mm0 = ..." comment that
        ; lists element 0 of each source, placed before the sched annotation.
        ; One group exists per check prefix used on the RUN lines at the top
        ; of the file. The file header says these assertions are autogenerated
        ; by utils/update_llc_test_checks.py; regenerate rather than hand-edit.
        ; NOTE(review): the ATOM register form here expects [1:1.00], while the
        ; punpckh* tests in this file expect [1:0.50] for ATOM. Since the
        ; values are generated from llc output, any fix would presumably
        ; belong in the scheduling model rather than in this file - confirm.
   6706 ; GENERIC-LABEL: test_punpckldq:
   6707 ; GENERIC:       # %bb.0:
   6708 ; GENERIC-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
   6709 ; GENERIC-NEXT:    punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
   6710 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6711 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6712 ;
   6713 ; ATOM-LABEL: test_punpckldq:
   6714 ; ATOM:       # %bb.0:
   6715 ; ATOM-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
   6716 ; ATOM-NEXT:    punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [1:1.00]
   6717 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   6718 ; ATOM-NEXT:    retq # sched: [79:39.50]
   6719 ;
   6720 ; SLM-LABEL: test_punpckldq:
   6721 ; SLM:       # %bb.0:
   6722 ; SLM-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
   6723 ; SLM-NEXT:    punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [4:1.00]
   6724 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   6725 ; SLM-NEXT:    retq # sched: [4:1.00]
   6726 ;
   6727 ; SANDY-LABEL: test_punpckldq:
   6728 ; SANDY:       # %bb.0:
   6729 ; SANDY-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
   6730 ; SANDY-NEXT:    punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
   6731 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6732 ; SANDY-NEXT:    retq # sched: [1:1.00]
   6733 ;
   6734 ; HASWELL-LABEL: test_punpckldq:
   6735 ; HASWELL:       # %bb.0:
   6736 ; HASWELL-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
   6737 ; HASWELL-NEXT:    punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
   6738 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   6739 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   6740 ;
   6741 ; BROADWELL-LABEL: test_punpckldq:
   6742 ; BROADWELL:       # %bb.0:
   6743 ; BROADWELL-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
   6744 ; BROADWELL-NEXT:    punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
   6745 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   6746 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   6747 ;
   6748 ; SKYLAKE-LABEL: test_punpckldq:
   6749 ; SKYLAKE:       # %bb.0:
   6750 ; SKYLAKE-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
   6751 ; SKYLAKE-NEXT:    punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
   6752 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6753 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   6754 ;
   6755 ; SKX-LABEL: test_punpckldq:
   6756 ; SKX:       # %bb.0:
   6757 ; SKX-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
   6758 ; SKX-NEXT:    punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
   6759 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6760 ; SKX-NEXT:    retq # sched: [7:1.00]
   6761 ;
   6762 ; BTVER2-LABEL: test_punpckldq:
   6763 ; BTVER2:       # %bb.0:
   6764 ; BTVER2-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:0.50]
   6765 ; BTVER2-NEXT:    punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
   6766 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   6767 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   6768 ;
   6769 ; ZNVER1-LABEL: test_punpckldq:
   6770 ; ZNVER1:       # %bb.0:
   6771 ; ZNVER1-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:0.25]
   6772 ; ZNVER1-NEXT:    punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [8:0.50]
   6773 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6774 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; First call: both operands are the by-value x86_mmx arguments.
   6775   %1 = call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %a0, x86_mmx %a1)
          ; Second call: combines the first result with a value loaded from %a2.
          ; NOTE(review): this load appears to be what the assertions show as the
          ; (%rdi) / "mem" operand - confirm.
   6776   %2 = load x86_mmx, x86_mmx *%a2, align 8
   6777   %3 = call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %1, x86_mmx %2)
          ; The x86_mmx result is bitcast to i64 and returned.
   6778   %4 = bitcast x86_mmx %3 to i64
   6779   ret i64 %4
   6780 }
        ; Intrinsic used above; declared here as (x86_mmx, x86_mmx) -> x86_mmx.
   6781 declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone
   6782 
   6783 define i64 @test_punpcklwd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
   6784 ; GENERIC-LABEL: test_punpcklwd:
   6785 ; GENERIC:       # %bb.0:
   6786 ; GENERIC-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
   6787 ; GENERIC-NEXT:    punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
   6788 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6789 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6790 ;
   6791 ; ATOM-LABEL: test_punpcklwd:
   6792 ; ATOM:       # %bb.0:
   6793 ; ATOM-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
   6794 ; ATOM-NEXT:    punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [1:1.00]
   6795 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   6796 ; ATOM-NEXT:    retq # sched: [79:39.50]
   6797 ;
   6798 ; SLM-LABEL: test_punpcklwd:
   6799 ; SLM:       # %bb.0:
   6800 ; SLM-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
   6801 ; SLM-NEXT:    punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [4:1.00]
   6802 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   6803 ; SLM-NEXT:    retq # sched: [4:1.00]
   6804 ;
   6805 ; SANDY-LABEL: test_punpcklwd:
   6806 ; SANDY:       # %bb.0:
   6807 ; SANDY-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
   6808 ; SANDY-NEXT:    punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
   6809 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6810 ; SANDY-NEXT:    retq # sched: [1:1.00]
   6811 ;
   6812 ; HASWELL-LABEL: test_punpcklwd:
   6813 ; HASWELL:       # %bb.0:
   6814 ; HASWELL-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
   6815 ; HASWELL-NEXT:    punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
   6816 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   6817 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   6818 ;
   6819 ; BROADWELL-LABEL: test_punpcklwd:
   6820 ; BROADWELL:       # %bb.0:
   6821 ; BROADWELL-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
   6822 ; BROADWELL-NEXT:    punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
   6823 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   6824 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   6825 ;
   6826 ; SKYLAKE-LABEL: test_punpcklwd:
   6827 ; SKYLAKE:       # %bb.0:
   6828 ; SKYLAKE-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
   6829 ; SKYLAKE-NEXT:    punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
   6830 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6831 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   6832 ;
   6833 ; SKX-LABEL: test_punpcklwd:
   6834 ; SKX:       # %bb.0:
   6835 ; SKX-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
   6836 ; SKX-NEXT:    punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
   6837 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6838 ; SKX-NEXT:    retq # sched: [7:1.00]
   6839 ;
   6840 ; BTVER2-LABEL: test_punpcklwd:
   6841 ; BTVER2:       # %bb.0:
   6842 ; BTVER2-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:0.50]
   6843 ; BTVER2-NEXT:    punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
   6844 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   6845 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   6846 ;
   6847 ; ZNVER1-LABEL: test_punpcklwd:
   6848 ; ZNVER1:       # %bb.0:
   6849 ; ZNVER1-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:0.25]
   6850 ; ZNVER1-NEXT:    punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [8:0.50]
   6851 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6852 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   6853   %1 = call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %a0, x86_mmx %a1)
   6854   %2 = load x86_mmx, x86_mmx *%a2, align 8
   6855   %3 = call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %1, x86_mmx %2)
   6856   %4 = bitcast x86_mmx %3 to i64
   6857   ret i64 %4
   6858 }
   6859 declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone ; intrinsic called twice by the IR directly above: two x86_mmx operands in, one x86_mmx result out
   6860 
   6861 define i64 @test_pxor(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
        ; Scheduling test for the MMX pxor intrinsic. The IR at the bottom of this
        ; function calls @llvm.x86.mmx.pxor twice: first on the two x86_mmx
        ; arguments, then on that result and a value loaded from %a2, and finally
        ; returns the 64 bits as an i64.
        ;
        ; Every per-CPU assertion group below expects the same four instructions
        ; (register-register pxor, pxor with a (%rdi) memory operand, movq from
        ; %mm0 to %rax, retq); the groups differ only in the trailing "sched"
        ; annotation, which is produced by the -print-schedule flag on the RUN
        ; lines at the top of the file.
        ; NOTE(review): the bracketed pair presumably reads
        ; [latency:reciprocal throughput] - confirm against the llc documentation.
        ;
        ; The assertions are autogenerated by utils/update_llc_test_checks.py (see
        ; the note on the first line of the file).
   6862 ; GENERIC-LABEL: test_pxor:
   6863 ; GENERIC:       # %bb.0:
   6864 ; GENERIC-NEXT:    pxor %mm1, %mm0 # sched: [1:0.33]
   6865 ; GENERIC-NEXT:    pxor (%rdi), %mm0 # sched: [6:0.50]
   6866 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6867 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6868 ;
   6869 ; ATOM-LABEL: test_pxor:
   6870 ; ATOM:       # %bb.0:
   6871 ; ATOM-NEXT:    pxor %mm1, %mm0 # sched: [1:0.50]
   6872 ; ATOM-NEXT:    pxor (%rdi), %mm0 # sched: [1:1.00]
   6873 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
   6874 ; ATOM-NEXT:    retq # sched: [79:39.50]
   6875 ;
   6876 ; SLM-LABEL: test_pxor:
   6877 ; SLM:       # %bb.0:
   6878 ; SLM-NEXT:    pxor %mm1, %mm0 # sched: [1:0.50]
   6879 ; SLM-NEXT:    pxor (%rdi), %mm0 # sched: [4:1.00]
   6880 ; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
   6881 ; SLM-NEXT:    retq # sched: [4:1.00]
   6882 ;
        ; The next group is shared by two RUN lines: both the sandybridge and the
        ; ivybridge invocations pass this same prefix to FileCheck.
   6883 ; SANDY-LABEL: test_pxor:
   6884 ; SANDY:       # %bb.0:
   6885 ; SANDY-NEXT:    pxor %mm1, %mm0 # sched: [1:0.33]
   6886 ; SANDY-NEXT:    pxor (%rdi), %mm0 # sched: [6:0.50]
   6887 ; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6888 ; SANDY-NEXT:    retq # sched: [1:1.00]
   6889 ;
   6890 ; HASWELL-LABEL: test_pxor:
   6891 ; HASWELL:       # %bb.0:
   6892 ; HASWELL-NEXT:    pxor %mm1, %mm0 # sched: [1:0.33]
   6893 ; HASWELL-NEXT:    pxor (%rdi), %mm0 # sched: [6:0.50]
   6894 ; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   6895 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   6896 ;
   6897 ; BROADWELL-LABEL: test_pxor:
   6898 ; BROADWELL:       # %bb.0:
   6899 ; BROADWELL-NEXT:    pxor %mm1, %mm0 # sched: [1:0.33]
   6900 ; BROADWELL-NEXT:    pxor (%rdi), %mm0 # sched: [6:0.50]
   6901 ; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
   6902 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   6903 ;
   6904 ; SKYLAKE-LABEL: test_pxor:
   6905 ; SKYLAKE:       # %bb.0:
   6906 ; SKYLAKE-NEXT:    pxor %mm1, %mm0 # sched: [1:0.50]
   6907 ; SKYLAKE-NEXT:    pxor (%rdi), %mm0 # sched: [6:0.50]
   6908 ; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6909 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   6910 ;
   6911 ; SKX-LABEL: test_pxor:
   6912 ; SKX:       # %bb.0:
   6913 ; SKX-NEXT:    pxor %mm1, %mm0 # sched: [1:0.50]
   6914 ; SKX-NEXT:    pxor (%rdi), %mm0 # sched: [6:0.50]
   6915 ; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6916 ; SKX-NEXT:    retq # sched: [7:1.00]
   6917 ;
   6918 ; BTVER2-LABEL: test_pxor:
   6919 ; BTVER2:       # %bb.0:
   6920 ; BTVER2-NEXT:    pxor %mm1, %mm0 # sched: [1:0.50]
   6921 ; BTVER2-NEXT:    pxor (%rdi), %mm0 # sched: [6:1.00]
   6922 ; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
   6923 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   6924 ;
   6925 ; ZNVER1-LABEL: test_pxor:
   6926 ; ZNVER1:       # %bb.0:
   6927 ; ZNVER1-NEXT:    pxor %mm1, %mm0 # sched: [1:0.25]
   6928 ; ZNVER1-NEXT:    pxor (%rdi), %mm0 # sched: [8:0.50]
   6929 ; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
   6930 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; First call: both operands are the x86_mmx function arguments.
   6931   %1 = call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %a0, x86_mmx %a1)
        ; Second call takes its right-hand operand from memory; the assertions
        ; above expect no separate load instruction, only pxor (%rdi), %mm0.
   6932   %2 = load x86_mmx, x86_mmx *%a2, align 8
   6933   %3 = call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %1, x86_mmx %2)
        ; Reinterpret the x86_mmx result as i64 so it can be returned.
   6934   %4 = bitcast x86_mmx %3 to i64
   6935   ret i64 %4
   6936 }
   6937 declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone ; intrinsic called twice by @test_pxor: two x86_mmx operands in, one x86_mmx result out
   6938