Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,GENERIC
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefixes=CHECK,ATOM
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,SLM
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
      6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2   | FileCheck %s --check-prefixes=CHECK,SANDY
      7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
      8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2   | FileCheck %s --check-prefixes=CHECK,SANDY
      9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE
     10 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2   | FileCheck %s --check-prefixes=CHECK,HASWELL
     11 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE
     12 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2   | FileCheck %s --check-prefixes=CHECK,BROADWELL
     13 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE
     14 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2   | FileCheck %s --check-prefixes=CHECK,SKYLAKE
     15 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
     16 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2   | FileCheck %s --check-prefixes=CHECK,SKX
     17 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
     18 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2   | FileCheck %s --check-prefixes=CHECK,BTVER2
     19 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
     20 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2   | FileCheck %s --check-prefixes=CHECK,ZNVER1
     21 
     ; Scheduling-annotation test for the SSSE3 byte absolute-value intrinsic
     ; llvm.x86.ssse3.pabs.b.128. The intrinsic is applied once to the register
     ; argument %a0 and once to a <16 x i8> vector loaded from %a1, so the
     ; expected output contains both a register-source and a memory-source
     ; (v)pabsb; the two results are combined with `or` and returned.
     ; The assertion lines inside the function are autogenerated (see the note
     ; on the first line of this file); regenerate them instead of hand-editing.
     ; The bracketed pair after "# sched" is presumably latency and reciprocal
     ; throughput as printed by -print-schedule -- TODO confirm.
     22 define <16 x i8> @test_pabsb(<16 x i8> %a0, <16 x i8> *%a1) {
     23 ; GENERIC-LABEL: test_pabsb:
     24 ; GENERIC:       # %bb.0:
     25 ; GENERIC-NEXT:    pabsb %xmm0, %xmm1 # sched: [1:0.50]
     26 ; GENERIC-NEXT:    pabsb (%rdi), %xmm0 # sched: [7:0.50]
     27 ; GENERIC-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
     28 ; GENERIC-NEXT:    retq # sched: [1:1.00]
     29 ;
     30 ; ATOM-LABEL: test_pabsb:
     31 ; ATOM:       # %bb.0:
     32 ; ATOM-NEXT:    pabsb %xmm0, %xmm1 # sched: [1:0.50]
     33 ; ATOM-NEXT:    pabsb (%rdi), %xmm0 # sched: [1:1.00]
     34 ; ATOM-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
     35 ; ATOM-NEXT:    nop # sched: [1:0.50]
     36 ; ATOM-NEXT:    nop # sched: [1:0.50]
     37 ; ATOM-NEXT:    retq # sched: [79:39.50]
     38 ;
     39 ; SLM-LABEL: test_pabsb:
     40 ; SLM:       # %bb.0:
     41 ; SLM-NEXT:    pabsb %xmm0, %xmm1 # sched: [1:0.50]
     42 ; SLM-NEXT:    pabsb (%rdi), %xmm0 # sched: [4:1.00]
     43 ; SLM-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
     44 ; SLM-NEXT:    retq # sched: [4:1.00]
     45 ;
     46 ; SANDY-SSE-LABEL: test_pabsb:
     47 ; SANDY-SSE:       # %bb.0:
     48 ; SANDY-SSE-NEXT:    pabsb %xmm0, %xmm1 # sched: [1:0.50]
     49 ; SANDY-SSE-NEXT:    pabsb (%rdi), %xmm0 # sched: [7:0.50]
     50 ; SANDY-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
     51 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
     52 ;
     53 ; SANDY-LABEL: test_pabsb:
     54 ; SANDY:       # %bb.0:
     55 ; SANDY-NEXT:    vpabsb %xmm0, %xmm0 # sched: [1:0.50]
     56 ; SANDY-NEXT:    vpabsb (%rdi), %xmm1 # sched: [7:0.50]
     57 ; SANDY-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
     58 ; SANDY-NEXT:    retq # sched: [1:1.00]
     59 ;
     60 ; HASWELL-SSE-LABEL: test_pabsb:
     61 ; HASWELL-SSE:       # %bb.0:
     62 ; HASWELL-SSE-NEXT:    pabsb %xmm0, %xmm1 # sched: [1:0.50]
     63 ; HASWELL-SSE-NEXT:    pabsb (%rdi), %xmm0 # sched: [7:0.50]
     64 ; HASWELL-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
     65 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
     66 ;
     67 ; HASWELL-LABEL: test_pabsb:
     68 ; HASWELL:       # %bb.0:
     69 ; HASWELL-NEXT:    vpabsb %xmm0, %xmm0 # sched: [1:0.50]
     70 ; HASWELL-NEXT:    vpabsb (%rdi), %xmm1 # sched: [7:0.50]
     71 ; HASWELL-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
     72 ; HASWELL-NEXT:    retq # sched: [7:1.00]
     73 ;
     74 ; BROADWELL-SSE-LABEL: test_pabsb:
     75 ; BROADWELL-SSE:       # %bb.0:
     76 ; BROADWELL-SSE-NEXT:    pabsb %xmm0, %xmm1 # sched: [1:0.50]
     77 ; BROADWELL-SSE-NEXT:    pabsb (%rdi), %xmm0 # sched: [6:0.50]
     78 ; BROADWELL-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
     79 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
     80 ;
     81 ; BROADWELL-LABEL: test_pabsb:
     82 ; BROADWELL:       # %bb.0:
     83 ; BROADWELL-NEXT:    vpabsb %xmm0, %xmm0 # sched: [1:0.50]
     84 ; BROADWELL-NEXT:    vpabsb (%rdi), %xmm1 # sched: [6:0.50]
     85 ; BROADWELL-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
     86 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
     87 ;
     88 ; SKYLAKE-SSE-LABEL: test_pabsb:
     89 ; SKYLAKE-SSE:       # %bb.0:
     90 ; SKYLAKE-SSE-NEXT:    pabsb %xmm0, %xmm1 # sched: [1:0.50]
     91 ; SKYLAKE-SSE-NEXT:    pabsb (%rdi), %xmm0 # sched: [7:0.50]
     92 ; SKYLAKE-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
     93 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
     94 ;
     95 ; SKYLAKE-LABEL: test_pabsb:
     96 ; SKYLAKE:       # %bb.0:
     97 ; SKYLAKE-NEXT:    vpabsb %xmm0, %xmm0 # sched: [1:0.50]
     98 ; SKYLAKE-NEXT:    vpabsb (%rdi), %xmm1 # sched: [7:0.50]
     99 ; SKYLAKE-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
    100 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    101 ;
    102 ; SKX-SSE-LABEL: test_pabsb:
    103 ; SKX-SSE:       # %bb.0:
    104 ; SKX-SSE-NEXT:    pabsb %xmm0, %xmm1 # sched: [1:0.50]
    105 ; SKX-SSE-NEXT:    pabsb (%rdi), %xmm0 # sched: [7:0.50]
    106 ; SKX-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
    107 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
    108 ;
    109 ; SKX-LABEL: test_pabsb:
    110 ; SKX:       # %bb.0:
    111 ; SKX-NEXT:    vpabsb %xmm0, %xmm0 # sched: [1:0.50]
    112 ; SKX-NEXT:    vpabsb (%rdi), %xmm1 # sched: [7:0.50]
    113 ; SKX-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
    114 ; SKX-NEXT:    retq # sched: [7:1.00]
    115 ;
    116 ; BTVER2-SSE-LABEL: test_pabsb:
    117 ; BTVER2-SSE:       # %bb.0:
    118 ; BTVER2-SSE-NEXT:    pabsb %xmm0, %xmm1 # sched: [1:0.50]
    119 ; BTVER2-SSE-NEXT:    pabsb (%rdi), %xmm0 # sched: [6:1.00]
    120 ; BTVER2-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
    121 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
    122 ;
    123 ; BTVER2-LABEL: test_pabsb:
    124 ; BTVER2:       # %bb.0:
    125 ; BTVER2-NEXT:    vpabsb (%rdi), %xmm1 # sched: [6:1.00]
    126 ; BTVER2-NEXT:    vpabsb %xmm0, %xmm0 # sched: [1:0.50]
    127 ; BTVER2-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
    128 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    129 ;
    130 ; ZNVER1-SSE-LABEL: test_pabsb:
    131 ; ZNVER1-SSE:       # %bb.0:
    132 ; ZNVER1-SSE-NEXT:    pabsb %xmm0, %xmm1 # sched: [1:0.25]
    133 ; ZNVER1-SSE-NEXT:    pabsb (%rdi), %xmm0 # sched: [8:0.50]
    134 ; ZNVER1-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.25]
    135 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
    136 ;
    137 ; ZNVER1-LABEL: test_pabsb:
    138 ; ZNVER1:       # %bb.0:
    139 ; ZNVER1-NEXT:    vpabsb (%rdi), %xmm1 # sched: [8:0.50]
    140 ; ZNVER1-NEXT:    vpabsb %xmm0, %xmm0 # sched: [1:0.25]
    141 ; ZNVER1-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
    142 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; %1 is the absolute value of the register operand; %3 is the absolute
        ; value of %2, which is loaded from %a1 with 16-byte alignment.
    143   %1 = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0)
    144   %2 = load <16 x i8>, <16 x i8> *%a1, align 16
    145   %3 = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %2)
    146   %4 = or <16 x i8> %1, %3
    147   ret <16 x i8> %4
    148 }
        ; Declaration of the intrinsic exercised by test_pabsb.
    149 declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone
    150 
    ; Scheduling-annotation test for the SSSE3 dword absolute-value intrinsic
    ; llvm.x86.ssse3.pabs.d.128. The intrinsic is applied once to the register
    ; argument %a0 and once to a <4 x i32> vector loaded from %a1, so the
    ; expected output contains both a register-source and a memory-source
    ; (v)pabsd; the two results are combined with `or` and returned.
    ; The assertion lines inside the function are autogenerated (see the note
    ; on the first line of this file); regenerate them instead of hand-editing.
    151 define <4 x i32> @test_pabsd(<4 x i32> %a0, <4 x i32> *%a1) {
    152 ; GENERIC-LABEL: test_pabsd:
    153 ; GENERIC:       # %bb.0:
    154 ; GENERIC-NEXT:    pabsd %xmm0, %xmm1 # sched: [1:0.50]
    155 ; GENERIC-NEXT:    pabsd (%rdi), %xmm0 # sched: [7:0.50]
    156 ; GENERIC-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
    157 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    158 ;
    159 ; ATOM-LABEL: test_pabsd:
    160 ; ATOM:       # %bb.0:
    161 ; ATOM-NEXT:    pabsd %xmm0, %xmm1 # sched: [1:0.50]
    162 ; ATOM-NEXT:    pabsd (%rdi), %xmm0 # sched: [1:1.00]
    163 ; ATOM-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
    164 ; ATOM-NEXT:    nop # sched: [1:0.50]
    165 ; ATOM-NEXT:    nop # sched: [1:0.50]
    166 ; ATOM-NEXT:    retq # sched: [79:39.50]
    167 ;
    168 ; SLM-LABEL: test_pabsd:
    169 ; SLM:       # %bb.0:
    170 ; SLM-NEXT:    pabsd %xmm0, %xmm1 # sched: [1:0.50]
    171 ; SLM-NEXT:    pabsd (%rdi), %xmm0 # sched: [4:1.00]
    172 ; SLM-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
    173 ; SLM-NEXT:    retq # sched: [4:1.00]
    174 ;
    175 ; SANDY-SSE-LABEL: test_pabsd:
    176 ; SANDY-SSE:       # %bb.0:
    177 ; SANDY-SSE-NEXT:    pabsd %xmm0, %xmm1 # sched: [1:0.50]
    178 ; SANDY-SSE-NEXT:    pabsd (%rdi), %xmm0 # sched: [7:0.50]
    179 ; SANDY-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
    180 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
    181 ;
    182 ; SANDY-LABEL: test_pabsd:
    183 ; SANDY:       # %bb.0:
    184 ; SANDY-NEXT:    vpabsd %xmm0, %xmm0 # sched: [1:0.50]
    185 ; SANDY-NEXT:    vpabsd (%rdi), %xmm1 # sched: [7:0.50]
    186 ; SANDY-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
    187 ; SANDY-NEXT:    retq # sched: [1:1.00]
    188 ;
    189 ; HASWELL-SSE-LABEL: test_pabsd:
    190 ; HASWELL-SSE:       # %bb.0:
    191 ; HASWELL-SSE-NEXT:    pabsd %xmm0, %xmm1 # sched: [1:0.50]
    192 ; HASWELL-SSE-NEXT:    pabsd (%rdi), %xmm0 # sched: [7:0.50]
    193 ; HASWELL-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
    194 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
    195 ;
    196 ; HASWELL-LABEL: test_pabsd:
    197 ; HASWELL:       # %bb.0:
    198 ; HASWELL-NEXT:    vpabsd %xmm0, %xmm0 # sched: [1:0.50]
    199 ; HASWELL-NEXT:    vpabsd (%rdi), %xmm1 # sched: [7:0.50]
    200 ; HASWELL-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
    201 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    202 ;
    203 ; BROADWELL-SSE-LABEL: test_pabsd:
    204 ; BROADWELL-SSE:       # %bb.0:
    205 ; BROADWELL-SSE-NEXT:    pabsd %xmm0, %xmm1 # sched: [1:0.50]
    206 ; BROADWELL-SSE-NEXT:    pabsd (%rdi), %xmm0 # sched: [6:0.50]
    207 ; BROADWELL-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
    208 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
    209 ;
    210 ; BROADWELL-LABEL: test_pabsd:
    211 ; BROADWELL:       # %bb.0:
    212 ; BROADWELL-NEXT:    vpabsd %xmm0, %xmm0 # sched: [1:0.50]
    213 ; BROADWELL-NEXT:    vpabsd (%rdi), %xmm1 # sched: [6:0.50]
    214 ; BROADWELL-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
    215 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    216 ;
    217 ; SKYLAKE-SSE-LABEL: test_pabsd:
    218 ; SKYLAKE-SSE:       # %bb.0:
    219 ; SKYLAKE-SSE-NEXT:    pabsd %xmm0, %xmm1 # sched: [1:0.50]
    220 ; SKYLAKE-SSE-NEXT:    pabsd (%rdi), %xmm0 # sched: [7:0.50]
    221 ; SKYLAKE-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
    222 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
    223 ;
    224 ; SKYLAKE-LABEL: test_pabsd:
    225 ; SKYLAKE:       # %bb.0:
    226 ; SKYLAKE-NEXT:    vpabsd %xmm0, %xmm0 # sched: [1:0.50]
    227 ; SKYLAKE-NEXT:    vpabsd (%rdi), %xmm1 # sched: [7:0.50]
    228 ; SKYLAKE-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
    229 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    230 ;
    231 ; SKX-SSE-LABEL: test_pabsd:
    232 ; SKX-SSE:       # %bb.0:
    233 ; SKX-SSE-NEXT:    pabsd %xmm0, %xmm1 # sched: [1:0.50]
    234 ; SKX-SSE-NEXT:    pabsd (%rdi), %xmm0 # sched: [7:0.50]
    235 ; SKX-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
    236 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
    237 ;
    238 ; SKX-LABEL: test_pabsd:
    239 ; SKX:       # %bb.0:
    240 ; SKX-NEXT:    vpabsd %xmm0, %xmm0 # sched: [1:0.50]
    241 ; SKX-NEXT:    vpabsd (%rdi), %xmm1 # sched: [7:0.50]
    242 ; SKX-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
    243 ; SKX-NEXT:    retq # sched: [7:1.00]
    244 ;
    245 ; BTVER2-SSE-LABEL: test_pabsd:
    246 ; BTVER2-SSE:       # %bb.0:
    247 ; BTVER2-SSE-NEXT:    pabsd %xmm0, %xmm1 # sched: [1:0.50]
    248 ; BTVER2-SSE-NEXT:    pabsd (%rdi), %xmm0 # sched: [6:1.00]
    249 ; BTVER2-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
    250 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
    251 ;
    252 ; BTVER2-LABEL: test_pabsd:
    253 ; BTVER2:       # %bb.0:
    254 ; BTVER2-NEXT:    vpabsd (%rdi), %xmm1 # sched: [6:1.00]
    255 ; BTVER2-NEXT:    vpabsd %xmm0, %xmm0 # sched: [1:0.50]
    256 ; BTVER2-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
    257 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    258 ;
    259 ; ZNVER1-SSE-LABEL: test_pabsd:
    260 ; ZNVER1-SSE:       # %bb.0:
    261 ; ZNVER1-SSE-NEXT:    pabsd %xmm0, %xmm1 # sched: [1:0.25]
    262 ; ZNVER1-SSE-NEXT:    pabsd (%rdi), %xmm0 # sched: [8:0.50]
    263 ; ZNVER1-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.25]
    264 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
    265 ;
    266 ; ZNVER1-LABEL: test_pabsd:
    267 ; ZNVER1:       # %bb.0:
    268 ; ZNVER1-NEXT:    vpabsd (%rdi), %xmm1 # sched: [8:0.50]
    269 ; ZNVER1-NEXT:    vpabsd %xmm0, %xmm0 # sched: [1:0.25]
    270 ; ZNVER1-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
    271 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; %1 is the absolute value of the register operand; %3 is the absolute
        ; value of %2, which is loaded from %a1 with 16-byte alignment.
    272   %1 = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0)
    273   %2 = load <4 x i32>, <4 x i32> *%a1, align 16
    274   %3 = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %2)
    275   %4 = or <4 x i32> %1, %3
    276   ret <4 x i32> %4
    277 }
        ; Declaration of the intrinsic exercised by test_pabsd.
    278 declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone
    279 
    ; Scheduling-annotation test for the SSSE3 word absolute-value intrinsic
    ; llvm.x86.ssse3.pabs.w.128. The intrinsic is applied once to the register
    ; argument %a0 and once to an <8 x i16> vector loaded from %a1, so the
    ; expected output contains both a register-source and a memory-source
    ; (v)pabsw; the two results are combined with `or` and returned.
    ; The assertion lines inside the function are autogenerated (see the note
    ; on the first line of this file); regenerate them instead of hand-editing.
    280 define <8 x i16> @test_pabsw(<8 x i16> %a0, <8 x i16> *%a1) {
    281 ; GENERIC-LABEL: test_pabsw:
    282 ; GENERIC:       # %bb.0:
    283 ; GENERIC-NEXT:    pabsw %xmm0, %xmm1 # sched: [1:0.50]
    284 ; GENERIC-NEXT:    pabsw (%rdi), %xmm0 # sched: [7:0.50]
    285 ; GENERIC-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
    286 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    287 ;
    288 ; ATOM-LABEL: test_pabsw:
    289 ; ATOM:       # %bb.0:
    290 ; ATOM-NEXT:    pabsw %xmm0, %xmm1 # sched: [1:0.50]
    291 ; ATOM-NEXT:    pabsw (%rdi), %xmm0 # sched: [1:1.00]
    292 ; ATOM-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
    293 ; ATOM-NEXT:    nop # sched: [1:0.50]
    294 ; ATOM-NEXT:    nop # sched: [1:0.50]
    295 ; ATOM-NEXT:    retq # sched: [79:39.50]
    296 ;
    297 ; SLM-LABEL: test_pabsw:
    298 ; SLM:       # %bb.0:
    299 ; SLM-NEXT:    pabsw %xmm0, %xmm1 # sched: [1:0.50]
    300 ; SLM-NEXT:    pabsw (%rdi), %xmm0 # sched: [4:1.00]
    301 ; SLM-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
    302 ; SLM-NEXT:    retq # sched: [4:1.00]
    303 ;
    304 ; SANDY-SSE-LABEL: test_pabsw:
    305 ; SANDY-SSE:       # %bb.0:
    306 ; SANDY-SSE-NEXT:    pabsw %xmm0, %xmm1 # sched: [1:0.50]
    307 ; SANDY-SSE-NEXT:    pabsw (%rdi), %xmm0 # sched: [7:0.50]
    308 ; SANDY-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
    309 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
    310 ;
    311 ; SANDY-LABEL: test_pabsw:
    312 ; SANDY:       # %bb.0:
    313 ; SANDY-NEXT:    vpabsw %xmm0, %xmm0 # sched: [1:0.50]
    314 ; SANDY-NEXT:    vpabsw (%rdi), %xmm1 # sched: [7:0.50]
    315 ; SANDY-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
    316 ; SANDY-NEXT:    retq # sched: [1:1.00]
    317 ;
    318 ; HASWELL-SSE-LABEL: test_pabsw:
    319 ; HASWELL-SSE:       # %bb.0:
    320 ; HASWELL-SSE-NEXT:    pabsw %xmm0, %xmm1 # sched: [1:0.50]
    321 ; HASWELL-SSE-NEXT:    pabsw (%rdi), %xmm0 # sched: [7:0.50]
    322 ; HASWELL-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
    323 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
    324 ;
    325 ; HASWELL-LABEL: test_pabsw:
    326 ; HASWELL:       # %bb.0:
    327 ; HASWELL-NEXT:    vpabsw %xmm0, %xmm0 # sched: [1:0.50]
    328 ; HASWELL-NEXT:    vpabsw (%rdi), %xmm1 # sched: [7:0.50]
    329 ; HASWELL-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
    330 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    331 ;
    332 ; BROADWELL-SSE-LABEL: test_pabsw:
    333 ; BROADWELL-SSE:       # %bb.0:
    334 ; BROADWELL-SSE-NEXT:    pabsw %xmm0, %xmm1 # sched: [1:0.50]
    335 ; BROADWELL-SSE-NEXT:    pabsw (%rdi), %xmm0 # sched: [6:0.50]
    336 ; BROADWELL-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
    337 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
    338 ;
    339 ; BROADWELL-LABEL: test_pabsw:
    340 ; BROADWELL:       # %bb.0:
    341 ; BROADWELL-NEXT:    vpabsw %xmm0, %xmm0 # sched: [1:0.50]
    342 ; BROADWELL-NEXT:    vpabsw (%rdi), %xmm1 # sched: [6:0.50]
    343 ; BROADWELL-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
    344 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    345 ;
    346 ; SKYLAKE-SSE-LABEL: test_pabsw:
    347 ; SKYLAKE-SSE:       # %bb.0:
    348 ; SKYLAKE-SSE-NEXT:    pabsw %xmm0, %xmm1 # sched: [1:0.50]
    349 ; SKYLAKE-SSE-NEXT:    pabsw (%rdi), %xmm0 # sched: [7:0.50]
    350 ; SKYLAKE-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
    351 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
    352 ;
    353 ; SKYLAKE-LABEL: test_pabsw:
    354 ; SKYLAKE:       # %bb.0:
    355 ; SKYLAKE-NEXT:    vpabsw %xmm0, %xmm0 # sched: [1:0.50]
    356 ; SKYLAKE-NEXT:    vpabsw (%rdi), %xmm1 # sched: [7:0.50]
    357 ; SKYLAKE-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
    358 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    359 ;
    360 ; SKX-SSE-LABEL: test_pabsw:
    361 ; SKX-SSE:       # %bb.0:
    362 ; SKX-SSE-NEXT:    pabsw %xmm0, %xmm1 # sched: [1:0.50]
    363 ; SKX-SSE-NEXT:    pabsw (%rdi), %xmm0 # sched: [7:0.50]
    364 ; SKX-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
    365 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
    366 ;
    367 ; SKX-LABEL: test_pabsw:
    368 ; SKX:       # %bb.0:
    369 ; SKX-NEXT:    vpabsw %xmm0, %xmm0 # sched: [1:0.50]
    370 ; SKX-NEXT:    vpabsw (%rdi), %xmm1 # sched: [7:0.50]
    371 ; SKX-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
    372 ; SKX-NEXT:    retq # sched: [7:1.00]
    373 ;
    374 ; BTVER2-SSE-LABEL: test_pabsw:
    375 ; BTVER2-SSE:       # %bb.0:
    376 ; BTVER2-SSE-NEXT:    pabsw %xmm0, %xmm1 # sched: [1:0.50]
    377 ; BTVER2-SSE-NEXT:    pabsw (%rdi), %xmm0 # sched: [6:1.00]
    378 ; BTVER2-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
    379 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
    380 ;
    381 ; BTVER2-LABEL: test_pabsw:
    382 ; BTVER2:       # %bb.0:
    383 ; BTVER2-NEXT:    vpabsw (%rdi), %xmm1 # sched: [6:1.00]
    384 ; BTVER2-NEXT:    vpabsw %xmm0, %xmm0 # sched: [1:0.50]
    385 ; BTVER2-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
    386 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    387 ;
    388 ; ZNVER1-SSE-LABEL: test_pabsw:
    389 ; ZNVER1-SSE:       # %bb.0:
    390 ; ZNVER1-SSE-NEXT:    pabsw %xmm0, %xmm1 # sched: [1:0.25]
    391 ; ZNVER1-SSE-NEXT:    pabsw (%rdi), %xmm0 # sched: [8:0.50]
    392 ; ZNVER1-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.25]
    393 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
    394 ;
    395 ; ZNVER1-LABEL: test_pabsw:
    396 ; ZNVER1:       # %bb.0:
    397 ; ZNVER1-NEXT:    vpabsw (%rdi), %xmm1 # sched: [8:0.50]
    398 ; ZNVER1-NEXT:    vpabsw %xmm0, %xmm0 # sched: [1:0.25]
    399 ; ZNVER1-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
    400 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; %1 is the absolute value of the register operand; %3 is the absolute
        ; value of %2, which is loaded from %a1 with 16-byte alignment.
    401   %1 = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0)
    402   %2 = load <8 x i16>, <8 x i16> *%a1, align 16
    403   %3 = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %2)
    404   %4 = or <8 x i16> %1, %3
    405   ret <8 x i16> %4
    406 }
        ; Declaration of the intrinsic exercised by test_pabsw.
    407 declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone
    408 
    ; Scheduling-annotation test for (v)palignr, expressed with two generic
    ; shufflevector instructions instead of an intrinsic. The first shuffle
    ; combines the two register arguments; the second combines a vector loaded
    ; from %a2 with the first result, so the expected output contains both a
    ; register-source and a memory-source (v)palignr. In the non-VEX
    ; expectations the result is built in xmm1 and then copied to xmm0 with
    ; movdqa; the VEX expectations write xmm0 directly.
    ; The assertion lines inside the function are autogenerated (see the note
    ; on the first line of this file); regenerate them instead of hand-editing.
    409 define <8 x i16> @test_palignr(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
    410 ; GENERIC-LABEL: test_palignr:
    411 ; GENERIC:       # %bb.0:
    412 ; GENERIC-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50]
    413 ; GENERIC-NEXT:    palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:0.50]
    414 ; GENERIC-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.33]
    415 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    416 ;
    417 ; ATOM-LABEL: test_palignr:
    418 ; ATOM:       # %bb.0:
    419 ; ATOM-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00]
    420 ; ATOM-NEXT:    palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [1:1.00]
    421 ; ATOM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
    422 ; ATOM-NEXT:    nop # sched: [1:0.50]
    423 ; ATOM-NEXT:    nop # sched: [1:0.50]
    424 ; ATOM-NEXT:    retq # sched: [79:39.50]
    425 ;
    426 ; SLM-LABEL: test_palignr:
    427 ; SLM:       # %bb.0:
    428 ; SLM-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00]
    429 ; SLM-NEXT:    palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [4:1.00]
    430 ; SLM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
    431 ; SLM-NEXT:    retq # sched: [4:1.00]
    432 ;
    433 ; SANDY-SSE-LABEL: test_palignr:
    434 ; SANDY-SSE:       # %bb.0:
    435 ; SANDY-SSE-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50]
    436 ; SANDY-SSE-NEXT:    palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:0.50]
    437 ; SANDY-SSE-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.33]
    438 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
    439 ;
    440 ; SANDY-LABEL: test_palignr:
    441 ; SANDY:       # %bb.0:
    442 ; SANDY-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50]
    443 ; SANDY-NEXT:    vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:0.50]
    444 ; SANDY-NEXT:    retq # sched: [1:1.00]
    445 ;
    446 ; HASWELL-SSE-LABEL: test_palignr:
    447 ; HASWELL-SSE:       # %bb.0:
    448 ; HASWELL-SSE-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00]
    449 ; HASWELL-SSE-NEXT:    palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00]
    450 ; HASWELL-SSE-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.33]
    451 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
    452 ;
    453 ; HASWELL-LABEL: test_palignr:
    454 ; HASWELL:       # %bb.0:
    455 ; HASWELL-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00]
    456 ; HASWELL-NEXT:    vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00]
    457 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    458 ;
    459 ; BROADWELL-SSE-LABEL: test_palignr:
    460 ; BROADWELL-SSE:       # %bb.0:
    461 ; BROADWELL-SSE-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00]
    462 ; BROADWELL-SSE-NEXT:    palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [6:1.00]
    463 ; BROADWELL-SSE-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.33]
    464 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
    465 ;
    466 ; BROADWELL-LABEL: test_palignr:
    467 ; BROADWELL:       # %bb.0:
    468 ; BROADWELL-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00]
    469 ; BROADWELL-NEXT:    vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [6:1.00]
    470 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    471 ;
    472 ; SKYLAKE-SSE-LABEL: test_palignr:
    473 ; SKYLAKE-SSE:       # %bb.0:
    474 ; SKYLAKE-SSE-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00]
    475 ; SKYLAKE-SSE-NEXT:    palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00]
    476 ; SKYLAKE-SSE-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.33]
    477 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
    478 ;
    479 ; SKYLAKE-LABEL: test_palignr:
    480 ; SKYLAKE:       # %bb.0:
    481 ; SKYLAKE-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00]
    482 ; SKYLAKE-NEXT:    vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00]
    483 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    484 ;
    485 ; SKX-SSE-LABEL: test_palignr:
    486 ; SKX-SSE:       # %bb.0:
    487 ; SKX-SSE-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00]
    488 ; SKX-SSE-NEXT:    palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00]
    489 ; SKX-SSE-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.33]
    490 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
    491 ;
    492 ; SKX-LABEL: test_palignr:
    493 ; SKX:       # %bb.0:
    494 ; SKX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00]
    495 ; SKX-NEXT:    vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00]
    496 ; SKX-NEXT:    retq # sched: [7:1.00]
    497 ;
    498 ; BTVER2-SSE-LABEL: test_palignr:
    499 ; BTVER2-SSE:       # %bb.0:
    500 ; BTVER2-SSE-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50]
    501 ; BTVER2-SSE-NEXT:    palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [6:1.00]
    502 ; BTVER2-SSE-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
    503 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
    504 ;
    505 ; BTVER2-LABEL: test_palignr:
    506 ; BTVER2:       # %bb.0:
    507 ; BTVER2-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50]
    508 ; BTVER2-NEXT:    vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [6:1.00]
    509 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    510 ;
    511 ; ZNVER1-SSE-LABEL: test_palignr:
    512 ; ZNVER1-SSE:       # %bb.0:
    513 ; ZNVER1-SSE-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.25]
    514 ; ZNVER1-SSE-NEXT:    palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [8:0.50]
    515 ; ZNVER1-SSE-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.25]
    516 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
    517 ;
    518 ; ZNVER1-LABEL: test_palignr:
    519 ; ZNVER1:       # %bb.0:
    520 ; ZNVER1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.25]
    521 ; ZNVER1-NEXT:    vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [8:0.50]
    522 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Mask 3..10 over (%a0, %a1): i16 elements 3-7 of %a0 followed by
        ; elements 0-2 of %a1 (bytes 6-15 of xmm0, then bytes 0-5 of xmm1 in the
        ; expectations above).
    523   %1 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
    524   %2 = load <8 x i16>, <8 x i16> *%a2, align 16
        ; Mask 7..14 over (%2, %1): i16 element 7 of the loaded vector followed
        ; by elements 0-6 of %1 (bytes 14-15 of mem, then bytes 0-13 of the
        ; previous result in the expectations above).
    525   %3 = shufflevector <8 x i16> %2, <8 x i16> %1, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
    526   ret <8 x i16> %3
    527 }
    528 
    ; Scheduling-annotation test for the SSSE3 dword horizontal-add intrinsic
    ; llvm.x86.ssse3.phadd.d.128. The intrinsic is applied first to the two
    ; register arguments and then to that result and a <4 x i32> vector loaded
    ; from %a2, so the expected output contains both a register-source and a
    ; memory-source (v)phaddd.
    ; The assertion lines inside the function are autogenerated (see the note
    ; on the first line of this file); regenerate them instead of hand-editing.
    ; NOTE(review): the znver1 expectations record [100:0.25] for both
    ; (v)phaddd forms, unlike every other CPU in this function; this looks like
    ; a placeholder value in that scheduling model -- confirm before relying
    ; on it.
    529 define <4 x i32> @test_phaddd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
    530 ; GENERIC-LABEL: test_phaddd:
    531 ; GENERIC:       # %bb.0:
    532 ; GENERIC-NEXT:    phaddd %xmm1, %xmm0 # sched: [3:1.50]
    533 ; GENERIC-NEXT:    phaddd (%rdi), %xmm0 # sched: [9:1.50]
    534 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    535 ;
    536 ; ATOM-LABEL: test_phaddd:
    537 ; ATOM:       # %bb.0:
    538 ; ATOM-NEXT:    phaddd %xmm1, %xmm0 # sched: [3:1.50]
    539 ; ATOM-NEXT:    phaddd (%rdi), %xmm0 # sched: [4:2.00]
    540 ; ATOM-NEXT:    retq # sched: [79:39.50]
    541 ;
    542 ; SLM-LABEL: test_phaddd:
    543 ; SLM:       # %bb.0:
    544 ; SLM-NEXT:    phaddd %xmm1, %xmm0 # sched: [1:0.50]
    545 ; SLM-NEXT:    phaddd (%rdi), %xmm0 # sched: [4:1.00]
    546 ; SLM-NEXT:    retq # sched: [4:1.00]
    547 ;
    548 ; SANDY-SSE-LABEL: test_phaddd:
    549 ; SANDY-SSE:       # %bb.0:
    550 ; SANDY-SSE-NEXT:    phaddd %xmm1, %xmm0 # sched: [3:1.50]
    551 ; SANDY-SSE-NEXT:    phaddd (%rdi), %xmm0 # sched: [9:1.50]
    552 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
    553 ;
    554 ; SANDY-LABEL: test_phaddd:
    555 ; SANDY:       # %bb.0:
    556 ; SANDY-NEXT:    vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
    557 ; SANDY-NEXT:    vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
    558 ; SANDY-NEXT:    retq # sched: [1:1.00]
    559 ;
    560 ; HASWELL-SSE-LABEL: test_phaddd:
    561 ; HASWELL-SSE:       # %bb.0:
    562 ; HASWELL-SSE-NEXT:    phaddd %xmm1, %xmm0 # sched: [3:2.00]
    563 ; HASWELL-SSE-NEXT:    phaddd (%rdi), %xmm0 # sched: [9:2.00]
    564 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
    565 ;
    566 ; HASWELL-LABEL: test_phaddd:
    567 ; HASWELL:       # %bb.0:
    568 ; HASWELL-NEXT:    vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
    569 ; HASWELL-NEXT:    vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
    570 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    571 ;
    572 ; BROADWELL-SSE-LABEL: test_phaddd:
    573 ; BROADWELL-SSE:       # %bb.0:
    574 ; BROADWELL-SSE-NEXT:    phaddd %xmm1, %xmm0 # sched: [3:2.00]
    575 ; BROADWELL-SSE-NEXT:    phaddd (%rdi), %xmm0 # sched: [8:2.00]
    576 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
    577 ;
    578 ; BROADWELL-LABEL: test_phaddd:
    579 ; BROADWELL:       # %bb.0:
    580 ; BROADWELL-NEXT:    vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
    581 ; BROADWELL-NEXT:    vphaddd (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
    582 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    583 ;
    584 ; SKYLAKE-SSE-LABEL: test_phaddd:
    585 ; SKYLAKE-SSE:       # %bb.0:
    586 ; SKYLAKE-SSE-NEXT:    phaddd %xmm1, %xmm0 # sched: [3:2.00]
    587 ; SKYLAKE-SSE-NEXT:    phaddd (%rdi), %xmm0 # sched: [9:2.00]
    588 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
    589 ;
    590 ; SKYLAKE-LABEL: test_phaddd:
    591 ; SKYLAKE:       # %bb.0:
    592 ; SKYLAKE-NEXT:    vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
    593 ; SKYLAKE-NEXT:    vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
    594 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    595 ;
    596 ; SKX-SSE-LABEL: test_phaddd:
    597 ; SKX-SSE:       # %bb.0:
    598 ; SKX-SSE-NEXT:    phaddd %xmm1, %xmm0 # sched: [3:2.00]
    599 ; SKX-SSE-NEXT:    phaddd (%rdi), %xmm0 # sched: [9:2.00]
    600 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
    601 ;
    602 ; SKX-LABEL: test_phaddd:
    603 ; SKX:       # %bb.0:
    604 ; SKX-NEXT:    vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
    605 ; SKX-NEXT:    vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
    606 ; SKX-NEXT:    retq # sched: [7:1.00]
    607 ;
    608 ; BTVER2-SSE-LABEL: test_phaddd:
    609 ; BTVER2-SSE:       # %bb.0:
    610 ; BTVER2-SSE-NEXT:    phaddd %xmm1, %xmm0 # sched: [1:0.50]
    611 ; BTVER2-SSE-NEXT:    phaddd (%rdi), %xmm0 # sched: [6:1.00]
    612 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
    613 ;
    614 ; BTVER2-LABEL: test_phaddd:
    615 ; BTVER2:       # %bb.0:
    616 ; BTVER2-NEXT:    vphaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
    617 ; BTVER2-NEXT:    vphaddd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
    618 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    619 ;
    620 ; ZNVER1-SSE-LABEL: test_phaddd:
    621 ; ZNVER1-SSE:       # %bb.0:
    622 ; ZNVER1-SSE-NEXT:    phaddd %xmm1, %xmm0 # sched: [100:0.25]
    623 ; ZNVER1-SSE-NEXT:    phaddd (%rdi), %xmm0 # sched: [100:0.25]
    624 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
    625 ;
    626 ; ZNVER1-LABEL: test_phaddd:
    627 ; ZNVER1:       # %bb.0:
    628 ; ZNVER1-NEXT:    vphaddd %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
    629 ; ZNVER1-NEXT:    vphaddd (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
    630 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; %1 is the horizontal add of the two register operands; %3 feeds %1
        ; and the 16-byte-aligned load %2 back into the same intrinsic.
    631   %1 = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1)
    632   %2 = load <4 x i32>, <4 x i32> *%a2, align 16
    633   %3 = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %1, <4 x i32> %2)
    634   ret <4 x i32> %3
    635 }
        ; Declaration of the intrinsic exercised by test_phaddd.
    636 declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone
    637 
    638 define <8 x i16> @test_phaddsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
    639 ; GENERIC-LABEL: test_phaddsw:
    640 ; GENERIC:       # %bb.0:
    641 ; GENERIC-NEXT:    phaddsw %xmm1, %xmm0 # sched: [3:1.50]
    642 ; GENERIC-NEXT:    phaddsw (%rdi), %xmm0 # sched: [9:1.50]
    643 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    644 ;
    645 ; ATOM-LABEL: test_phaddsw:
    646 ; ATOM:       # %bb.0:
    647 ; ATOM-NEXT:    phaddsw %xmm1, %xmm0 # sched: [7:3.50]
    648 ; ATOM-NEXT:    phaddsw (%rdi), %xmm0 # sched: [8:4.00]
    649 ; ATOM-NEXT:    retq # sched: [79:39.50]
    650 ;
    651 ; SLM-LABEL: test_phaddsw:
    652 ; SLM:       # %bb.0:
    653 ; SLM-NEXT:    phaddsw %xmm1, %xmm0 # sched: [1:0.50]
    654 ; SLM-NEXT:    phaddsw (%rdi), %xmm0 # sched: [4:1.00]
    655 ; SLM-NEXT:    retq # sched: [4:1.00]
    656 ;
    657 ; SANDY-SSE-LABEL: test_phaddsw:
    658 ; SANDY-SSE:       # %bb.0:
    659 ; SANDY-SSE-NEXT:    phaddsw %xmm1, %xmm0 # sched: [3:1.50]
    660 ; SANDY-SSE-NEXT:    phaddsw (%rdi), %xmm0 # sched: [9:1.50]
    661 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
    662 ;
    663 ; SANDY-LABEL: test_phaddsw:
    664 ; SANDY:       # %bb.0:
    665 ; SANDY-NEXT:    vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
    666 ; SANDY-NEXT:    vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
    667 ; SANDY-NEXT:    retq # sched: [1:1.00]
    668 ;
    669 ; HASWELL-SSE-LABEL: test_phaddsw:
    670 ; HASWELL-SSE:       # %bb.0:
    671 ; HASWELL-SSE-NEXT:    phaddsw %xmm1, %xmm0 # sched: [3:2.00]
    672 ; HASWELL-SSE-NEXT:    phaddsw (%rdi), %xmm0 # sched: [9:2.00]
    673 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
    674 ;
    675 ; HASWELL-LABEL: test_phaddsw:
    676 ; HASWELL:       # %bb.0:
    677 ; HASWELL-NEXT:    vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
    678 ; HASWELL-NEXT:    vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
    679 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    680 ;
    681 ; BROADWELL-SSE-LABEL: test_phaddsw:
    682 ; BROADWELL-SSE:       # %bb.0:
    683 ; BROADWELL-SSE-NEXT:    phaddsw %xmm1, %xmm0 # sched: [3:2.00]
    684 ; BROADWELL-SSE-NEXT:    phaddsw (%rdi), %xmm0 # sched: [8:2.00]
    685 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
    686 ;
    687 ; BROADWELL-LABEL: test_phaddsw:
    688 ; BROADWELL:       # %bb.0:
    689 ; BROADWELL-NEXT:    vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
    690 ; BROADWELL-NEXT:    vphaddsw (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
    691 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    692 ;
    693 ; SKYLAKE-SSE-LABEL: test_phaddsw:
    694 ; SKYLAKE-SSE:       # %bb.0:
    695 ; SKYLAKE-SSE-NEXT:    phaddsw %xmm1, %xmm0 # sched: [3:2.00]
    696 ; SKYLAKE-SSE-NEXT:    phaddsw (%rdi), %xmm0 # sched: [9:2.00]
    697 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
    698 ;
    699 ; SKYLAKE-LABEL: test_phaddsw:
    700 ; SKYLAKE:       # %bb.0:
    701 ; SKYLAKE-NEXT:    vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
    702 ; SKYLAKE-NEXT:    vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
    703 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    704 ;
    705 ; SKX-SSE-LABEL: test_phaddsw:
    706 ; SKX-SSE:       # %bb.0:
    707 ; SKX-SSE-NEXT:    phaddsw %xmm1, %xmm0 # sched: [3:2.00]
    708 ; SKX-SSE-NEXT:    phaddsw (%rdi), %xmm0 # sched: [9:2.00]
    709 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
    710 ;
    711 ; SKX-LABEL: test_phaddsw:
    712 ; SKX:       # %bb.0:
    713 ; SKX-NEXT:    vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
    714 ; SKX-NEXT:    vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
    715 ; SKX-NEXT:    retq # sched: [7:1.00]
    716 ;
    717 ; BTVER2-SSE-LABEL: test_phaddsw:
    718 ; BTVER2-SSE:       # %bb.0:
    719 ; BTVER2-SSE-NEXT:    phaddsw %xmm1, %xmm0 # sched: [1:0.50]
    720 ; BTVER2-SSE-NEXT:    phaddsw (%rdi), %xmm0 # sched: [6:1.00]
    721 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
    722 ;
    723 ; BTVER2-LABEL: test_phaddsw:
    724 ; BTVER2:       # %bb.0:
    725 ; BTVER2-NEXT:    vphaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
    726 ; BTVER2-NEXT:    vphaddsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
    727 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    728 ;
    729 ; ZNVER1-SSE-LABEL: test_phaddsw:
    730 ; ZNVER1-SSE:       # %bb.0:
    731 ; ZNVER1-SSE-NEXT:    phaddsw %xmm1, %xmm0 # sched: [100:0.25]
    732 ; ZNVER1-SSE-NEXT:    phaddsw (%rdi), %xmm0 # sched: [100:0.25]
    733 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
    734 ;
    735 ; ZNVER1-LABEL: test_phaddsw:
    736 ; ZNVER1:       # %bb.0:
    737 ; ZNVER1-NEXT:    vphaddsw %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
    738 ; ZNVER1-NEXT:    vphaddsw (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
    739 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Calls @llvm.x86.ssse3.phadd.sw.128 twice so that both operand forms
          ; show up in the output. The first call takes the two vector arguments
          ; (register form). The second combines that result with a 16-byte
          ; aligned load from %a2, which the assertions above expect to appear
          ; as the (%rdi) memory-operand form with its own sched annotation.
    740   %1 = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a0, <8 x i16> %a1)
    741   %2 = load <8 x i16>, <8 x i16> *%a2, align 16
    742   %3 = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %1, <8 x i16> %2)
    743   ret <8 x i16> %3
    744 }
        ; Intrinsic used by test_phaddsw; takes two <8 x i16> vectors and returns one.
    745 declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
    746 
    747 define <8 x i16> @test_phaddw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
    748 ; GENERIC-LABEL: test_phaddw:
    749 ; GENERIC:       # %bb.0:
    750 ; GENERIC-NEXT:    phaddw %xmm1, %xmm0 # sched: [3:1.50]
    751 ; GENERIC-NEXT:    phaddw (%rdi), %xmm0 # sched: [9:1.50]
    752 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    753 ;
    754 ; ATOM-LABEL: test_phaddw:
    755 ; ATOM:       # %bb.0:
    756 ; ATOM-NEXT:    phaddw %xmm1, %xmm0 # sched: [7:3.50]
    757 ; ATOM-NEXT:    phaddw (%rdi), %xmm0 # sched: [8:4.00]
    758 ; ATOM-NEXT:    retq # sched: [79:39.50]
    759 ;
    760 ; SLM-LABEL: test_phaddw:
    761 ; SLM:       # %bb.0:
    762 ; SLM-NEXT:    phaddw %xmm1, %xmm0 # sched: [1:0.50]
    763 ; SLM-NEXT:    phaddw (%rdi), %xmm0 # sched: [4:1.00]
    764 ; SLM-NEXT:    retq # sched: [4:1.00]
    765 ;
    766 ; SANDY-SSE-LABEL: test_phaddw:
    767 ; SANDY-SSE:       # %bb.0:
    768 ; SANDY-SSE-NEXT:    phaddw %xmm1, %xmm0 # sched: [3:1.50]
    769 ; SANDY-SSE-NEXT:    phaddw (%rdi), %xmm0 # sched: [9:1.50]
    770 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
    771 ;
    772 ; SANDY-LABEL: test_phaddw:
    773 ; SANDY:       # %bb.0:
    774 ; SANDY-NEXT:    vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
    775 ; SANDY-NEXT:    vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
    776 ; SANDY-NEXT:    retq # sched: [1:1.00]
    777 ;
    778 ; HASWELL-SSE-LABEL: test_phaddw:
    779 ; HASWELL-SSE:       # %bb.0:
    780 ; HASWELL-SSE-NEXT:    phaddw %xmm1, %xmm0 # sched: [3:2.00]
    781 ; HASWELL-SSE-NEXT:    phaddw (%rdi), %xmm0 # sched: [9:2.00]
    782 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
    783 ;
    784 ; HASWELL-LABEL: test_phaddw:
    785 ; HASWELL:       # %bb.0:
    786 ; HASWELL-NEXT:    vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
    787 ; HASWELL-NEXT:    vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
    788 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    789 ;
    790 ; BROADWELL-SSE-LABEL: test_phaddw:
    791 ; BROADWELL-SSE:       # %bb.0:
    792 ; BROADWELL-SSE-NEXT:    phaddw %xmm1, %xmm0 # sched: [3:2.00]
    793 ; BROADWELL-SSE-NEXT:    phaddw (%rdi), %xmm0 # sched: [8:2.00]
    794 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
    795 ;
    796 ; BROADWELL-LABEL: test_phaddw:
    797 ; BROADWELL:       # %bb.0:
    798 ; BROADWELL-NEXT:    vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
    799 ; BROADWELL-NEXT:    vphaddw (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
    800 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    801 ;
    802 ; SKYLAKE-SSE-LABEL: test_phaddw:
    803 ; SKYLAKE-SSE:       # %bb.0:
    804 ; SKYLAKE-SSE-NEXT:    phaddw %xmm1, %xmm0 # sched: [3:2.00]
    805 ; SKYLAKE-SSE-NEXT:    phaddw (%rdi), %xmm0 # sched: [9:2.00]
    806 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
    807 ;
    808 ; SKYLAKE-LABEL: test_phaddw:
    809 ; SKYLAKE:       # %bb.0:
    810 ; SKYLAKE-NEXT:    vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
    811 ; SKYLAKE-NEXT:    vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
    812 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    813 ;
    814 ; SKX-SSE-LABEL: test_phaddw:
    815 ; SKX-SSE:       # %bb.0:
    816 ; SKX-SSE-NEXT:    phaddw %xmm1, %xmm0 # sched: [3:2.00]
    817 ; SKX-SSE-NEXT:    phaddw (%rdi), %xmm0 # sched: [9:2.00]
    818 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
    819 ;
    820 ; SKX-LABEL: test_phaddw:
    821 ; SKX:       # %bb.0:
    822 ; SKX-NEXT:    vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
    823 ; SKX-NEXT:    vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
    824 ; SKX-NEXT:    retq # sched: [7:1.00]
    825 ;
    826 ; BTVER2-SSE-LABEL: test_phaddw:
    827 ; BTVER2-SSE:       # %bb.0:
    828 ; BTVER2-SSE-NEXT:    phaddw %xmm1, %xmm0 # sched: [1:0.50]
    829 ; BTVER2-SSE-NEXT:    phaddw (%rdi), %xmm0 # sched: [6:1.00]
    830 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
    831 ;
    832 ; BTVER2-LABEL: test_phaddw:
    833 ; BTVER2:       # %bb.0:
    834 ; BTVER2-NEXT:    vphaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
    835 ; BTVER2-NEXT:    vphaddw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
    836 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    837 ;
    838 ; ZNVER1-SSE-LABEL: test_phaddw:
    839 ; ZNVER1-SSE:       # %bb.0:
    840 ; ZNVER1-SSE-NEXT:    phaddw %xmm1, %xmm0 # sched: [100:0.25]
    841 ; ZNVER1-SSE-NEXT:    phaddw (%rdi), %xmm0 # sched: [100:0.25]
    842 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
    843 ;
    844 ; ZNVER1-LABEL: test_phaddw:
    845 ; ZNVER1:       # %bb.0:
    846 ; ZNVER1-NEXT:    vphaddw %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
    847 ; ZNVER1-NEXT:    vphaddw (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
    848 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Calls @llvm.x86.ssse3.phadd.w.128 twice so that both operand forms
          ; show up in the output. The first call takes the two vector arguments
          ; (register form). The second combines that result with a 16-byte
          ; aligned load from %a2, which the assertions above expect to appear
          ; as the (%rdi) memory-operand form with its own sched annotation.
    849   %1 = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1)
    850   %2 = load <8 x i16>, <8 x i16> *%a2, align 16
    851   %3 = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %1, <8 x i16> %2)
    852   ret <8 x i16> %3
    853 }
        ; Intrinsic used by test_phaddw; takes two <8 x i16> vectors and returns one.
    854 declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone
    855 
    856 define <4 x i32> @test_phsubd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
    857 ; GENERIC-LABEL: test_phsubd:
    858 ; GENERIC:       # %bb.0:
    859 ; GENERIC-NEXT:    phsubd %xmm1, %xmm0 # sched: [3:1.50]
    860 ; GENERIC-NEXT:    phsubd (%rdi), %xmm0 # sched: [9:1.50]
    861 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    862 ;
    863 ; ATOM-LABEL: test_phsubd:
    864 ; ATOM:       # %bb.0:
    865 ; ATOM-NEXT:    phsubd %xmm1, %xmm0 # sched: [3:1.50]
    866 ; ATOM-NEXT:    phsubd (%rdi), %xmm0 # sched: [4:2.00]
    867 ; ATOM-NEXT:    retq # sched: [79:39.50]
    868 ;
    869 ; SLM-LABEL: test_phsubd:
    870 ; SLM:       # %bb.0:
    871 ; SLM-NEXT:    phsubd %xmm1, %xmm0 # sched: [1:0.50]
    872 ; SLM-NEXT:    phsubd (%rdi), %xmm0 # sched: [4:1.00]
    873 ; SLM-NEXT:    retq # sched: [4:1.00]
    874 ;
    875 ; SANDY-SSE-LABEL: test_phsubd:
    876 ; SANDY-SSE:       # %bb.0:
    877 ; SANDY-SSE-NEXT:    phsubd %xmm1, %xmm0 # sched: [3:1.50]
    878 ; SANDY-SSE-NEXT:    phsubd (%rdi), %xmm0 # sched: [9:1.50]
    879 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
    880 ;
    881 ; SANDY-LABEL: test_phsubd:
    882 ; SANDY:       # %bb.0:
    883 ; SANDY-NEXT:    vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
    884 ; SANDY-NEXT:    vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
    885 ; SANDY-NEXT:    retq # sched: [1:1.00]
    886 ;
    887 ; HASWELL-SSE-LABEL: test_phsubd:
    888 ; HASWELL-SSE:       # %bb.0:
    889 ; HASWELL-SSE-NEXT:    phsubd %xmm1, %xmm0 # sched: [3:2.00]
    890 ; HASWELL-SSE-NEXT:    phsubd (%rdi), %xmm0 # sched: [9:2.00]
    891 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
    892 ;
    893 ; HASWELL-LABEL: test_phsubd:
    894 ; HASWELL:       # %bb.0:
    895 ; HASWELL-NEXT:    vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
    896 ; HASWELL-NEXT:    vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
    897 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    898 ;
    899 ; BROADWELL-SSE-LABEL: test_phsubd:
    900 ; BROADWELL-SSE:       # %bb.0:
    901 ; BROADWELL-SSE-NEXT:    phsubd %xmm1, %xmm0 # sched: [3:2.00]
    902 ; BROADWELL-SSE-NEXT:    phsubd (%rdi), %xmm0 # sched: [8:2.00]
    903 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
    904 ;
    905 ; BROADWELL-LABEL: test_phsubd:
    906 ; BROADWELL:       # %bb.0:
    907 ; BROADWELL-NEXT:    vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
    908 ; BROADWELL-NEXT:    vphsubd (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
    909 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    910 ;
    911 ; SKYLAKE-SSE-LABEL: test_phsubd:
    912 ; SKYLAKE-SSE:       # %bb.0:
    913 ; SKYLAKE-SSE-NEXT:    phsubd %xmm1, %xmm0 # sched: [3:2.00]
    914 ; SKYLAKE-SSE-NEXT:    phsubd (%rdi), %xmm0 # sched: [9:2.00]
    915 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
    916 ;
    917 ; SKYLAKE-LABEL: test_phsubd:
    918 ; SKYLAKE:       # %bb.0:
    919 ; SKYLAKE-NEXT:    vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
    920 ; SKYLAKE-NEXT:    vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
    921 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    922 ;
    923 ; SKX-SSE-LABEL: test_phsubd:
    924 ; SKX-SSE:       # %bb.0:
    925 ; SKX-SSE-NEXT:    phsubd %xmm1, %xmm0 # sched: [3:2.00]
    926 ; SKX-SSE-NEXT:    phsubd (%rdi), %xmm0 # sched: [9:2.00]
    927 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
    928 ;
    929 ; SKX-LABEL: test_phsubd:
    930 ; SKX:       # %bb.0:
    931 ; SKX-NEXT:    vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
    932 ; SKX-NEXT:    vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
    933 ; SKX-NEXT:    retq # sched: [7:1.00]
    934 ;
    935 ; BTVER2-SSE-LABEL: test_phsubd:
    936 ; BTVER2-SSE:       # %bb.0:
    937 ; BTVER2-SSE-NEXT:    phsubd %xmm1, %xmm0 # sched: [1:0.50]
    938 ; BTVER2-SSE-NEXT:    phsubd (%rdi), %xmm0 # sched: [6:1.00]
    939 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
    940 ;
    941 ; BTVER2-LABEL: test_phsubd:
    942 ; BTVER2:       # %bb.0:
    943 ; BTVER2-NEXT:    vphsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
    944 ; BTVER2-NEXT:    vphsubd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
    945 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    946 ;
    947 ; ZNVER1-SSE-LABEL: test_phsubd:
    948 ; ZNVER1-SSE:       # %bb.0:
    949 ; ZNVER1-SSE-NEXT:    phsubd %xmm1, %xmm0 # sched: [100:0.25]
    950 ; ZNVER1-SSE-NEXT:    phsubd (%rdi), %xmm0 # sched: [100:0.25]
    951 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
    952 ;
    953 ; ZNVER1-LABEL: test_phsubd:
    954 ; ZNVER1:       # %bb.0:
    955 ; ZNVER1-NEXT:    vphsubd %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
    956 ; ZNVER1-NEXT:    vphsubd (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
    957 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Calls @llvm.x86.ssse3.phsub.d.128 twice so that both operand forms
          ; show up in the output. The first call takes the two vector arguments
          ; (register form). The second combines that result with a 16-byte
          ; aligned load from %a2, which the assertions above expect to appear
          ; as the (%rdi) memory-operand form with its own sched annotation.
    958   %1 = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1)
    959   %2 = load <4 x i32>, <4 x i32> *%a2, align 16
    960   %3 = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %1, <4 x i32> %2)
    961   ret <4 x i32> %3
    962 }
        ; Intrinsic used by test_phsubd; takes two <4 x i32> vectors and returns one.
    963 declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone
    964 
    965 define <8 x i16> @test_phsubsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
    966 ; GENERIC-LABEL: test_phsubsw:
    967 ; GENERIC:       # %bb.0:
    968 ; GENERIC-NEXT:    phsubsw %xmm1, %xmm0 # sched: [3:1.50]
    969 ; GENERIC-NEXT:    phsubsw (%rdi), %xmm0 # sched: [9:1.50]
    970 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    971 ;
    972 ; ATOM-LABEL: test_phsubsw:
    973 ; ATOM:       # %bb.0:
    974 ; ATOM-NEXT:    phsubsw %xmm1, %xmm0 # sched: [7:3.50]
    975 ; ATOM-NEXT:    phsubsw (%rdi), %xmm0 # sched: [8:4.00]
    976 ; ATOM-NEXT:    retq # sched: [79:39.50]
    977 ;
    978 ; SLM-LABEL: test_phsubsw:
    979 ; SLM:       # %bb.0:
    980 ; SLM-NEXT:    phsubsw %xmm1, %xmm0 # sched: [1:0.50]
    981 ; SLM-NEXT:    phsubsw (%rdi), %xmm0 # sched: [4:1.00]
    982 ; SLM-NEXT:    retq # sched: [4:1.00]
    983 ;
    984 ; SANDY-SSE-LABEL: test_phsubsw:
    985 ; SANDY-SSE:       # %bb.0:
    986 ; SANDY-SSE-NEXT:    phsubsw %xmm1, %xmm0 # sched: [3:1.50]
    987 ; SANDY-SSE-NEXT:    phsubsw (%rdi), %xmm0 # sched: [9:1.50]
    988 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
    989 ;
    990 ; SANDY-LABEL: test_phsubsw:
    991 ; SANDY:       # %bb.0:
    992 ; SANDY-NEXT:    vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
    993 ; SANDY-NEXT:    vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
    994 ; SANDY-NEXT:    retq # sched: [1:1.00]
    995 ;
    996 ; HASWELL-SSE-LABEL: test_phsubsw:
    997 ; HASWELL-SSE:       # %bb.0:
    998 ; HASWELL-SSE-NEXT:    phsubsw %xmm1, %xmm0 # sched: [3:2.00]
    999 ; HASWELL-SSE-NEXT:    phsubsw (%rdi), %xmm0 # sched: [9:2.00]
   1000 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1001 ;
   1002 ; HASWELL-LABEL: test_phsubsw:
   1003 ; HASWELL:       # %bb.0:
   1004 ; HASWELL-NEXT:    vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
   1005 ; HASWELL-NEXT:    vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
   1006 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1007 ;
   1008 ; BROADWELL-SSE-LABEL: test_phsubsw:
   1009 ; BROADWELL-SSE:       # %bb.0:
   1010 ; BROADWELL-SSE-NEXT:    phsubsw %xmm1, %xmm0 # sched: [3:2.00]
   1011 ; BROADWELL-SSE-NEXT:    phsubsw (%rdi), %xmm0 # sched: [8:2.00]
   1012 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1013 ;
   1014 ; BROADWELL-LABEL: test_phsubsw:
   1015 ; BROADWELL:       # %bb.0:
   1016 ; BROADWELL-NEXT:    vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
   1017 ; BROADWELL-NEXT:    vphsubsw (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
   1018 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1019 ;
   1020 ; SKYLAKE-SSE-LABEL: test_phsubsw:
   1021 ; SKYLAKE-SSE:       # %bb.0:
   1022 ; SKYLAKE-SSE-NEXT:    phsubsw %xmm1, %xmm0 # sched: [3:2.00]
   1023 ; SKYLAKE-SSE-NEXT:    phsubsw (%rdi), %xmm0 # sched: [9:2.00]
   1024 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   1025 ;
   1026 ; SKYLAKE-LABEL: test_phsubsw:
   1027 ; SKYLAKE:       # %bb.0:
   1028 ; SKYLAKE-NEXT:    vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
   1029 ; SKYLAKE-NEXT:    vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
   1030 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1031 ;
   1032 ; SKX-SSE-LABEL: test_phsubsw:
   1033 ; SKX-SSE:       # %bb.0:
   1034 ; SKX-SSE-NEXT:    phsubsw %xmm1, %xmm0 # sched: [3:2.00]
   1035 ; SKX-SSE-NEXT:    phsubsw (%rdi), %xmm0 # sched: [9:2.00]
   1036 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   1037 ;
   1038 ; SKX-LABEL: test_phsubsw:
   1039 ; SKX:       # %bb.0:
   1040 ; SKX-NEXT:    vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
   1041 ; SKX-NEXT:    vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
   1042 ; SKX-NEXT:    retq # sched: [7:1.00]
   1043 ;
   1044 ; BTVER2-SSE-LABEL: test_phsubsw:
   1045 ; BTVER2-SSE:       # %bb.0:
   1046 ; BTVER2-SSE-NEXT:    phsubsw %xmm1, %xmm0 # sched: [1:0.50]
   1047 ; BTVER2-SSE-NEXT:    phsubsw (%rdi), %xmm0 # sched: [6:1.00]
   1048 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   1049 ;
   1050 ; BTVER2-LABEL: test_phsubsw:
   1051 ; BTVER2:       # %bb.0:
   1052 ; BTVER2-NEXT:    vphsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   1053 ; BTVER2-NEXT:    vphsubsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   1054 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1055 ;
   1056 ; ZNVER1-SSE-LABEL: test_phsubsw:
   1057 ; ZNVER1-SSE:       # %bb.0:
   1058 ; ZNVER1-SSE-NEXT:    phsubsw %xmm1, %xmm0 # sched: [100:0.25]
   1059 ; ZNVER1-SSE-NEXT:    phsubsw (%rdi), %xmm0 # sched: [100:0.25]
   1060 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   1061 ;
   1062 ; ZNVER1-LABEL: test_phsubsw:
   1063 ; ZNVER1:       # %bb.0:
   1064 ; ZNVER1-NEXT:    vphsubsw %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
   1065 ; ZNVER1-NEXT:    vphsubsw (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
   1066 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Calls @llvm.x86.ssse3.phsub.sw.128 twice so that both operand forms
          ; show up in the output. The first call takes the two vector arguments
          ; (register form). The second combines that result with a 16-byte
          ; aligned load from %a2, which the assertions above expect to appear
          ; as the (%rdi) memory-operand form with its own sched annotation.
   1067   %1 = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1)
   1068   %2 = load <8 x i16>, <8 x i16> *%a2, align 16
   1069   %3 = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %1, <8 x i16> %2)
   1070   ret <8 x i16> %3
   1071 }
        ; Intrinsic used by test_phsubsw; takes two <8 x i16> vectors and returns one.
   1072 declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
   1073 
   1074 define <8 x i16> @test_phsubw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
   1075 ; GENERIC-LABEL: test_phsubw:
   1076 ; GENERIC:       # %bb.0:
   1077 ; GENERIC-NEXT:    phsubw %xmm1, %xmm0 # sched: [3:1.50]
   1078 ; GENERIC-NEXT:    phsubw (%rdi), %xmm0 # sched: [9:1.50]
   1079 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1080 ;
   1081 ; ATOM-LABEL: test_phsubw:
   1082 ; ATOM:       # %bb.0:
   1083 ; ATOM-NEXT:    phsubw %xmm1, %xmm0 # sched: [7:3.50]
   1084 ; ATOM-NEXT:    phsubw (%rdi), %xmm0 # sched: [8:4.00]
   1085 ; ATOM-NEXT:    retq # sched: [79:39.50]
   1086 ;
   1087 ; SLM-LABEL: test_phsubw:
   1088 ; SLM:       # %bb.0:
   1089 ; SLM-NEXT:    phsubw %xmm1, %xmm0 # sched: [1:0.50]
   1090 ; SLM-NEXT:    phsubw (%rdi), %xmm0 # sched: [4:1.00]
   1091 ; SLM-NEXT:    retq # sched: [4:1.00]
   1092 ;
   1093 ; SANDY-SSE-LABEL: test_phsubw:
   1094 ; SANDY-SSE:       # %bb.0:
   1095 ; SANDY-SSE-NEXT:    phsubw %xmm1, %xmm0 # sched: [3:1.50]
   1096 ; SANDY-SSE-NEXT:    phsubw (%rdi), %xmm0 # sched: [9:1.50]
   1097 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   1098 ;
   1099 ; SANDY-LABEL: test_phsubw:
   1100 ; SANDY:       # %bb.0:
   1101 ; SANDY-NEXT:    vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
   1102 ; SANDY-NEXT:    vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
   1103 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1104 ;
   1105 ; HASWELL-SSE-LABEL: test_phsubw:
   1106 ; HASWELL-SSE:       # %bb.0:
   1107 ; HASWELL-SSE-NEXT:    phsubw %xmm1, %xmm0 # sched: [3:2.00]
   1108 ; HASWELL-SSE-NEXT:    phsubw (%rdi), %xmm0 # sched: [9:2.00]
   1109 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1110 ;
   1111 ; HASWELL-LABEL: test_phsubw:
   1112 ; HASWELL:       # %bb.0:
   1113 ; HASWELL-NEXT:    vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
   1114 ; HASWELL-NEXT:    vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
   1115 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1116 ;
   1117 ; BROADWELL-SSE-LABEL: test_phsubw:
   1118 ; BROADWELL-SSE:       # %bb.0:
   1119 ; BROADWELL-SSE-NEXT:    phsubw %xmm1, %xmm0 # sched: [3:2.00]
   1120 ; BROADWELL-SSE-NEXT:    phsubw (%rdi), %xmm0 # sched: [8:2.00]
   1121 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1122 ;
   1123 ; BROADWELL-LABEL: test_phsubw:
   1124 ; BROADWELL:       # %bb.0:
   1125 ; BROADWELL-NEXT:    vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
   1126 ; BROADWELL-NEXT:    vphsubw (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
   1127 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1128 ;
   1129 ; SKYLAKE-SSE-LABEL: test_phsubw:
   1130 ; SKYLAKE-SSE:       # %bb.0:
   1131 ; SKYLAKE-SSE-NEXT:    phsubw %xmm1, %xmm0 # sched: [3:2.00]
   1132 ; SKYLAKE-SSE-NEXT:    phsubw (%rdi), %xmm0 # sched: [9:2.00]
   1133 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   1134 ;
   1135 ; SKYLAKE-LABEL: test_phsubw:
   1136 ; SKYLAKE:       # %bb.0:
   1137 ; SKYLAKE-NEXT:    vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
   1138 ; SKYLAKE-NEXT:    vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
   1139 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1140 ;
   1141 ; SKX-SSE-LABEL: test_phsubw:
   1142 ; SKX-SSE:       # %bb.0:
   1143 ; SKX-SSE-NEXT:    phsubw %xmm1, %xmm0 # sched: [3:2.00]
   1144 ; SKX-SSE-NEXT:    phsubw (%rdi), %xmm0 # sched: [9:2.00]
   1145 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   1146 ;
   1147 ; SKX-LABEL: test_phsubw:
   1148 ; SKX:       # %bb.0:
   1149 ; SKX-NEXT:    vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
   1150 ; SKX-NEXT:    vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
   1151 ; SKX-NEXT:    retq # sched: [7:1.00]
   1152 ;
   1153 ; BTVER2-SSE-LABEL: test_phsubw:
   1154 ; BTVER2-SSE:       # %bb.0:
   1155 ; BTVER2-SSE-NEXT:    phsubw %xmm1, %xmm0 # sched: [1:0.50]
   1156 ; BTVER2-SSE-NEXT:    phsubw (%rdi), %xmm0 # sched: [6:1.00]
   1157 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   1158 ;
   1159 ; BTVER2-LABEL: test_phsubw:
   1160 ; BTVER2:       # %bb.0:
   1161 ; BTVER2-NEXT:    vphsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   1162 ; BTVER2-NEXT:    vphsubw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   1163 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1164 ;
   1165 ; ZNVER1-SSE-LABEL: test_phsubw:
   1166 ; ZNVER1-SSE:       # %bb.0:
   1167 ; ZNVER1-SSE-NEXT:    phsubw %xmm1, %xmm0 # sched: [100:0.25]
   1168 ; ZNVER1-SSE-NEXT:    phsubw (%rdi), %xmm0 # sched: [100:0.25]
   1169 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   1170 ;
   1171 ; ZNVER1-LABEL: test_phsubw:
   1172 ; ZNVER1:       # %bb.0:
   1173 ; ZNVER1-NEXT:    vphsubw %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
   1174 ; ZNVER1-NEXT:    vphsubw (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
   1175 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Calls @llvm.x86.ssse3.phsub.w.128 twice so that both operand forms
          ; show up in the output. The first call takes the two vector arguments
          ; (register form). The second combines that result with a 16-byte
          ; aligned load from %a2, which the assertions above expect to appear
          ; as the (%rdi) memory-operand form with its own sched annotation.
   1176   %1 = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1)
   1177   %2 = load <8 x i16>, <8 x i16> *%a2, align 16
   1178   %3 = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %1, <8 x i16> %2)
   1179   ret <8 x i16> %3
   1180 }
        ; Intrinsic used by test_phsubw; takes two <8 x i16> vectors and returns one.
   1181 declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone
   1182 
   1183 define <8 x i16> @test_pmaddubsw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
   1184 ; GENERIC-LABEL: test_pmaddubsw:
   1185 ; GENERIC:       # %bb.0:
   1186 ; GENERIC-NEXT:    pmaddubsw %xmm1, %xmm0 # sched: [5:1.00]
   1187 ; GENERIC-NEXT:    pmaddubsw (%rdi), %xmm0 # sched: [11:1.00]
   1188 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1189 ;
   1190 ; ATOM-LABEL: test_pmaddubsw:
   1191 ; ATOM:       # %bb.0:
   1192 ; ATOM-NEXT:    pmaddubsw %xmm1, %xmm0 # sched: [5:5.00]
   1193 ; ATOM-NEXT:    pmaddubsw (%rdi), %xmm0 # sched: [5:5.00]
   1194 ; ATOM-NEXT:    retq # sched: [79:39.50]
   1195 ;
   1196 ; SLM-LABEL: test_pmaddubsw:
   1197 ; SLM:       # %bb.0:
   1198 ; SLM-NEXT:    pmaddubsw %xmm1, %xmm0 # sched: [4:1.00]
   1199 ; SLM-NEXT:    pmaddubsw (%rdi), %xmm0 # sched: [7:1.00]
   1200 ; SLM-NEXT:    retq # sched: [4:1.00]
   1201 ;
   1202 ; SANDY-SSE-LABEL: test_pmaddubsw:
   1203 ; SANDY-SSE:       # %bb.0:
   1204 ; SANDY-SSE-NEXT:    pmaddubsw %xmm1, %xmm0 # sched: [5:1.00]
   1205 ; SANDY-SSE-NEXT:    pmaddubsw (%rdi), %xmm0 # sched: [11:1.00]
   1206 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   1207 ;
   1208 ; SANDY-LABEL: test_pmaddubsw:
   1209 ; SANDY:       # %bb.0:
   1210 ; SANDY-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
   1211 ; SANDY-NEXT:    vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
   1212 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1213 ;
   1214 ; HASWELL-SSE-LABEL: test_pmaddubsw:
   1215 ; HASWELL-SSE:       # %bb.0:
   1216 ; HASWELL-SSE-NEXT:    pmaddubsw %xmm1, %xmm0 # sched: [5:1.00]
   1217 ; HASWELL-SSE-NEXT:    pmaddubsw (%rdi), %xmm0 # sched: [11:1.00]
   1218 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1219 ;
   1220 ; HASWELL-LABEL: test_pmaddubsw:
   1221 ; HASWELL:       # %bb.0:
   1222 ; HASWELL-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
   1223 ; HASWELL-NEXT:    vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
   1224 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1225 ;
   1226 ; BROADWELL-SSE-LABEL: test_pmaddubsw:
   1227 ; BROADWELL-SSE:       # %bb.0:
   1228 ; BROADWELL-SSE-NEXT:    pmaddubsw %xmm1, %xmm0 # sched: [5:1.00]
   1229 ; BROADWELL-SSE-NEXT:    pmaddubsw (%rdi), %xmm0 # sched: [10:1.00]
   1230 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1231 ;
   1232 ; BROADWELL-LABEL: test_pmaddubsw:
   1233 ; BROADWELL:       # %bb.0:
   1234 ; BROADWELL-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
   1235 ; BROADWELL-NEXT:    vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
   1236 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1237 ;
   1238 ; SKYLAKE-SSE-LABEL: test_pmaddubsw:
   1239 ; SKYLAKE-SSE:       # %bb.0:
   1240 ; SKYLAKE-SSE-NEXT:    pmaddubsw %xmm1, %xmm0 # sched: [4:0.50]
   1241 ; SKYLAKE-SSE-NEXT:    pmaddubsw (%rdi), %xmm0 # sched: [10:0.50]
   1242 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   1243 ;
   1244 ; SKYLAKE-LABEL: test_pmaddubsw:
   1245 ; SKYLAKE:       # %bb.0:
   1246 ; SKYLAKE-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   1247 ; SKYLAKE-NEXT:    vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
   1248 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1249 ;
   1250 ; SKX-SSE-LABEL: test_pmaddubsw:
   1251 ; SKX-SSE:       # %bb.0:
   1252 ; SKX-SSE-NEXT:    pmaddubsw %xmm1, %xmm0 # sched: [4:0.50]
   1253 ; SKX-SSE-NEXT:    pmaddubsw (%rdi), %xmm0 # sched: [10:0.50]
   1254 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   1255 ;
   1256 ; SKX-LABEL: test_pmaddubsw:
   1257 ; SKX:       # %bb.0:
   1258 ; SKX-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   1259 ; SKX-NEXT:    vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
   1260 ; SKX-NEXT:    retq # sched: [7:1.00]
   1261 ;
   1262 ; BTVER2-SSE-LABEL: test_pmaddubsw:
   1263 ; BTVER2-SSE:       # %bb.0:
   1264 ; BTVER2-SSE-NEXT:    pmaddubsw %xmm1, %xmm0 # sched: [2:1.00]
   1265 ; BTVER2-SSE-NEXT:    pmaddubsw (%rdi), %xmm0 # sched: [7:1.00]
   1266 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   1267 ;
   1268 ; BTVER2-LABEL: test_pmaddubsw:
   1269 ; BTVER2:       # %bb.0:
   1270 ; BTVER2-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   1271 ; BTVER2-NEXT:    vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   1272 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1273 ;
   1274 ; ZNVER1-SSE-LABEL: test_pmaddubsw:
   1275 ; ZNVER1-SSE:       # %bb.0:
   1276 ; ZNVER1-SSE-NEXT:    pmaddubsw %xmm1, %xmm0 # sched: [4:1.00]
   1277 ; ZNVER1-SSE-NEXT:    pmaddubsw (%rdi), %xmm0 # sched: [11:1.00]
   1278 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   1279 ;
   1280 ; ZNVER1-LABEL: test_pmaddubsw:
   1281 ; ZNVER1:       # %bb.0:
   1282 ; ZNVER1-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
   1283 ; ZNVER1-NEXT:    vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
   1284 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   1285   %1 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1)
   1286   %2 = load <16 x i8>, <16 x i8> *%a2, align 16
   1287   %3 = bitcast <8 x i16> %1 to <16 x i8>
   1288   %4 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %3, <16 x i8> %2)
   1289   ret <8 x i16> %4
   1290 }
   1291 declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone ; intrinsic called twice by the test_pmaddubsw body above
   1292 
   1293 define <8 x i16> @test_pmulhrsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
        ; Scheduling-annotation test for pmulhrsw. The body calls
        ; @llvm.x86.ssse3.pmul.hr.sw.128 twice: once on the two vector arguments
        ; (register-register form) and once on that result plus a vector loaded
        ; from %a2; every expected output below shows that load folded into the
        ; instruction as the (%rdi) operand.
        ; Each prefix group pins the assembly and the `# sched: [..]` annotation
        ; printed by llc -print-schedule for one CPU configuration from the RUN
        ; lines. GENERIC, ATOM, SLM and the groups ending in -SSE expect the
        ; two-operand pmulhrsw; the remaining groups expect the three-operand
        ; vpmulhrsw.
        ; NOTE(review): the pair inside `sched: [a:b]` is presumably
        ; latency and reciprocal throughput -- confirm against the printer.
        ; The assertions are autogenerated (see the NOTE at the top of the file);
        ; regenerate them with the script instead of editing by hand.
   1294 ; GENERIC-LABEL: test_pmulhrsw:
   1295 ; GENERIC:       # %bb.0:
   1296 ; GENERIC-NEXT:    pmulhrsw %xmm1, %xmm0 # sched: [5:1.00]
   1297 ; GENERIC-NEXT:    pmulhrsw (%rdi), %xmm0 # sched: [11:1.00]
   1298 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1299 ;
   1300 ; ATOM-LABEL: test_pmulhrsw:
   1301 ; ATOM:       # %bb.0:
   1302 ; ATOM-NEXT:    pmulhrsw %xmm1, %xmm0 # sched: [5:5.00]
   1303 ; ATOM-NEXT:    pmulhrsw (%rdi), %xmm0 # sched: [5:5.00]
   1304 ; ATOM-NEXT:    retq # sched: [79:39.50]
   1305 ;
   1306 ; SLM-LABEL: test_pmulhrsw:
   1307 ; SLM:       # %bb.0:
   1308 ; SLM-NEXT:    pmulhrsw %xmm1, %xmm0 # sched: [4:1.00]
   1309 ; SLM-NEXT:    pmulhrsw (%rdi), %xmm0 # sched: [7:1.00]
   1310 ; SLM-NEXT:    retq # sched: [4:1.00]
   1311 ;
   1312 ; SANDY-SSE-LABEL: test_pmulhrsw:
   1313 ; SANDY-SSE:       # %bb.0:
   1314 ; SANDY-SSE-NEXT:    pmulhrsw %xmm1, %xmm0 # sched: [5:1.00]
   1315 ; SANDY-SSE-NEXT:    pmulhrsw (%rdi), %xmm0 # sched: [11:1.00]
   1316 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   1317 ;
   1318 ; SANDY-LABEL: test_pmulhrsw:
   1319 ; SANDY:       # %bb.0:
   1320 ; SANDY-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
   1321 ; SANDY-NEXT:    vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
   1322 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1323 ;
   1324 ; HASWELL-SSE-LABEL: test_pmulhrsw:
   1325 ; HASWELL-SSE:       # %bb.0:
   1326 ; HASWELL-SSE-NEXT:    pmulhrsw %xmm1, %xmm0 # sched: [5:1.00]
   1327 ; HASWELL-SSE-NEXT:    pmulhrsw (%rdi), %xmm0 # sched: [11:1.00]
   1328 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1329 ;
   1330 ; HASWELL-LABEL: test_pmulhrsw:
   1331 ; HASWELL:       # %bb.0:
   1332 ; HASWELL-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
   1333 ; HASWELL-NEXT:    vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
   1334 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1335 ;
   1336 ; BROADWELL-SSE-LABEL: test_pmulhrsw:
   1337 ; BROADWELL-SSE:       # %bb.0:
   1338 ; BROADWELL-SSE-NEXT:    pmulhrsw %xmm1, %xmm0 # sched: [5:1.00]
   1339 ; BROADWELL-SSE-NEXT:    pmulhrsw (%rdi), %xmm0 # sched: [10:1.00]
   1340 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1341 ;
   1342 ; BROADWELL-LABEL: test_pmulhrsw:
   1343 ; BROADWELL:       # %bb.0:
   1344 ; BROADWELL-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
   1345 ; BROADWELL-NEXT:    vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
   1346 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1347 ;
   1348 ; SKYLAKE-SSE-LABEL: test_pmulhrsw:
   1349 ; SKYLAKE-SSE:       # %bb.0:
   1350 ; SKYLAKE-SSE-NEXT:    pmulhrsw %xmm1, %xmm0 # sched: [4:0.50]
   1351 ; SKYLAKE-SSE-NEXT:    pmulhrsw (%rdi), %xmm0 # sched: [10:0.50]
   1352 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   1353 ;
   1354 ; SKYLAKE-LABEL: test_pmulhrsw:
   1355 ; SKYLAKE:       # %bb.0:
   1356 ; SKYLAKE-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   1357 ; SKYLAKE-NEXT:    vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
   1358 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1359 ;
   1360 ; SKX-SSE-LABEL: test_pmulhrsw:
   1361 ; SKX-SSE:       # %bb.0:
   1362 ; SKX-SSE-NEXT:    pmulhrsw %xmm1, %xmm0 # sched: [4:0.50]
   1363 ; SKX-SSE-NEXT:    pmulhrsw (%rdi), %xmm0 # sched: [10:0.50]
   1364 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   1365 ;
   1366 ; SKX-LABEL: test_pmulhrsw:
   1367 ; SKX:       # %bb.0:
   1368 ; SKX-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   1369 ; SKX-NEXT:    vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
   1370 ; SKX-NEXT:    retq # sched: [7:1.00]
   1371 ;
   1372 ; BTVER2-SSE-LABEL: test_pmulhrsw:
   1373 ; BTVER2-SSE:       # %bb.0:
   1374 ; BTVER2-SSE-NEXT:    pmulhrsw %xmm1, %xmm0 # sched: [2:1.00]
   1375 ; BTVER2-SSE-NEXT:    pmulhrsw (%rdi), %xmm0 # sched: [7:1.00]
   1376 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   1377 ;
   1378 ; BTVER2-LABEL: test_pmulhrsw:
   1379 ; BTVER2:       # %bb.0:
   1380 ; BTVER2-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   1381 ; BTVER2-NEXT:    vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   1382 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1383 ;
   1384 ; ZNVER1-SSE-LABEL: test_pmulhrsw:
   1385 ; ZNVER1-SSE:       # %bb.0:
   1386 ; ZNVER1-SSE-NEXT:    pmulhrsw %xmm1, %xmm0 # sched: [4:1.00]
   1387 ; ZNVER1-SSE-NEXT:    pmulhrsw (%rdi), %xmm0 # sched: [11:1.00]
   1388 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   1389 ;
   1390 ; ZNVER1-LABEL: test_pmulhrsw:
   1391 ; ZNVER1:       # %bb.0:
   1392 ; ZNVER1-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
   1393 ; ZNVER1-NEXT:    vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
   1394 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   1395   %1 = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; register-register form: both operands are function arguments
   1396   %2 = load <8 x i16>, <8 x i16> *%a2, align 16 ; operand for the memory form, read through the pointer argument
   1397   %3 = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %1, <8 x i16> %2) ; consumes %1, so the second instruction depends on the first
   1398   ret <8 x i16> %3
   1399 }
   1400 declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone ; intrinsic exercised by @test_pmulhrsw
   1401 
   1402 define <16 x i8> @test_pshufb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
        ; Scheduling-annotation test for pshufb. The body calls
        ; @llvm.x86.ssse3.pshuf.b.128 twice: once on the two vector arguments
        ; (register-register form) and once on that result plus a vector loaded
        ; from %a2; every expected output below shows that load folded into the
        ; instruction as the (%rdi) operand.
        ; Each prefix group pins the assembly and the `# sched: [..]` annotation
        ; printed by llc -print-schedule for one CPU configuration from the RUN
        ; lines. GENERIC, ATOM, SLM and the groups ending in -SSE expect the
        ; two-operand pshufb; the remaining groups expect the three-operand
        ; vpshufb.
        ; NOTE(review): the pair inside `sched: [a:b]` is presumably
        ; latency and reciprocal throughput -- confirm against the printer.
        ; The assertions are autogenerated (see the NOTE at the top of the file);
        ; regenerate them with the script instead of editing by hand.
   1403 ; GENERIC-LABEL: test_pshufb:
   1404 ; GENERIC:       # %bb.0:
   1405 ; GENERIC-NEXT:    pshufb %xmm1, %xmm0 # sched: [1:0.50]
   1406 ; GENERIC-NEXT:    pshufb (%rdi), %xmm0 # sched: [7:0.50]
   1407 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1408 ;
   1409 ; ATOM-LABEL: test_pshufb:
   1410 ; ATOM:       # %bb.0:
   1411 ; ATOM-NEXT:    pshufb %xmm1, %xmm0 # sched: [4:2.00]
   1412 ; ATOM-NEXT:    pshufb (%rdi), %xmm0 # sched: [5:2.50]
   1413 ; ATOM-NEXT:    retq # sched: [79:39.50]
   1414 ;
   1415 ; SLM-LABEL: test_pshufb:
   1416 ; SLM:       # %bb.0:
   1417 ; SLM-NEXT:    pshufb %xmm1, %xmm0 # sched: [1:1.00]
   1418 ; SLM-NEXT:    pshufb (%rdi), %xmm0 # sched: [4:1.00]
   1419 ; SLM-NEXT:    retq # sched: [4:1.00]
   1420 ;
   1421 ; SANDY-SSE-LABEL: test_pshufb:
   1422 ; SANDY-SSE:       # %bb.0:
   1423 ; SANDY-SSE-NEXT:    pshufb %xmm1, %xmm0 # sched: [1:0.50]
   1424 ; SANDY-SSE-NEXT:    pshufb (%rdi), %xmm0 # sched: [7:0.50]
   1425 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   1426 ;
   1427 ; SANDY-LABEL: test_pshufb:
   1428 ; SANDY:       # %bb.0:
   1429 ; SANDY-NEXT:    vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   1430 ; SANDY-NEXT:    vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   1431 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1432 ;
   1433 ; HASWELL-SSE-LABEL: test_pshufb:
   1434 ; HASWELL-SSE:       # %bb.0:
   1435 ; HASWELL-SSE-NEXT:    pshufb %xmm1, %xmm0 # sched: [1:1.00]
   1436 ; HASWELL-SSE-NEXT:    pshufb (%rdi), %xmm0 # sched: [7:1.00]
   1437 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1438 ;
   1439 ; HASWELL-LABEL: test_pshufb:
   1440 ; HASWELL:       # %bb.0:
   1441 ; HASWELL-NEXT:    vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   1442 ; HASWELL-NEXT:    vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   1443 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1444 ;
   1445 ; BROADWELL-SSE-LABEL: test_pshufb:
   1446 ; BROADWELL-SSE:       # %bb.0:
   1447 ; BROADWELL-SSE-NEXT:    pshufb %xmm1, %xmm0 # sched: [1:1.00]
   1448 ; BROADWELL-SSE-NEXT:    pshufb (%rdi), %xmm0 # sched: [6:1.00]
   1449 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1450 ;
   1451 ; BROADWELL-LABEL: test_pshufb:
   1452 ; BROADWELL:       # %bb.0:
   1453 ; BROADWELL-NEXT:    vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   1454 ; BROADWELL-NEXT:    vpshufb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   1455 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1456 ;
   1457 ; SKYLAKE-SSE-LABEL: test_pshufb:
   1458 ; SKYLAKE-SSE:       # %bb.0:
   1459 ; SKYLAKE-SSE-NEXT:    pshufb %xmm1, %xmm0 # sched: [1:1.00]
   1460 ; SKYLAKE-SSE-NEXT:    pshufb (%rdi), %xmm0 # sched: [7:1.00]
   1461 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   1462 ;
   1463 ; SKYLAKE-LABEL: test_pshufb:
   1464 ; SKYLAKE:       # %bb.0:
   1465 ; SKYLAKE-NEXT:    vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   1466 ; SKYLAKE-NEXT:    vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   1467 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1468 ;
   1469 ; SKX-SSE-LABEL: test_pshufb:
   1470 ; SKX-SSE:       # %bb.0:
   1471 ; SKX-SSE-NEXT:    pshufb %xmm1, %xmm0 # sched: [1:1.00]
   1472 ; SKX-SSE-NEXT:    pshufb (%rdi), %xmm0 # sched: [7:1.00]
   1473 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   1474 ;
   1475 ; SKX-LABEL: test_pshufb:
   1476 ; SKX:       # %bb.0:
   1477 ; SKX-NEXT:    vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   1478 ; SKX-NEXT:    vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   1479 ; SKX-NEXT:    retq # sched: [7:1.00]
   1480 ;
   1481 ; BTVER2-SSE-LABEL: test_pshufb:
   1482 ; BTVER2-SSE:       # %bb.0:
   1483 ; BTVER2-SSE-NEXT:    pshufb %xmm1, %xmm0 # sched: [2:2.00]
   1484 ; BTVER2-SSE-NEXT:    pshufb (%rdi), %xmm0 # sched: [7:2.00]
   1485 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   1486 ;
   1487 ; BTVER2-LABEL: test_pshufb:
   1488 ; BTVER2:       # %bb.0:
   1489 ; BTVER2-NEXT:    vpshufb %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
   1490 ; BTVER2-NEXT:    vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
   1491 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1492 ;
   1493 ; ZNVER1-SSE-LABEL: test_pshufb:
   1494 ; ZNVER1-SSE:       # %bb.0:
   1495 ; ZNVER1-SSE-NEXT:    pshufb %xmm1, %xmm0 # sched: [1:0.25]
   1496 ; ZNVER1-SSE-NEXT:    pshufb (%rdi), %xmm0 # sched: [8:0.50]
   1497 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   1498 ;
   1499 ; ZNVER1-LABEL: test_pshufb:
   1500 ; ZNVER1:       # %bb.0:
   1501 ; ZNVER1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   1502 ; ZNVER1-NEXT:    vpshufb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   1503 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   1504   %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1) ; register-register form: both operands are function arguments
   1505   %2 = load <16 x i8>, <16 x i8> *%a2, align 16 ; operand for the memory form, read through the pointer argument
   1506   %3 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> %2) ; consumes %1, so the second instruction depends on the first
   1507   ret <16 x i8> %3
   1508 }
   1509 declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone ; intrinsic exercised by @test_pshufb
   1510 
   1511 define <16 x i8> @test_psignb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
        ; Scheduling-annotation test for psignb. The body calls
        ; @llvm.x86.ssse3.psign.b.128 twice: once on the two vector arguments
        ; (register-register form) and once on that result plus a vector loaded
        ; from %a2; every expected output below shows that load folded into the
        ; instruction as the (%rdi) operand.
        ; Each prefix group pins the assembly and the `# sched: [..]` annotation
        ; printed by llc -print-schedule for one CPU configuration from the RUN
        ; lines. GENERIC, ATOM, SLM and the groups ending in -SSE expect the
        ; two-operand psignb; the remaining groups expect the three-operand
        ; vpsignb.
        ; The ATOM group additionally expects four nop instructions before retq.
        ; NOTE(review): presumably padding inserted by an Atom-specific pass --
        ; the reason is not visible in this file, confirm before relying on it.
        ; NOTE(review): the pair inside `sched: [a:b]` is presumably
        ; latency and reciprocal throughput -- confirm against the printer.
        ; The assertions are autogenerated (see the NOTE at the top of the file);
        ; regenerate them with the script instead of editing by hand.
   1512 ; GENERIC-LABEL: test_psignb:
   1513 ; GENERIC:       # %bb.0:
   1514 ; GENERIC-NEXT:    psignb %xmm1, %xmm0 # sched: [1:0.50]
   1515 ; GENERIC-NEXT:    psignb (%rdi), %xmm0 # sched: [7:0.50]
   1516 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1517 ;
   1518 ; ATOM-LABEL: test_psignb:
   1519 ; ATOM:       # %bb.0:
   1520 ; ATOM-NEXT:    psignb %xmm1, %xmm0 # sched: [1:0.50]
   1521 ; ATOM-NEXT:    psignb (%rdi), %xmm0 # sched: [1:1.00]
   1522 ; ATOM-NEXT:    nop # sched: [1:0.50]
   1523 ; ATOM-NEXT:    nop # sched: [1:0.50]
   1524 ; ATOM-NEXT:    nop # sched: [1:0.50]
   1525 ; ATOM-NEXT:    nop # sched: [1:0.50]
   1526 ; ATOM-NEXT:    retq # sched: [79:39.50]
   1527 ;
   1528 ; SLM-LABEL: test_psignb:
   1529 ; SLM:       # %bb.0:
   1530 ; SLM-NEXT:    psignb %xmm1, %xmm0 # sched: [1:0.50]
   1531 ; SLM-NEXT:    psignb (%rdi), %xmm0 # sched: [4:1.00]
   1532 ; SLM-NEXT:    retq # sched: [4:1.00]
   1533 ;
   1534 ; SANDY-SSE-LABEL: test_psignb:
   1535 ; SANDY-SSE:       # %bb.0:
   1536 ; SANDY-SSE-NEXT:    psignb %xmm1, %xmm0 # sched: [1:0.50]
   1537 ; SANDY-SSE-NEXT:    psignb (%rdi), %xmm0 # sched: [7:0.50]
   1538 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   1539 ;
   1540 ; SANDY-LABEL: test_psignb:
   1541 ; SANDY:       # %bb.0:
   1542 ; SANDY-NEXT:    vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   1543 ; SANDY-NEXT:    vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   1544 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1545 ;
   1546 ; HASWELL-SSE-LABEL: test_psignb:
   1547 ; HASWELL-SSE:       # %bb.0:
   1548 ; HASWELL-SSE-NEXT:    psignb %xmm1, %xmm0 # sched: [1:0.50]
   1549 ; HASWELL-SSE-NEXT:    psignb (%rdi), %xmm0 # sched: [7:0.50]
   1550 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1551 ;
   1552 ; HASWELL-LABEL: test_psignb:
   1553 ; HASWELL:       # %bb.0:
   1554 ; HASWELL-NEXT:    vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   1555 ; HASWELL-NEXT:    vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   1556 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1557 ;
   1558 ; BROADWELL-SSE-LABEL: test_psignb:
   1559 ; BROADWELL-SSE:       # %bb.0:
   1560 ; BROADWELL-SSE-NEXT:    psignb %xmm1, %xmm0 # sched: [1:0.50]
   1561 ; BROADWELL-SSE-NEXT:    psignb (%rdi), %xmm0 # sched: [6:0.50]
   1562 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1563 ;
   1564 ; BROADWELL-LABEL: test_psignb:
   1565 ; BROADWELL:       # %bb.0:
   1566 ; BROADWELL-NEXT:    vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   1567 ; BROADWELL-NEXT:    vpsignb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   1568 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1569 ;
   1570 ; SKYLAKE-SSE-LABEL: test_psignb:
   1571 ; SKYLAKE-SSE:       # %bb.0:
   1572 ; SKYLAKE-SSE-NEXT:    psignb %xmm1, %xmm0 # sched: [1:0.50]
   1573 ; SKYLAKE-SSE-NEXT:    psignb (%rdi), %xmm0 # sched: [7:0.50]
   1574 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   1575 ;
   1576 ; SKYLAKE-LABEL: test_psignb:
   1577 ; SKYLAKE:       # %bb.0:
   1578 ; SKYLAKE-NEXT:    vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   1579 ; SKYLAKE-NEXT:    vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   1580 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1581 ;
   1582 ; SKX-SSE-LABEL: test_psignb:
   1583 ; SKX-SSE:       # %bb.0:
   1584 ; SKX-SSE-NEXT:    psignb %xmm1, %xmm0 # sched: [1:0.50]
   1585 ; SKX-SSE-NEXT:    psignb (%rdi), %xmm0 # sched: [7:0.50]
   1586 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   1587 ;
   1588 ; SKX-LABEL: test_psignb:
   1589 ; SKX:       # %bb.0:
   1590 ; SKX-NEXT:    vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   1591 ; SKX-NEXT:    vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   1592 ; SKX-NEXT:    retq # sched: [7:1.00]
   1593 ;
   1594 ; BTVER2-SSE-LABEL: test_psignb:
   1595 ; BTVER2-SSE:       # %bb.0:
   1596 ; BTVER2-SSE-NEXT:    psignb %xmm1, %xmm0 # sched: [1:0.50]
   1597 ; BTVER2-SSE-NEXT:    psignb (%rdi), %xmm0 # sched: [6:1.00]
   1598 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   1599 ;
   1600 ; BTVER2-LABEL: test_psignb:
   1601 ; BTVER2:       # %bb.0:
   1602 ; BTVER2-NEXT:    vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   1603 ; BTVER2-NEXT:    vpsignb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   1604 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1605 ;
   1606 ; ZNVER1-SSE-LABEL: test_psignb:
   1607 ; ZNVER1-SSE:       # %bb.0:
   1608 ; ZNVER1-SSE-NEXT:    psignb %xmm1, %xmm0 # sched: [1:0.25]
   1609 ; ZNVER1-SSE-NEXT:    psignb (%rdi), %xmm0 # sched: [8:0.50]
   1610 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   1611 ;
   1612 ; ZNVER1-LABEL: test_psignb:
   1613 ; ZNVER1:       # %bb.0:
   1614 ; ZNVER1-NEXT:    vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   1615 ; ZNVER1-NEXT:    vpsignb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   1616 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   1617   %1 = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1) ; register-register form: both operands are function arguments
   1618   %2 = load <16 x i8>, <16 x i8> *%a2, align 16 ; operand for the memory form, read through the pointer argument
   1619   %3 = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %1, <16 x i8> %2) ; consumes %1, so the second instruction depends on the first
   1620   ret <16 x i8> %3
   1621 }
   1622 declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone ; intrinsic exercised by @test_psignb
   1623 
   1624 define <4 x i32> @test_psignd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
        ; Scheduling-annotation test for psignd. The body calls
        ; @llvm.x86.ssse3.psign.d.128 twice: once on the two vector arguments
        ; (register-register form) and once on that result plus a vector loaded
        ; from %a2; every expected output below shows that load folded into the
        ; instruction as the (%rdi) operand.
        ; Each prefix group pins the assembly and the `# sched: [..]` annotation
        ; printed by llc -print-schedule for one CPU configuration from the RUN
        ; lines. GENERIC, ATOM, SLM and the groups ending in -SSE expect the
        ; two-operand psignd; the remaining groups expect the three-operand
        ; vpsignd.
        ; The ATOM group additionally expects four nop instructions before retq.
        ; NOTE(review): presumably padding inserted by an Atom-specific pass --
        ; the reason is not visible in this file, confirm before relying on it.
        ; NOTE(review): the pair inside `sched: [a:b]` is presumably
        ; latency and reciprocal throughput -- confirm against the printer.
        ; The assertions are autogenerated (see the NOTE at the top of the file);
        ; regenerate them with the script instead of editing by hand.
   1625 ; GENERIC-LABEL: test_psignd:
   1626 ; GENERIC:       # %bb.0:
   1627 ; GENERIC-NEXT:    psignd %xmm1, %xmm0 # sched: [1:0.50]
   1628 ; GENERIC-NEXT:    psignd (%rdi), %xmm0 # sched: [7:0.50]
   1629 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1630 ;
   1631 ; ATOM-LABEL: test_psignd:
   1632 ; ATOM:       # %bb.0:
   1633 ; ATOM-NEXT:    psignd %xmm1, %xmm0 # sched: [1:0.50]
   1634 ; ATOM-NEXT:    psignd (%rdi), %xmm0 # sched: [1:1.00]
   1635 ; ATOM-NEXT:    nop # sched: [1:0.50]
   1636 ; ATOM-NEXT:    nop # sched: [1:0.50]
   1637 ; ATOM-NEXT:    nop # sched: [1:0.50]
   1638 ; ATOM-NEXT:    nop # sched: [1:0.50]
   1639 ; ATOM-NEXT:    retq # sched: [79:39.50]
   1640 ;
   1641 ; SLM-LABEL: test_psignd:
   1642 ; SLM:       # %bb.0:
   1643 ; SLM-NEXT:    psignd %xmm1, %xmm0 # sched: [1:0.50]
   1644 ; SLM-NEXT:    psignd (%rdi), %xmm0 # sched: [4:1.00]
   1645 ; SLM-NEXT:    retq # sched: [4:1.00]
   1646 ;
   1647 ; SANDY-SSE-LABEL: test_psignd:
   1648 ; SANDY-SSE:       # %bb.0:
   1649 ; SANDY-SSE-NEXT:    psignd %xmm1, %xmm0 # sched: [1:0.50]
   1650 ; SANDY-SSE-NEXT:    psignd (%rdi), %xmm0 # sched: [7:0.50]
   1651 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   1652 ;
   1653 ; SANDY-LABEL: test_psignd:
   1654 ; SANDY:       # %bb.0:
   1655 ; SANDY-NEXT:    vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   1656 ; SANDY-NEXT:    vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   1657 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1658 ;
   1659 ; HASWELL-SSE-LABEL: test_psignd:
   1660 ; HASWELL-SSE:       # %bb.0:
   1661 ; HASWELL-SSE-NEXT:    psignd %xmm1, %xmm0 # sched: [1:0.50]
   1662 ; HASWELL-SSE-NEXT:    psignd (%rdi), %xmm0 # sched: [7:0.50]
   1663 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1664 ;
   1665 ; HASWELL-LABEL: test_psignd:
   1666 ; HASWELL:       # %bb.0:
   1667 ; HASWELL-NEXT:    vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   1668 ; HASWELL-NEXT:    vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   1669 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1670 ;
   1671 ; BROADWELL-SSE-LABEL: test_psignd:
   1672 ; BROADWELL-SSE:       # %bb.0:
   1673 ; BROADWELL-SSE-NEXT:    psignd %xmm1, %xmm0 # sched: [1:0.50]
   1674 ; BROADWELL-SSE-NEXT:    psignd (%rdi), %xmm0 # sched: [6:0.50]
   1675 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1676 ;
   1677 ; BROADWELL-LABEL: test_psignd:
   1678 ; BROADWELL:       # %bb.0:
   1679 ; BROADWELL-NEXT:    vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   1680 ; BROADWELL-NEXT:    vpsignd (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   1681 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1682 ;
   1683 ; SKYLAKE-SSE-LABEL: test_psignd:
   1684 ; SKYLAKE-SSE:       # %bb.0:
   1685 ; SKYLAKE-SSE-NEXT:    psignd %xmm1, %xmm0 # sched: [1:0.50]
   1686 ; SKYLAKE-SSE-NEXT:    psignd (%rdi), %xmm0 # sched: [7:0.50]
   1687 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   1688 ;
   1689 ; SKYLAKE-LABEL: test_psignd:
   1690 ; SKYLAKE:       # %bb.0:
   1691 ; SKYLAKE-NEXT:    vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   1692 ; SKYLAKE-NEXT:    vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   1693 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1694 ;
   1695 ; SKX-SSE-LABEL: test_psignd:
   1696 ; SKX-SSE:       # %bb.0:
   1697 ; SKX-SSE-NEXT:    psignd %xmm1, %xmm0 # sched: [1:0.50]
   1698 ; SKX-SSE-NEXT:    psignd (%rdi), %xmm0 # sched: [7:0.50]
   1699 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   1700 ;
   1701 ; SKX-LABEL: test_psignd:
   1702 ; SKX:       # %bb.0:
   1703 ; SKX-NEXT:    vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   1704 ; SKX-NEXT:    vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   1705 ; SKX-NEXT:    retq # sched: [7:1.00]
   1706 ;
   1707 ; BTVER2-SSE-LABEL: test_psignd:
   1708 ; BTVER2-SSE:       # %bb.0:
   1709 ; BTVER2-SSE-NEXT:    psignd %xmm1, %xmm0 # sched: [1:0.50]
   1710 ; BTVER2-SSE-NEXT:    psignd (%rdi), %xmm0 # sched: [6:1.00]
   1711 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   1712 ;
   1713 ; BTVER2-LABEL: test_psignd:
   1714 ; BTVER2:       # %bb.0:
   1715 ; BTVER2-NEXT:    vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   1716 ; BTVER2-NEXT:    vpsignd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   1717 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1718 ;
   1719 ; ZNVER1-SSE-LABEL: test_psignd:
   1720 ; ZNVER1-SSE:       # %bb.0:
   1721 ; ZNVER1-SSE-NEXT:    psignd %xmm1, %xmm0 # sched: [1:0.25]
   1722 ; ZNVER1-SSE-NEXT:    psignd (%rdi), %xmm0 # sched: [8:0.50]
   1723 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   1724 ;
   1725 ; ZNVER1-LABEL: test_psignd:
   1726 ; ZNVER1:       # %bb.0:
   1727 ; ZNVER1-NEXT:    vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   1728 ; ZNVER1-NEXT:    vpsignd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   1729 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   1730   %1 = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1) ; register-register form: both operands are function arguments
   1731   %2 = load <4 x i32>, <4 x i32> *%a2, align 16 ; operand for the memory form, read through the pointer argument
   1732   %3 = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %1, <4 x i32> %2) ; consumes %1, so the second instruction depends on the first
   1733   ret <4 x i32> %3
   1734 }
   1735 declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone ; intrinsic exercised by @test_psignd
   1736 
   1737 define <8 x i16> @test_psignw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
        ; Scheduling-annotation test for psignw. The body calls
        ; @llvm.x86.ssse3.psign.w.128 twice: once on the two vector arguments
        ; (register-register form) and once on that result plus a vector loaded
        ; from %a2; every expected output below shows that load folded into the
        ; instruction as the (%rdi) operand.
        ; Each prefix group pins the assembly and the `# sched: [..]` annotation
        ; printed by llc -print-schedule for one CPU configuration from the RUN
        ; lines. GENERIC, ATOM, SLM and the groups ending in -SSE expect the
        ; two-operand psignw; the remaining groups expect the three-operand
        ; vpsignw.
        ; The ATOM group additionally expects four nop instructions before retq.
        ; NOTE(review): presumably padding inserted by an Atom-specific pass --
        ; the reason is not visible in this file, confirm before relying on it.
        ; NOTE(review): the pair inside `sched: [a:b]` is presumably
        ; latency and reciprocal throughput -- confirm against the printer.
        ; The assertions are autogenerated (see the NOTE at the top of the file);
        ; regenerate them with the script instead of editing by hand.
   1738 ; GENERIC-LABEL: test_psignw:
   1739 ; GENERIC:       # %bb.0:
   1740 ; GENERIC-NEXT:    psignw %xmm1, %xmm0 # sched: [1:0.50]
   1741 ; GENERIC-NEXT:    psignw (%rdi), %xmm0 # sched: [7:0.50]
   1742 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1743 ;
   1744 ; ATOM-LABEL: test_psignw:
   1745 ; ATOM:       # %bb.0:
   1746 ; ATOM-NEXT:    psignw %xmm1, %xmm0 # sched: [1:0.50]
   1747 ; ATOM-NEXT:    psignw (%rdi), %xmm0 # sched: [1:1.00]
   1748 ; ATOM-NEXT:    nop # sched: [1:0.50]
   1749 ; ATOM-NEXT:    nop # sched: [1:0.50]
   1750 ; ATOM-NEXT:    nop # sched: [1:0.50]
   1751 ; ATOM-NEXT:    nop # sched: [1:0.50]
   1752 ; ATOM-NEXT:    retq # sched: [79:39.50]
   1753 ;
   1754 ; SLM-LABEL: test_psignw:
   1755 ; SLM:       # %bb.0:
   1756 ; SLM-NEXT:    psignw %xmm1, %xmm0 # sched: [1:0.50]
   1757 ; SLM-NEXT:    psignw (%rdi), %xmm0 # sched: [4:1.00]
   1758 ; SLM-NEXT:    retq # sched: [4:1.00]
   1759 ;
   1760 ; SANDY-SSE-LABEL: test_psignw:
   1761 ; SANDY-SSE:       # %bb.0:
   1762 ; SANDY-SSE-NEXT:    psignw %xmm1, %xmm0 # sched: [1:0.50]
   1763 ; SANDY-SSE-NEXT:    psignw (%rdi), %xmm0 # sched: [7:0.50]
   1764 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   1765 ;
   1766 ; SANDY-LABEL: test_psignw:
   1767 ; SANDY:       # %bb.0:
   1768 ; SANDY-NEXT:    vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   1769 ; SANDY-NEXT:    vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   1770 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1771 ;
   1772 ; HASWELL-SSE-LABEL: test_psignw:
   1773 ; HASWELL-SSE:       # %bb.0:
   1774 ; HASWELL-SSE-NEXT:    psignw %xmm1, %xmm0 # sched: [1:0.50]
   1775 ; HASWELL-SSE-NEXT:    psignw (%rdi), %xmm0 # sched: [7:0.50]
   1776 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1777 ;
   1778 ; HASWELL-LABEL: test_psignw:
   1779 ; HASWELL:       # %bb.0:
   1780 ; HASWELL-NEXT:    vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   1781 ; HASWELL-NEXT:    vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   1782 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1783 ;
   1784 ; BROADWELL-SSE-LABEL: test_psignw:
   1785 ; BROADWELL-SSE:       # %bb.0:
   1786 ; BROADWELL-SSE-NEXT:    psignw %xmm1, %xmm0 # sched: [1:0.50]
   1787 ; BROADWELL-SSE-NEXT:    psignw (%rdi), %xmm0 # sched: [6:0.50]
   1788 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1789 ;
   1790 ; BROADWELL-LABEL: test_psignw:
   1791 ; BROADWELL:       # %bb.0:
   1792 ; BROADWELL-NEXT:    vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   1793 ; BROADWELL-NEXT:    vpsignw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   1794 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1795 ;
   1796 ; SKYLAKE-SSE-LABEL: test_psignw:
   1797 ; SKYLAKE-SSE:       # %bb.0:
   1798 ; SKYLAKE-SSE-NEXT:    psignw %xmm1, %xmm0 # sched: [1:0.50]
   1799 ; SKYLAKE-SSE-NEXT:    psignw (%rdi), %xmm0 # sched: [7:0.50]
   1800 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   1801 ;
   1802 ; SKYLAKE-LABEL: test_psignw:
   1803 ; SKYLAKE:       # %bb.0:
   1804 ; SKYLAKE-NEXT:    vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   1805 ; SKYLAKE-NEXT:    vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   1806 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1807 ;
   1808 ; SKX-SSE-LABEL: test_psignw:
   1809 ; SKX-SSE:       # %bb.0:
   1810 ; SKX-SSE-NEXT:    psignw %xmm1, %xmm0 # sched: [1:0.50]
   1811 ; SKX-SSE-NEXT:    psignw (%rdi), %xmm0 # sched: [7:0.50]
   1812 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   1813 ;
   1814 ; SKX-LABEL: test_psignw:
   1815 ; SKX:       # %bb.0:
   1816 ; SKX-NEXT:    vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   1817 ; SKX-NEXT:    vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   1818 ; SKX-NEXT:    retq # sched: [7:1.00]
   1819 ;
   1820 ; BTVER2-SSE-LABEL: test_psignw:
   1821 ; BTVER2-SSE:       # %bb.0:
   1822 ; BTVER2-SSE-NEXT:    psignw %xmm1, %xmm0 # sched: [1:0.50]
   1823 ; BTVER2-SSE-NEXT:    psignw (%rdi), %xmm0 # sched: [6:1.00]
   1824 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   1825 ;
   1826 ; BTVER2-LABEL: test_psignw:
   1827 ; BTVER2:       # %bb.0:
   1828 ; BTVER2-NEXT:    vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   1829 ; BTVER2-NEXT:    vpsignw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   1830 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1831 ;
   1832 ; ZNVER1-SSE-LABEL: test_psignw:
   1833 ; ZNVER1-SSE:       # %bb.0:
   1834 ; ZNVER1-SSE-NEXT:    psignw %xmm1, %xmm0 # sched: [1:0.25]
   1835 ; ZNVER1-SSE-NEXT:    psignw (%rdi), %xmm0 # sched: [8:0.50]
   1836 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   1837 ;
   1838 ; ZNVER1-LABEL: test_psignw:
   1839 ; ZNVER1:       # %bb.0:
   1840 ; ZNVER1-NEXT:    vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   1841 ; ZNVER1-NEXT:    vpsignw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   1842 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   1843   %1 = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1) ; register-register form: both operands are function arguments
   1844   %2 = load <8 x i16>, <8 x i16> *%a2, align 16 ; operand for the memory form, read through the pointer argument
   1845   %3 = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %1, <8 x i16> %2) ; consumes %1, so the second instruction depends on the first
   1846   ret <8 x i16> %3
   1847 }
   1848 declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone ; intrinsic exercised by @test_psignw
   1849