; Home | History | Annotate | Download | only in X86  (code-viewer navigation residue, commented out)
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSSE3
      3 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx  | FileCheck %s --check-prefixes=AVX,AVX1
      4 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
      5 
      6 ; The tests below check for matching the horizontal op and eliminating the shuffle.
      7 ; PR34111 - https://bugs.llvm.org/show_bug.cgi?id=34111
      8 
      9 define <4 x float> @hadd_v4f32(<4 x float> %a) {
; Even/odd lane extracts + fadd, widened so only the high result lanes are
; demanded; the CHECK lines expect a single (v)haddps with no extra shuffle.
     10 ; SSSE3-LABEL: hadd_v4f32:
     11 ; SSSE3:       # %bb.0:
     12 ; SSSE3-NEXT:    haddps %xmm0, %xmm0
     13 ; SSSE3-NEXT:    retq
     14 ;
     15 ; AVX-LABEL: hadd_v4f32:
     16 ; AVX:       # %bb.0:
     17 ; AVX-NEXT:    vhaddps %xmm0, %xmm0, %xmm0
     18 ; AVX-NEXT:    retq
     19   %a02 = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 2>
     20   %a13 = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 1, i32 3>
     21   %hop = fadd <2 x float> %a02, %a13
     22   %shuf = shufflevector <2 x float> %hop, <2 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
     23   ret <4 x float> %shuf
     24 }
     25 
     26 define <8 x float> @hadd_v8f32a(<8 x float> %a) {
; The even/odd split crosses the 128-bit lane boundary, so per the CHECK lines
; this lowers to one 128-bit hadd of the two halves plus a lane-duplicating
; shuffle (the shuffle is not fully eliminated here).
     27 ; SSSE3-LABEL: hadd_v8f32a:
     28 ; SSSE3:       # %bb.0:
     29 ; SSSE3-NEXT:    movaps %xmm0, %xmm2
     30 ; SSSE3-NEXT:    haddps %xmm1, %xmm2
     31 ; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm2[0,0]
     32 ; SSSE3-NEXT:    movaps %xmm2, %xmm1
     33 ; SSSE3-NEXT:    retq
     34 ;
     35 ; AVX1-LABEL: hadd_v8f32a:
     36 ; AVX1:       # %bb.0:
     37 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
     38 ; AVX1-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
     39 ; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = xmm0[0,0]
     40 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     41 ; AVX1-NEXT:    retq
     42 ;
     43 ; AVX2-LABEL: hadd_v8f32a:
     44 ; AVX2:       # %bb.0:
     45 ; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
     46 ; AVX2-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
     47 ; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,1]
     48 ; AVX2-NEXT:    retq
     49   %a0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
     50   %a1 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
     51   %hop = fadd <4 x float> %a0, %a1
     52   %shuf = shufflevector <4 x float> %hop, <4 x float> undef, <8 x i32> <i32 undef, i32 undef, i32 0, i32 1, i32 undef, i32 undef, i32 2, i32 3>
     53   ret <8 x float> %shuf
     54 }
     55 
     56 define <8 x float> @hadd_v8f32b(<8 x float> %a) {
; Even/odd masks stay within each 128-bit lane, so this matches a single
; 256-bit vhaddps on AVX (two 128-bit haddps on SSSE3) with the final
; duplicating shuffle eliminated.
     57 ; SSSE3-LABEL: hadd_v8f32b:
     58 ; SSSE3:       # %bb.0:
     59 ; SSSE3-NEXT:    haddps %xmm0, %xmm0
     60 ; SSSE3-NEXT:    haddps %xmm1, %xmm1
     61 ; SSSE3-NEXT:    retq
     62 ;
     63 ; AVX-LABEL: hadd_v8f32b:
     64 ; AVX:       # %bb.0:
     65 ; AVX-NEXT:    vhaddps %ymm0, %ymm0, %ymm0
     66 ; AVX-NEXT:    retq
     67   %a0 = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 4, i32 6, i32 undef, i32 undef>
     68   %a1 = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 5, i32 7, i32 undef, i32 undef>
     69   %hop = fadd <8 x float> %a0, %a1
     70   %shuf = shufflevector <8 x float> %hop, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
     71   ret <8 x float> %shuf
     72 }
     73 
     74 define <4 x float> @hsub_v4f32(<4 x float> %a) {
; Even minus odd lanes, result duplicated into both halves; the CHECK lines
; expect a single (v)hsubps with the duplicating shuffle eliminated.
     75 ; SSSE3-LABEL: hsub_v4f32:
     76 ; SSSE3:       # %bb.0:
     77 ; SSSE3-NEXT:    hsubps %xmm0, %xmm0
     78 ; SSSE3-NEXT:    retq
     79 ;
     80 ; AVX-LABEL: hsub_v4f32:
     81 ; AVX:       # %bb.0:
     82 ; AVX-NEXT:    vhsubps %xmm0, %xmm0, %xmm0
     83 ; AVX-NEXT:    retq
     84   %a02 = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 2>
     85   %a13 = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 1, i32 3>
     86   %hop = fsub <2 x float> %a02, %a13
     87   %shuf = shufflevector <2 x float> %hop, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
     88   ret <4 x float> %shuf
     89 }
     90 
     91 define <8 x float> @hsub_v8f32a(<8 x float> %a) {
; Cross-lane even/odd split (like hadd_v8f32a): lowers to one 128-bit hsub of
; the two halves plus a duplicating shuffle; the shuffle is not eliminated.
     92 ; SSSE3-LABEL: hsub_v8f32a:
     93 ; SSSE3:       # %bb.0:
     94 ; SSSE3-NEXT:    movaps %xmm0, %xmm2
     95 ; SSSE3-NEXT:    hsubps %xmm1, %xmm2
     96 ; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm2[0,0]
     97 ; SSSE3-NEXT:    movaps %xmm2, %xmm1
     98 ; SSSE3-NEXT:    retq
     99 ;
    100 ; AVX1-LABEL: hsub_v8f32a:
    101 ; AVX1:       # %bb.0:
    102 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    103 ; AVX1-NEXT:    vhsubps %xmm1, %xmm0, %xmm0
    104 ; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = xmm0[0,0]
    105 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    106 ; AVX1-NEXT:    retq
    107 ;
    108 ; AVX2-LABEL: hsub_v8f32a:
    109 ; AVX2:       # %bb.0:
    110 ; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
    111 ; AVX2-NEXT:    vhsubps %xmm1, %xmm0, %xmm0
    112 ; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,1]
    113 ; AVX2-NEXT:    retq
    114   %a0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
    115   %a1 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
    116   %hop = fsub <4 x float> %a0, %a1
    117   %shuf = shufflevector <4 x float> %hop, <4 x float> undef, <8 x i32> <i32 undef, i32 undef, i32 0, i32 1, i32 undef, i32 undef, i32 2, i32 3>
    118   ret <8 x float> %shuf
    119 }
    120 
    121 define <8 x float> @hsub_v8f32b(<8 x float> %a) {
; Per-128-bit-lane even/odd masks: matches a single 256-bit vhsubps on AVX
; (two 128-bit hsubps on SSSE3) with the duplicating shuffle eliminated.
    122 ; SSSE3-LABEL: hsub_v8f32b:
    123 ; SSSE3:       # %bb.0:
    124 ; SSSE3-NEXT:    hsubps %xmm0, %xmm0
    125 ; SSSE3-NEXT:    hsubps %xmm1, %xmm1
    126 ; SSSE3-NEXT:    retq
    127 ;
    128 ; AVX-LABEL: hsub_v8f32b:
    129 ; AVX:       # %bb.0:
    130 ; AVX-NEXT:    vhsubps %ymm0, %ymm0, %ymm0
    131 ; AVX-NEXT:    retq
    132   %a0 = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 4, i32 6, i32 undef, i32 undef>
    133   %a1 = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 5, i32 7, i32 undef, i32 undef>
    134   %hop = fsub <8 x float> %a0, %a1
    135   %shuf = shufflevector <8 x float> %hop, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
    136   ret <8 x float> %shuf
    137 }
    138 
    139 define <2 x double> @hadd_v2f64(<2 x double> %a) {
; a[0] + a[1] splatted to both lanes; the CHECK lines expect a single
; (v)haddpd with no trailing shuffle.
    140 ; SSSE3-LABEL: hadd_v2f64:
    141 ; SSSE3:       # %bb.0:
    142 ; SSSE3-NEXT:    haddpd %xmm0, %xmm0
    143 ; SSSE3-NEXT:    retq
    144 ;
    145 ; AVX-LABEL: hadd_v2f64:
    146 ; AVX:       # %bb.0:
    147 ; AVX-NEXT:    vhaddpd %xmm0, %xmm0, %xmm0
    148 ; AVX-NEXT:    retq
    149   %a0 = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 0, i32 undef>
    150   %a1 = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
    151   %hop = fadd <2 x double> %a0, %a1
    152   %shuf = shufflevector <2 x double> %hop, <2 x double> undef, <2 x i32> <i32 0, i32 0>
    153   ret <2 x double> %shuf
    154 }
    155 
    156 define <4 x double> @hadd_v4f64(<4 x double> %a) {
; Per-lane pairwise add with the result splatted within each 128-bit lane;
; folds to a single 256-bit vhaddpd on AVX (two haddpd on SSSE3).
    157 ; SSSE3-LABEL: hadd_v4f64:
    158 ; SSSE3:       # %bb.0:
    159 ; SSSE3-NEXT:    haddpd %xmm0, %xmm0
    160 ; SSSE3-NEXT:    haddpd %xmm1, %xmm1
    161 ; SSSE3-NEXT:    retq
    162 ;
    163 ; AVX-LABEL: hadd_v4f64:
    164 ; AVX:       # %bb.0:
    165 ; AVX-NEXT:    vhaddpd %ymm0, %ymm0, %ymm0
    166 ; AVX-NEXT:    retq
    167   %a0 = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> <i32 0, i32 undef, i32 2, i32 undef>
    168   %a1 = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> <i32 1, i32 undef, i32 3, i32 undef>
    169   %hop = fadd <4 x double> %a0, %a1
    170   %shuf = shufflevector <4 x double> %hop, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
    171   ret <4 x double> %shuf
    172 }
    173 
    174 define <2 x double> @hsub_v2f64(<2 x double> %a) {
; a[0] - a[1] with only the upper result lane demanded; the CHECK lines expect
; a single (v)hsubpd and no trailing shuffle.
    175 ; SSSE3-LABEL: hsub_v2f64:
    176 ; SSSE3:       # %bb.0:
    177 ; SSSE3-NEXT:    hsubpd %xmm0, %xmm0
    178 ; SSSE3-NEXT:    retq
    179 ;
    180 ; AVX-LABEL: hsub_v2f64:
    181 ; AVX:       # %bb.0:
    182 ; AVX-NEXT:    vhsubpd %xmm0, %xmm0, %xmm0
    183 ; AVX-NEXT:    retq
    184   %a0 = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 0, i32 undef>
    185   %a1 = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
    186   %hop = fsub <2 x double> %a0, %a1
    187   %shuf = shufflevector <2 x double> %hop, <2 x double> undef, <2 x i32> <i32 undef, i32 0>
    188   ret <2 x double> %shuf
    189 }
    190 
    191 define <4 x double> @hsub_v4f64(<4 x double> %a) {
; Per-lane pairwise subtract with the result splatted within each 128-bit
; lane; folds to a single 256-bit vhsubpd on AVX (two hsubpd on SSSE3).
    192 ; SSSE3-LABEL: hsub_v4f64:
    193 ; SSSE3:       # %bb.0:
    194 ; SSSE3-NEXT:    hsubpd %xmm0, %xmm0
    195 ; SSSE3-NEXT:    hsubpd %xmm1, %xmm1
    196 ; SSSE3-NEXT:    retq
    197 ;
    198 ; AVX-LABEL: hsub_v4f64:
    199 ; AVX:       # %bb.0:
    200 ; AVX-NEXT:    vhsubpd %ymm0, %ymm0, %ymm0
    201 ; AVX-NEXT:    retq
    202   %a0 = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> <i32 0, i32 undef, i32 2, i32 undef>
    203   %a1 = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> <i32 1, i32 undef, i32 3, i32 undef>
    204   %hop = fsub <4 x double> %a0, %a1
    205   %shuf = shufflevector <4 x double> %hop, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
    206   ret <4 x double> %shuf
    207 }
    208 
    209 define <4 x i32> @hadd_v4i32(<4 x i32> %a) {
; Integer even/odd add; only lanes 0 and 3 of the final shuffle are demanded
; and they line up with the (v)phaddd result, so the shuffle is eliminated.
    210 ; SSSE3-LABEL: hadd_v4i32:
    211 ; SSSE3:       # %bb.0:
    212 ; SSSE3-NEXT:    phaddd %xmm0, %xmm0
    213 ; SSSE3-NEXT:    retq
    214 ;
    215 ; AVX-LABEL: hadd_v4i32:
    216 ; AVX:       # %bb.0:
    217 ; AVX-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
    218 ; AVX-NEXT:    retq
    219   %a02 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
    220   %a13 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
    221   %hop = add <4 x i32> %a02, %a13
    222   %shuf = shufflevector <4 x i32> %hop, <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 1>
    223   ret <4 x i32> %shuf
    224 }
    225 
    226 define <8 x i32> @hadd_v8i32a(<8 x i32> %a) {
; Cross-lane even/odd split: lowers to one 128-bit phaddd of the two halves
; plus a lane-duplicating shuffle; the shuffle is not eliminated here.
    227 ; SSSE3-LABEL: hadd_v8i32a:
    228 ; SSSE3:       # %bb.0:
    229 ; SSSE3-NEXT:    movdqa %xmm0, %xmm2
    230 ; SSSE3-NEXT:    phaddd %xmm1, %xmm2
    231 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,1,0,1]
    232 ; SSSE3-NEXT:    movdqa %xmm2, %xmm1
    233 ; SSSE3-NEXT:    retq
    234 ;
    235 ; AVX1-LABEL: hadd_v8i32a:
    236 ; AVX1:       # %bb.0:
    237 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    238 ; AVX1-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
    239 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
    240 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    241 ; AVX1-NEXT:    retq
    242 ;
    243 ; AVX2-LABEL: hadd_v8i32a:
    244 ; AVX2:       # %bb.0:
    245 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
    246 ; AVX2-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
    247 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,2,1]
    248 ; AVX2-NEXT:    retq
    249   %a0 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
    250   %a1 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
    251   %hop = add <4 x i32> %a0, %a1
    252   %shuf = shufflevector <4 x i32> %hop, <4 x i32> undef, <8 x i32> <i32 undef, i32 undef, i32 0, i32 1, i32 undef, i32 undef, i32 2, i32 3>
    253   ret <8 x i32> %shuf
    254 }
    255 
    256 define <8 x i32> @hadd_v8i32b(<8 x i32> %a) {
; Per-lane even/odd masks: AVX2 matches a single 256-bit vphaddd; AVX1 (no
; 256-bit integer hadd) splits into two 128-bit vphaddd plus a duplicating
; shuffle; SSSE3 uses two phaddd with the shuffle eliminated.
    257 ; SSSE3-LABEL: hadd_v8i32b:
    258 ; SSSE3:       # %bb.0:
    259 ; SSSE3-NEXT:    phaddd %xmm0, %xmm0
    260 ; SSSE3-NEXT:    phaddd %xmm1, %xmm1
    261 ; SSSE3-NEXT:    retq
    262 ;
    263 ; AVX1-LABEL: hadd_v8i32b:
    264 ; AVX1:       # %bb.0:
    265 ; AVX1-NEXT:    vphaddd %xmm0, %xmm0, %xmm1
    266 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
    267 ; AVX1-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
    268 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    269 ; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
    270 ; AVX1-NEXT:    retq
    271 ;
    272 ; AVX2-LABEL: hadd_v8i32b:
    273 ; AVX2:       # %bb.0:
    274 ; AVX2-NEXT:    vphaddd %ymm0, %ymm0, %ymm0
    275 ; AVX2-NEXT:    retq
    276   %a0 = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 4, i32 6, i32 undef, i32 undef>
    277   %a1 = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 5, i32 7, i32 undef, i32 undef>
    278   %hop = add <8 x i32> %a0, %a1
    279   %shuf = shufflevector <8 x i32> %hop, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
    280   ret <8 x i32> %shuf
    281 }
    282 
    283 define <4 x i32> @hsub_v4i32(<4 x i32> %a) {
; Integer even-minus-odd; the demanded lanes of the final shuffle coincide
; with the (v)phsubd result, so the shuffle is eliminated.
    284 ; SSSE3-LABEL: hsub_v4i32:
    285 ; SSSE3:       # %bb.0:
    286 ; SSSE3-NEXT:    phsubd %xmm0, %xmm0
    287 ; SSSE3-NEXT:    retq
    288 ;
    289 ; AVX-LABEL: hsub_v4i32:
    290 ; AVX:       # %bb.0:
    291 ; AVX-NEXT:    vphsubd %xmm0, %xmm0, %xmm0
    292 ; AVX-NEXT:    retq
    293   %a02 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
    294   %a13 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
    295   %hop = sub <4 x i32> %a02, %a13
    296   %shuf = shufflevector <4 x i32> %hop, <4 x i32> undef, <4 x i32> <i32 undef, i32 1, i32 0, i32 undef>
    297   ret <4 x i32> %shuf
    298 }
    299 
    300 define <8 x i32> @hsub_v8i32a(<8 x i32> %a) {
; Cross-lane even/odd split: lowers to one 128-bit phsubd of the two halves
; plus a lane-duplicating shuffle; the shuffle is not eliminated here.
    301 ; SSSE3-LABEL: hsub_v8i32a:
    302 ; SSSE3:       # %bb.0:
    303 ; SSSE3-NEXT:    movdqa %xmm0, %xmm2
    304 ; SSSE3-NEXT:    phsubd %xmm1, %xmm2
    305 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,1,0,1]
    306 ; SSSE3-NEXT:    movdqa %xmm2, %xmm1
    307 ; SSSE3-NEXT:    retq
    308 ;
    309 ; AVX1-LABEL: hsub_v8i32a:
    310 ; AVX1:       # %bb.0:
    311 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    312 ; AVX1-NEXT:    vphsubd %xmm1, %xmm0, %xmm0
    313 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
    314 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    315 ; AVX1-NEXT:    retq
    316 ;
    317 ; AVX2-LABEL: hsub_v8i32a:
    318 ; AVX2:       # %bb.0:
    319 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
    320 ; AVX2-NEXT:    vphsubd %xmm1, %xmm0, %xmm0
    321 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,2,1]
    322 ; AVX2-NEXT:    retq
    323   %a0 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
    324   %a1 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
    325   %hop = sub <4 x i32> %a0, %a1
    326   %shuf = shufflevector <4 x i32> %hop, <4 x i32> undef, <8 x i32> <i32 undef, i32 undef, i32 0, i32 1, i32 undef, i32 undef, i32 2, i32 3>
    327   ret <8 x i32> %shuf
    328 }
    329 
    330 define <8 x i32> @hsub_v8i32b(<8 x i32> %a) {
; Per-lane even/odd masks: AVX2 matches a single 256-bit vphsubd; AVX1 (no
; 256-bit integer hsub) splits into two 128-bit vphsubd plus a duplicating
; shuffle; SSSE3 uses two phsubd with the shuffle eliminated.
    331 ; SSSE3-LABEL: hsub_v8i32b:
    332 ; SSSE3:       # %bb.0:
    333 ; SSSE3-NEXT:    phsubd %xmm0, %xmm0
    334 ; SSSE3-NEXT:    phsubd %xmm1, %xmm1
    335 ; SSSE3-NEXT:    retq
    336 ;
    337 ; AVX1-LABEL: hsub_v8i32b:
    338 ; AVX1:       # %bb.0:
    339 ; AVX1-NEXT:    vphsubd %xmm0, %xmm0, %xmm1
    340 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
    341 ; AVX1-NEXT:    vphsubd %xmm0, %xmm0, %xmm0
    342 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    343 ; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
    344 ; AVX1-NEXT:    retq
    345 ;
    346 ; AVX2-LABEL: hsub_v8i32b:
    347 ; AVX2:       # %bb.0:
    348 ; AVX2-NEXT:    vphsubd %ymm0, %ymm0, %ymm0
    349 ; AVX2-NEXT:    retq
    350   %a0 = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 4, i32 6, i32 undef, i32 undef>
    351   %a1 = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 5, i32 7, i32 undef, i32 undef>
    352   %hop = sub <8 x i32> %a0, %a1
    353   %shuf = shufflevector <8 x i32> %hop, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
    354   ret <8 x i32> %shuf
    355 }
    356 
    357 define <8 x i16> @hadd_v8i16(<8 x i16> %a) {
; i16 even/odd add with only the high half of the result demanded; the CHECK
; lines expect a single (v)phaddw and no trailing shuffle.
    358 ; SSSE3-LABEL: hadd_v8i16:
    359 ; SSSE3:       # %bb.0:
    360 ; SSSE3-NEXT:    phaddw %xmm0, %xmm0
    361 ; SSSE3-NEXT:    retq
    362 ;
    363 ; AVX-LABEL: hadd_v8i16:
    364 ; AVX:       # %bb.0:
    365 ; AVX-NEXT:    vphaddw %xmm0, %xmm0, %xmm0
    366 ; AVX-NEXT:    retq
    367   %a0246 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
    368   %a1357 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
    369   %hop = add <8 x i16> %a0246, %a1357
    370   %shuf = shufflevector <8 x i16> %hop, <8 x i16> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3>
    371   ret <8 x i16> %shuf
    372 }
    373 
    374 define <16 x i16> @hadd_v16i16a(<16 x i16> %a) {
; Cross-lane even/odd split: lowers to one 128-bit phaddw of the two halves
; plus a lane-duplicating shuffle; the shuffle is not eliminated here.
    375 ; SSSE3-LABEL: hadd_v16i16a:
    376 ; SSSE3:       # %bb.0:
    377 ; SSSE3-NEXT:    movdqa %xmm0, %xmm2
    378 ; SSSE3-NEXT:    phaddw %xmm1, %xmm2
    379 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,1,0,1]
    380 ; SSSE3-NEXT:    movdqa %xmm2, %xmm1
    381 ; SSSE3-NEXT:    retq
    382 ;
    383 ; AVX1-LABEL: hadd_v16i16a:
    384 ; AVX1:       # %bb.0:
    385 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    386 ; AVX1-NEXT:    vphaddw %xmm1, %xmm0, %xmm0
    387 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
    388 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    389 ; AVX1-NEXT:    retq
    390 ;
    391 ; AVX2-LABEL: hadd_v16i16a:
    392 ; AVX2:       # %bb.0:
    393 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
    394 ; AVX2-NEXT:    vphaddw %xmm1, %xmm0, %xmm0
    395 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,2,1]
    396 ; AVX2-NEXT:    retq
    397   %a0 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
    398   %a1 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
    399   %hop = add <8 x i16> %a0, %a1
    400   %shuf = shufflevector <8 x i16> %hop, <8 x i16> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 7>
    401   ret <16 x i16> %shuf
    402 }
    403 
    404 define <16 x i16> @hadd_v16i16b(<16 x i16> %a) {
; Per-lane even/odd masks: AVX2 matches a single 256-bit vphaddw; AVX1 (no
; 256-bit integer hadd) splits into two 128-bit vphaddw plus a duplicating
; shuffle; SSSE3 uses two phaddw with the shuffle eliminated.
    405 ; SSSE3-LABEL: hadd_v16i16b:
    406 ; SSSE3:       # %bb.0:
    407 ; SSSE3-NEXT:    phaddw %xmm0, %xmm0
    408 ; SSSE3-NEXT:    phaddw %xmm1, %xmm1
    409 ; SSSE3-NEXT:    retq
    410 ;
    411 ; AVX1-LABEL: hadd_v16i16b:
    412 ; AVX1:       # %bb.0:
    413 ; AVX1-NEXT:    vphaddw %xmm0, %xmm0, %xmm1
    414 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
    415 ; AVX1-NEXT:    vphaddw %xmm0, %xmm0, %xmm0
    416 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    417 ; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
    418 ; AVX1-NEXT:    retq
    419 ;
    420 ; AVX2-LABEL: hadd_v16i16b:
    421 ; AVX2:       # %bb.0:
    422 ; AVX2-NEXT:    vphaddw %ymm0, %ymm0, %ymm0
    423 ; AVX2-NEXT:    retq
    424   %a0 = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 10, i32 12, i32 14, i32 undef, i32 undef, i32 undef, i32 undef>
    425   %a1 = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 9, i32 11, i32 13, i32 15, i32 undef, i32 undef, i32 undef, i32 undef>
    426   %hop = add <16 x i16> %a0, %a1
    427   %shuf = shufflevector <16 x i16> %hop, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11>
    428   ret <16 x i16> %shuf
    429 }
    430 
    431 define <8 x i16> @hsub_v8i16(<8 x i16> %a) {
; i16 even-minus-odd; every demanded lane of the final shuffle matches the
; (v)phsubw result, so the shuffle is eliminated.
    432 ; SSSE3-LABEL: hsub_v8i16:
    433 ; SSSE3:       # %bb.0:
    434 ; SSSE3-NEXT:    phsubw %xmm0, %xmm0
    435 ; SSSE3-NEXT:    retq
    436 ;
    437 ; AVX-LABEL: hsub_v8i16:
    438 ; AVX:       # %bb.0:
    439 ; AVX-NEXT:    vphsubw %xmm0, %xmm0, %xmm0
    440 ; AVX-NEXT:    retq
    441   %a0246 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
    442   %a1357 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
    443   %hop = sub <8 x i16> %a0246, %a1357
    444   %shuf = shufflevector <8 x i16> %hop, <8 x i16> undef, <8 x i32> <i32 0, i32 undef, i32 2, i32 undef, i32 undef, i32 1, i32 undef, i32 3>
    445   ret <8 x i16> %shuf
    446 }
    447 
    448 define <16 x i16> @hsub_v16i16a(<16 x i16> %a) {
; Cross-lane even/odd split: lowers to one 128-bit phsubw of the two halves
; plus a lane-duplicating shuffle; the shuffle is not eliminated here.
    449 ; SSSE3-LABEL: hsub_v16i16a:
    450 ; SSSE3:       # %bb.0:
    451 ; SSSE3-NEXT:    movdqa %xmm0, %xmm2
    452 ; SSSE3-NEXT:    phsubw %xmm1, %xmm2
    453 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,1,0,1]
    454 ; SSSE3-NEXT:    movdqa %xmm2, %xmm1
    455 ; SSSE3-NEXT:    retq
    456 ;
    457 ; AVX1-LABEL: hsub_v16i16a:
    458 ; AVX1:       # %bb.0:
    459 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    460 ; AVX1-NEXT:    vphsubw %xmm1, %xmm0, %xmm0
    461 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
    462 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    463 ; AVX1-NEXT:    retq
    464 ;
    465 ; AVX2-LABEL: hsub_v16i16a:
    466 ; AVX2:       # %bb.0:
    467 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
    468 ; AVX2-NEXT:    vphsubw %xmm1, %xmm0, %xmm0
    469 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,2,1]
    470 ; AVX2-NEXT:    retq
    471   %a0 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
    472   %a1 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
    473   %hop = sub <8 x i16> %a0, %a1
    474   %shuf = shufflevector <8 x i16> %hop, <8 x i16> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 7>
    475   ret <16 x i16> %shuf
    476 }
    477 
    478 define <16 x i16> @hsub_v16i16b(<16 x i16> %a) {
; Per-lane even/odd masks: AVX2 matches a single 256-bit vphsubw; AVX1 (no
; 256-bit integer hsub) splits into two 128-bit vphsubw plus a duplicating
; shuffle; SSSE3 uses two phsubw with the shuffle eliminated.
    479 ; SSSE3-LABEL: hsub_v16i16b:
    480 ; SSSE3:       # %bb.0:
    481 ; SSSE3-NEXT:    phsubw %xmm0, %xmm0
    482 ; SSSE3-NEXT:    phsubw %xmm1, %xmm1
    483 ; SSSE3-NEXT:    retq
    484 ;
    485 ; AVX1-LABEL: hsub_v16i16b:
    486 ; AVX1:       # %bb.0:
    487 ; AVX1-NEXT:    vphsubw %xmm0, %xmm0, %xmm1
    488 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
    489 ; AVX1-NEXT:    vphsubw %xmm0, %xmm0, %xmm0
    490 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    491 ; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
    492 ; AVX1-NEXT:    retq
    493 ;
    494 ; AVX2-LABEL: hsub_v16i16b:
    495 ; AVX2:       # %bb.0:
    496 ; AVX2-NEXT:    vphsubw %ymm0, %ymm0, %ymm0
    497 ; AVX2-NEXT:    retq
    498   %a0 = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 10, i32 12, i32 14, i32 undef, i32 undef, i32 undef, i32 undef>
    499   %a1 = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 9, i32 11, i32 13, i32 15, i32 undef, i32 undef, i32 undef, i32 undef>
    500   %hop = sub <16 x i16> %a0, %a1
    501   %shuf = shufflevector <16 x i16> %hop, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11>
    502   ret <16 x i16> %shuf
    503 }
    504