Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK-ALL --check-prefix=CHECK-SSE --check-prefix=CHECK-NOSSSE3
      3 ; RUN: llc < %s -mcpu=core2 | FileCheck %s --check-prefix=CHECK-ALL --check-prefix=CHECK-SSE --check-prefix=CHECK-SSSE3
      4 ; RUN: llc < %s -mcpu=core-avx2 | FileCheck %s --check-prefix=CHECK-ALL --check-prefix=CHECK-AVX --check-prefix=CHECK-AVX2
      5 ; RUN: llc < %s -mcpu=core-avx2 -x86-experimental-vector-widening-legalization | FileCheck %s --check-prefix=CHECK-ALL --check-prefix=CHECK-WIDE-AVX --check-prefix=CHECK-WIDE-AVX2
      ;
      ; Prefix scheme used by the four llc invocations above (every one of them
      ; also enables CHECK-ALL, which is used where all configurations agree)
      ;   -mcpu=x86-64                                  -> CHECK-SSE  + CHECK-NOSSSE3
      ;   -mcpu=core2                                   -> CHECK-SSE  + CHECK-SSSE3
      ;   -mcpu=core-avx2                               -> CHECK-AVX  + CHECK-AVX2
      ;   -mcpu=core-avx2 with the experimental vector
      ;    widening legalization flag                   -> CHECK-WIDE-AVX + CHECK-WIDE-AVX2
      ; The assertions are autogenerated; regenerate them with the script named
      ; in the NOTE line instead of editing them by hand.
      6 
      6 
      7 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
      8 target triple = "x86_64-unknown-linux-gnu"
      9 
     ; Byte-swap intrinsics for the 128-bit vector types (8 x i16, 4 x i32,
     ; 2 x i64) called by the tests in this file.
     10 declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
     11 declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
     12 declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
     13 
     ; test1 - byte-swap every i16 lane of a 128-bit <8 x i16> via llvm.bswap.v8i16.
     ; Expected code per configuration, as pinned by the assertions in the body
     ;   NOSSSE3  - unpack the bytes against a zeroed register, swap adjacent
     ;              words with pshuflw/pshufhw, then repack with packuswb.
     ;   SSSE3    - one pshufb whose mask swaps each pair of bytes.
     ;   AVX and WIDE-AVX - the same byte shuffle as one vpshufb.
     14 define <8 x i16> @test1(<8 x i16> %v) {
     15 ; CHECK-NOSSSE3-LABEL: test1:
     16 ; CHECK-NOSSSE3:       # BB#0: # %entry
     17 ; CHECK-NOSSSE3-NEXT:    pxor %xmm1, %xmm1
     18 ; CHECK-NOSSSE3-NEXT:    movdqa %xmm0, %xmm2
     19 ; CHECK-NOSSSE3-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
     20 ; CHECK-NOSSSE3-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[1,0,3,2,4,5,6,7]
     21 ; CHECK-NOSSSE3-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,4,7,6]
     22 ; CHECK-NOSSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
     23 ; CHECK-NOSSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7]
     24 ; CHECK-NOSSSE3-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
     25 ; CHECK-NOSSSE3-NEXT:    packuswb %xmm2, %xmm0
     26 ; CHECK-NOSSSE3-NEXT:    retq
     27 ;
     28 ; CHECK-SSSE3-LABEL: test1:
     29 ; CHECK-SSSE3:       # BB#0: # %entry
     30 ; CHECK-SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
     31 ; CHECK-SSSE3-NEXT:    retq
     32 ;
     33 ; CHECK-AVX-LABEL: test1:
     34 ; CHECK-AVX:       # BB#0: # %entry
     35 ; CHECK-AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
     36 ; CHECK-AVX-NEXT:    retq
     37 ;
     38 ; CHECK-WIDE-AVX-LABEL: test1:
     39 ; CHECK-WIDE-AVX:       # BB#0: # %entry
     40 ; CHECK-WIDE-AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
     41 ; CHECK-WIDE-AVX-NEXT:    retq
     42 entry:
     43   %r = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %v)
     44   ret <8 x i16> %r
     45 }
     46 
     ; test2 - byte-swap every i32 lane of a 128-bit <4 x i32> via llvm.bswap.v4i32.
     ; Expected code per configuration, as pinned by the assertions in the body
     ;   NOSSSE3  - unpack the bytes against a zeroed register, reverse each
     ;              group of four words with pshuflw/pshufhw, repack with packuswb.
     ;   SSSE3    - one pshufb whose mask reverses each group of four bytes.
     ;   AVX and WIDE-AVX - the same byte shuffle as one vpshufb.
     47 define <4 x i32> @test2(<4 x i32> %v) {
     48 ; CHECK-NOSSSE3-LABEL: test2:
     49 ; CHECK-NOSSSE3:       # BB#0: # %entry
     50 ; CHECK-NOSSSE3-NEXT:    pxor %xmm1, %xmm1
     51 ; CHECK-NOSSSE3-NEXT:    movdqa %xmm0, %xmm2
     52 ; CHECK-NOSSSE3-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
     53 ; CHECK-NOSSSE3-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
     54 ; CHECK-NOSSSE3-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
     55 ; CHECK-NOSSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
     56 ; CHECK-NOSSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
     57 ; CHECK-NOSSSE3-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
     58 ; CHECK-NOSSSE3-NEXT:    packuswb %xmm2, %xmm0
     59 ; CHECK-NOSSSE3-NEXT:    retq
     60 ;
     61 ; CHECK-SSSE3-LABEL: test2:
     62 ; CHECK-SSSE3:       # BB#0: # %entry
     63 ; CHECK-SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
     64 ; CHECK-SSSE3-NEXT:    retq
     65 ;
     66 ; CHECK-AVX-LABEL: test2:
     67 ; CHECK-AVX:       # BB#0: # %entry
     68 ; CHECK-AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
     69 ; CHECK-AVX-NEXT:    retq
     70 ;
     71 ; CHECK-WIDE-AVX-LABEL: test2:
     72 ; CHECK-WIDE-AVX:       # BB#0: # %entry
     73 ; CHECK-WIDE-AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
     74 ; CHECK-WIDE-AVX-NEXT:    retq
     75 entry:
     76   %r = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %v)
     77   ret <4 x i32> %r
     78 }
     79 
     ; test3 - byte-swap every i64 lane of a 128-bit <2 x i64> via llvm.bswap.v2i64.
     ; Expected code per configuration, as pinned by the assertions in the body
     ;   NOSSSE3  - same unpack / shuffle / packuswb shape as the i32 case, plus
     ;              a pshufd [2,3,0,1] on each unpacked half so that all eight
     ;              words of the half end up reversed.
     ;   SSSE3    - one pshufb whose mask reverses each group of eight bytes.
     ;   AVX and WIDE-AVX - the same byte shuffle as one vpshufb.
     80 define <2 x i64> @test3(<2 x i64> %v) {
     81 ; CHECK-NOSSSE3-LABEL: test3:
     82 ; CHECK-NOSSSE3:       # BB#0: # %entry
     83 ; CHECK-NOSSSE3-NEXT:    pxor %xmm1, %xmm1
     84 ; CHECK-NOSSSE3-NEXT:    movdqa %xmm0, %xmm2
     85 ; CHECK-NOSSSE3-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
     86 ; CHECK-NOSSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
     87 ; CHECK-NOSSSE3-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
     88 ; CHECK-NOSSSE3-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
     89 ; CHECK-NOSSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
     90 ; CHECK-NOSSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
     91 ; CHECK-NOSSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
     92 ; CHECK-NOSSSE3-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
     93 ; CHECK-NOSSSE3-NEXT:    packuswb %xmm2, %xmm0
     94 ; CHECK-NOSSSE3-NEXT:    retq
     95 ;
     96 ; CHECK-SSSE3-LABEL: test3:
     97 ; CHECK-SSSE3:       # BB#0: # %entry
     98 ; CHECK-SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
     99 ; CHECK-SSSE3-NEXT:    retq
    100 ;
    101 ; CHECK-AVX-LABEL: test3:
    102 ; CHECK-AVX:       # BB#0: # %entry
    103 ; CHECK-AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
    104 ; CHECK-AVX-NEXT:    retq
    105 ;
    106 ; CHECK-WIDE-AVX-LABEL: test3:
    107 ; CHECK-WIDE-AVX:       # BB#0: # %entry
    108 ; CHECK-WIDE-AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
    109 ; CHECK-WIDE-AVX-NEXT:    retq
    110 entry:
    111   %r = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %v)
    112   ret <2 x i64> %r
    113 }
    114 
    ; Byte-swap intrinsics for the 256-bit vector types (16 x i16, 8 x i32,
    ; 4 x i64) called by the tests in this file.
    115 declare <16 x i16> @llvm.bswap.v16i16(<16 x i16>)
    116 declare <8 x i32> @llvm.bswap.v8i32(<8 x i32>)
    117 declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>)
    118 
    ; test4 - byte-swap every i16 lane of a 256-bit <16 x i16> via llvm.bswap.v16i16.
    ; Expected code per configuration, as pinned by the assertions in the body
    ;   NOSSSE3  - the value is handled as two 128-bit halves (xmm0 and xmm1);
    ;              each half gets the unpack / pshuflw / pshufhw / packuswb
    ;              sequence, sharing one zeroed register (xmm2).
    ;   SSSE3    - the byte-pair swap mask is loaded once into xmm2 and applied
    ;              to both halves with pshufb.
    ;   AVX and WIDE-AVX - one vpshufb on the full ymm0 register.
    119 define <16 x i16> @test4(<16 x i16> %v) {
    120 ; CHECK-NOSSSE3-LABEL: test4:
    121 ; CHECK-NOSSSE3:       # BB#0: # %entry
    122 ; CHECK-NOSSSE3-NEXT:    pxor %xmm2, %xmm2
    123 ; CHECK-NOSSSE3-NEXT:    movdqa %xmm0, %xmm3
    124 ; CHECK-NOSSSE3-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
    125 ; CHECK-NOSSSE3-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[1,0,3,2,4,5,6,7]
    126 ; CHECK-NOSSSE3-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,4,7,6]
    127 ; CHECK-NOSSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
    128 ; CHECK-NOSSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7]
    129 ; CHECK-NOSSSE3-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
    130 ; CHECK-NOSSSE3-NEXT:    packuswb %xmm3, %xmm0
    131 ; CHECK-NOSSSE3-NEXT:    movdqa %xmm1, %xmm3
    132 ; CHECK-NOSSSE3-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
    133 ; CHECK-NOSSSE3-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[1,0,3,2,4,5,6,7]
    134 ; CHECK-NOSSSE3-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,4,7,6]
    135 ; CHECK-NOSSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    136 ; CHECK-NOSSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[1,0,3,2,4,5,6,7]
    137 ; CHECK-NOSSSE3-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,4,7,6]
    138 ; CHECK-NOSSSE3-NEXT:    packuswb %xmm3, %xmm1
    139 ; CHECK-NOSSSE3-NEXT:    retq
    140 ;
    141 ; CHECK-SSSE3-LABEL: test4:
    142 ; CHECK-SSSE3:       # BB#0: # %entry
    143 ; CHECK-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
    144 ; CHECK-SSSE3-NEXT:    pshufb %xmm2, %xmm0
    145 ; CHECK-SSSE3-NEXT:    pshufb %xmm2, %xmm1
    146 ; CHECK-SSSE3-NEXT:    retq
    147 ;
    148 ; CHECK-AVX-LABEL: test4:
    149 ; CHECK-AVX:       # BB#0: # %entry
    150 ; CHECK-AVX-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14,17,16,19,18,21,20,23,22,25,24,27,26,29,28,31,30]
    151 ; CHECK-AVX-NEXT:    retq
    152 ;
    153 ; CHECK-WIDE-AVX-LABEL: test4:
    154 ; CHECK-WIDE-AVX:       # BB#0: # %entry
    155 ; CHECK-WIDE-AVX-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14,17,16,19,18,21,20,23,22,25,24,27,26,29,28,31,30]
    156 ; CHECK-WIDE-AVX-NEXT:    retq
    157 entry:
    158   %r = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %v)
    159   ret <16 x i16> %r
    160 }
    161 
    ; test5 - byte-swap every i32 lane of a 256-bit <8 x i32> via llvm.bswap.v8i32.
    ; Expected code per configuration, as pinned by the assertions in the body
    ;   NOSSSE3  - the value is handled as two 128-bit halves (xmm0 and xmm1);
    ;              each half is unpacked against a shared zeroed register,
    ;              has each group of four words reversed, and is repacked.
    ;   SSSE3    - the four-byte reversal mask is loaded once into xmm2 and
    ;              applied to both halves with pshufb.
    ;   AVX and WIDE-AVX - one vpshufb on the full ymm0 register.
    162 define <8 x i32> @test5(<8 x i32> %v) {
    163 ; CHECK-NOSSSE3-LABEL: test5:
    164 ; CHECK-NOSSSE3:       # BB#0: # %entry
    165 ; CHECK-NOSSSE3-NEXT:    pxor %xmm2, %xmm2
    166 ; CHECK-NOSSSE3-NEXT:    movdqa %xmm0, %xmm3
    167 ; CHECK-NOSSSE3-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
    168 ; CHECK-NOSSSE3-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[3,2,1,0,4,5,6,7]
    169 ; CHECK-NOSSSE3-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,6,5,4]
    170 ; CHECK-NOSSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
    171 ; CHECK-NOSSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
    172 ; CHECK-NOSSSE3-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
    173 ; CHECK-NOSSSE3-NEXT:    packuswb %xmm3, %xmm0
    174 ; CHECK-NOSSSE3-NEXT:    movdqa %xmm1, %xmm3
    175 ; CHECK-NOSSSE3-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
    176 ; CHECK-NOSSSE3-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[3,2,1,0,4,5,6,7]
    177 ; CHECK-NOSSSE3-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,6,5,4]
    178 ; CHECK-NOSSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    179 ; CHECK-NOSSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
    180 ; CHECK-NOSSSE3-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4]
    181 ; CHECK-NOSSSE3-NEXT:    packuswb %xmm3, %xmm1
    182 ; CHECK-NOSSSE3-NEXT:    retq
    183 ;
    184 ; CHECK-SSSE3-LABEL: test5:
    185 ; CHECK-SSSE3:       # BB#0: # %entry
    186 ; CHECK-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
    187 ; CHECK-SSSE3-NEXT:    pshufb %xmm2, %xmm0
    188 ; CHECK-SSSE3-NEXT:    pshufb %xmm2, %xmm1
    189 ; CHECK-SSSE3-NEXT:    retq
    190 ;
    191 ; CHECK-AVX-LABEL: test5:
    192 ; CHECK-AVX:       # BB#0: # %entry
    193 ; CHECK-AVX-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12,19,18,17,16,23,22,21,20,27,26,25,24,31,30,29,28]
    194 ; CHECK-AVX-NEXT:    retq
    195 ;
    196 ; CHECK-WIDE-AVX-LABEL: test5:
    197 ; CHECK-WIDE-AVX:       # BB#0: # %entry
    198 ; CHECK-WIDE-AVX-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12,19,18,17,16,23,22,21,20,27,26,25,24,31,30,29,28]
    199 ; CHECK-WIDE-AVX-NEXT:    retq
    200 entry:
    201   %r = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %v)
    202   ret <8 x i32> %r
    203 }
    204 
    ; test6 - byte-swap every i64 lane of a 256-bit <4 x i64> via llvm.bswap.v4i64.
    ; Expected code per configuration, as pinned by the assertions in the body
    ;   NOSSSE3  - the value is handled as two 128-bit halves (xmm0 and xmm1);
    ;              each unpacked half gets pshufd [2,3,0,1] followed by the
    ;              pshuflw/pshufhw word reversal, then packuswb.
    ;   SSSE3    - the eight-byte reversal mask is loaded once into xmm2 and
    ;              applied to both halves with pshufb.
    ;   AVX and WIDE-AVX - one vpshufb on the full ymm0 register.
    205 define <4 x i64> @test6(<4 x i64> %v) {
    206 ; CHECK-NOSSSE3-LABEL: test6:
    207 ; CHECK-NOSSSE3:       # BB#0: # %entry
    208 ; CHECK-NOSSSE3-NEXT:    pxor %xmm2, %xmm2
    209 ; CHECK-NOSSSE3-NEXT:    movdqa %xmm0, %xmm3
    210 ; CHECK-NOSSSE3-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
    211 ; CHECK-NOSSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
    212 ; CHECK-NOSSSE3-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[3,2,1,0,4,5,6,7]
    213 ; CHECK-NOSSSE3-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,6,5,4]
    214 ; CHECK-NOSSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
    215 ; CHECK-NOSSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
    216 ; CHECK-NOSSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
    217 ; CHECK-NOSSSE3-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
    218 ; CHECK-NOSSSE3-NEXT:    packuswb %xmm3, %xmm0
    219 ; CHECK-NOSSSE3-NEXT:    movdqa %xmm1, %xmm3
    220 ; CHECK-NOSSSE3-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
    221 ; CHECK-NOSSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
    222 ; CHECK-NOSSSE3-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[3,2,1,0,4,5,6,7]
    223 ; CHECK-NOSSSE3-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,6,5,4]
    224 ; CHECK-NOSSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    225 ; CHECK-NOSSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
    226 ; CHECK-NOSSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
    227 ; CHECK-NOSSSE3-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4]
    228 ; CHECK-NOSSSE3-NEXT:    packuswb %xmm3, %xmm1
    229 ; CHECK-NOSSSE3-NEXT:    retq
    230 ;
    231 ; CHECK-SSSE3-LABEL: test6:
    232 ; CHECK-SSSE3:       # BB#0: # %entry
    233 ; CHECK-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
    234 ; CHECK-SSSE3-NEXT:    pshufb %xmm2, %xmm0
    235 ; CHECK-SSSE3-NEXT:    pshufb %xmm2, %xmm1
    236 ; CHECK-SSSE3-NEXT:    retq
    237 ;
    238 ; CHECK-AVX-LABEL: test6:
    239 ; CHECK-AVX:       # BB#0: # %entry
    240 ; CHECK-AVX-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,23,22,21,20,19,18,17,16,31,30,29,28,27,26,25,24]
    241 ; CHECK-AVX-NEXT:    retq
    242 ;
    243 ; CHECK-WIDE-AVX-LABEL: test6:
    244 ; CHECK-WIDE-AVX:       # BB#0: # %entry
    245 ; CHECK-WIDE-AVX-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,23,22,21,20,19,18,17,16,31,30,29,28,27,26,25,24]
    246 ; CHECK-WIDE-AVX-NEXT:    retq
    247 entry:
    248   %r = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %v)
    249   ret <4 x i64> %r
    250 }
    251 
    ; Byte-swap intrinsic for the 64-bit <4 x i16> type, which is narrower
    ; than an xmm register; called by test7 and identity_v4i16.
    252 declare <4 x i16> @llvm.bswap.v4i16(<4 x i16>)
    253 
    ; test7 - byte-swap every i16 lane of a 64-bit <4 x i16> via llvm.bswap.v4i16.
    ; This is the one test where the widening-legalization run expects
    ; different code from the plain AVX run
    ;   NOSSSE3 / SSSE3 / AVX - the byte reversal is done on four-byte groups
    ;              (same masks as the i32 tests) and is followed by a 16-bit
    ;              logical right shift of each dword (psrld / vpsrld $16).
    ;              NOTE(review): presumably the i16 elements are promoted to
    ;              32-bit lanes by default legalization - confirm.
    ;   WIDE-AVX - one vpshufb with the byte-pair swap mask and no shift.
    254 define <4 x i16> @test7(<4 x i16> %v) {
    255 ; CHECK-NOSSSE3-LABEL: test7:
    256 ; CHECK-NOSSSE3:       # BB#0: # %entry
    257 ; CHECK-NOSSSE3-NEXT:    pxor %xmm1, %xmm1
    258 ; CHECK-NOSSSE3-NEXT:    movdqa %xmm0, %xmm2
    259 ; CHECK-NOSSSE3-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
    260 ; CHECK-NOSSSE3-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
    261 ; CHECK-NOSSSE3-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
    262 ; CHECK-NOSSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    263 ; CHECK-NOSSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
    264 ; CHECK-NOSSSE3-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
    265 ; CHECK-NOSSSE3-NEXT:    packuswb %xmm2, %xmm0
    266 ; CHECK-NOSSSE3-NEXT:    psrld $16, %xmm0
    267 ; CHECK-NOSSSE3-NEXT:    retq
    268 ;
    269 ; CHECK-SSSE3-LABEL: test7:
    270 ; CHECK-SSSE3:       # BB#0: # %entry
    271 ; CHECK-SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
    272 ; CHECK-SSSE3-NEXT:    psrld $16, %xmm0
    273 ; CHECK-SSSE3-NEXT:    retq
    274 ;
    275 ; CHECK-AVX-LABEL: test7:
    276 ; CHECK-AVX:       # BB#0: # %entry
    277 ; CHECK-AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
    278 ; CHECK-AVX-NEXT:    vpsrld $16, %xmm0, %xmm0
    279 ; CHECK-AVX-NEXT:    retq
    280 ;
    281 ; CHECK-WIDE-AVX-LABEL: test7:
    282 ; CHECK-WIDE-AVX:       # BB#0: # %entry
    283 ; CHECK-WIDE-AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
    284 ; CHECK-WIDE-AVX-NEXT:    retq
    285 entry:
    286   %r = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %v)
    287   ret <4 x i16> %r
    288 }
    289 
    290 ;
    291 ; Double BSWAP -> Identity
    292 ;
    293 
    ; identity_v8i16 - applies llvm.bswap.v8i16 twice to the argument.
    ; Every configuration (shared CHECK-ALL prefix) expects the function body
    ;  to be a bare retq, i.e. the two swaps cancel and no shuffle is emitted.
    294 define <8 x i16> @identity_v8i16(<8 x i16> %v) {
    295 ; CHECK-ALL-LABEL: identity_v8i16:
    296 ; CHECK-ALL:       # BB#0: # %entry
    297 ; CHECK-ALL-NEXT:    retq
    298 entry:
    299   %bs1 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %v)
    300   %bs2 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %bs1)
    301   ret <8 x i16> %bs2
    302 }
    303 
    ; identity_v4i32 - applies llvm.bswap.v4i32 twice to the argument.
    ; Every configuration (shared CHECK-ALL prefix) expects the function body
    ;  to be a bare retq, i.e. the two swaps cancel and no shuffle is emitted.
    304 define <4 x i32> @identity_v4i32(<4 x i32> %v) {
    305 ; CHECK-ALL-LABEL: identity_v4i32:
    306 ; CHECK-ALL:       # BB#0: # %entry
    307 ; CHECK-ALL-NEXT:    retq
    308 entry:
    309   %bs1 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %v)
    310   %bs2 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %bs1)
    311   ret <4 x i32> %bs2
    312 }
    313 
    ; identity_v2i64 - applies llvm.bswap.v2i64 twice to the argument.
    ; Every configuration (shared CHECK-ALL prefix) expects the function body
    ;  to be a bare retq, i.e. the two swaps cancel and no shuffle is emitted.
    314 define <2 x i64> @identity_v2i64(<2 x i64> %v) {
    315 ; CHECK-ALL-LABEL: identity_v2i64:
    316 ; CHECK-ALL:       # BB#0: # %entry
    317 ; CHECK-ALL-NEXT:    retq
    318 entry:
    319   %bs1 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %v)
    320   %bs2 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %bs1)
    321   ret <2 x i64> %bs2
    322 }
    323 
    ; identity_v16i16 - applies llvm.bswap.v16i16 twice to the 256-bit argument.
    ; Every configuration (shared CHECK-ALL prefix) expects the function body
    ;  to be a bare retq, i.e. the two swaps cancel and no shuffle is emitted.
    324 define <16 x i16> @identity_v16i16(<16 x i16> %v) {
    325 ; CHECK-ALL-LABEL: identity_v16i16:
    326 ; CHECK-ALL:       # BB#0: # %entry
    327 ; CHECK-ALL-NEXT:    retq
    328 entry:
    329   %bs1 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %v)
    330   %bs2 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %bs1)
    331   ret <16 x i16> %bs2
    332 }
    333 
    ; identity_v8i32 - applies llvm.bswap.v8i32 twice to the 256-bit argument.
    ; Every configuration (shared CHECK-ALL prefix) expects the function body
    ;  to be a bare retq, i.e. the two swaps cancel and no shuffle is emitted.
    334 define <8 x i32> @identity_v8i32(<8 x i32> %v) {
    335 ; CHECK-ALL-LABEL: identity_v8i32:
    336 ; CHECK-ALL:       # BB#0: # %entry
    337 ; CHECK-ALL-NEXT:    retq
    338 entry:
    339   %bs1 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %v)
    340   %bs2 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %bs1)
    341   ret <8 x i32> %bs2
    342 }
    343 
    ; identity_v4i64 - applies llvm.bswap.v4i64 twice to the 256-bit argument.
    ; Every configuration (shared CHECK-ALL prefix) expects the function body
    ;  to be a bare retq, i.e. the two swaps cancel and no shuffle is emitted.
    344 define <4 x i64> @identity_v4i64(<4 x i64> %v) {
    345 ; CHECK-ALL-LABEL: identity_v4i64:
    346 ; CHECK-ALL:       # BB#0: # %entry
    347 ; CHECK-ALL-NEXT:    retq
    348 entry:
    349   %bs1 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %v)
    350   %bs2 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %bs1)
    351   ret <4 x i64> %bs2
    352 }
    353 
    ; identity_v4i16 - applies llvm.bswap.v4i16 twice to the 64-bit argument.
    ; Every configuration (shared CHECK-ALL prefix) expects the function body
    ;  to be a bare retq; unlike the single-swap v4i16 test, no shuffle or
    ;  shift is expected in any configuration.
    354 define <4 x i16> @identity_v4i16(<4 x i16> %v) {
    355 ; CHECK-ALL-LABEL: identity_v4i16:
    356 ; CHECK-ALL:       # BB#0: # %entry
    357 ; CHECK-ALL-NEXT:    retq
    358 entry:
    359   %bs1 = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %v)
    360   %bs2 = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %bs1)
    361   ret <4 x i16> %bs2
    362 }
    363 
    364 ;
    365 ; Constant Folding
    366 ;
    367 
    ; fold_v8i16 - llvm.bswap.v8i16 applied to a constant <8 x i16>.
    ; All configurations expect a single constant load (movaps / vmovaps) of
    ; the already-swapped lanes and no shuffle instruction. Lanes are printed
    ; as unsigned values, e.g. i16 1 -> 256 (0x0100) and
    ; i16 -3 (0xFFFD) -> 65023 (0xFDFF).
    368 define <8 x i16> @fold_v8i16() {
    369 ; CHECK-SSE-LABEL: fold_v8i16:
    370 ; CHECK-SSE:       # BB#0: # %entry
    371 ; CHECK-SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,256,65535,512,65023,1024,64511,1536]
    372 ; CHECK-SSE-NEXT:    retq
    373 ;
    374 ; CHECK-AVX-LABEL: fold_v8i16:
    375 ; CHECK-AVX:       # BB#0: # %entry
    376 ; CHECK-AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [0,256,65535,512,65023,1024,64511,1536]
    377 ; CHECK-AVX-NEXT:    retq
    378 ;
    379 ; CHECK-WIDE-AVX-LABEL: fold_v8i16:
    380 ; CHECK-WIDE-AVX:       # BB#0: # %entry
    381 ; CHECK-WIDE-AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [0,256,65535,512,65023,1024,64511,1536]
    382 ; CHECK-WIDE-AVX-NEXT:    retq
    383 entry:
    384   %r = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> <i16 0, i16 1, i16 -1, i16 2, i16 -3, i16 4, i16 -5, i16 6>)
    385   ret <8 x i16> %r
    386 }
    387 
    ; fold_v4i32 - llvm.bswap.v4i32 applied to a constant <4 x i32>.
    ; All configurations expect a single constant load (movaps / vmovaps) of
    ; the already-swapped lanes and no shuffle instruction. Lanes are printed
    ; as unsigned values, e.g. i32 2 -> 33554432 (0x02000000) and
    ; i32 -3 (0xFFFFFFFD) -> 4261412863 (0xFDFFFFFF).
    388 define <4 x i32> @fold_v4i32() {
    389 ; CHECK-SSE-LABEL: fold_v4i32:
    390 ; CHECK-SSE:       # BB#0: # %entry
    391 ; CHECK-SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,4294967295,33554432,4261412863]
    392 ; CHECK-SSE-NEXT:    retq
    393 ;
    394 ; CHECK-AVX-LABEL: fold_v4i32:
    395 ; CHECK-AVX:       # BB#0: # %entry
    396 ; CHECK-AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [0,4294967295,33554432,4261412863]
    397 ; CHECK-AVX-NEXT:    retq
    398 ;
    399 ; CHECK-WIDE-AVX-LABEL: fold_v4i32:
    400 ; CHECK-WIDE-AVX:       # BB#0: # %entry
    401 ; CHECK-WIDE-AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [0,4294967295,33554432,4261412863]
    402 ; CHECK-WIDE-AVX-NEXT:    retq
    403 entry:
    404   %r = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> <i32 0, i32 -1, i32 2, i32 -3>)
    405   ret <4 x i32> %r
    406 }
    407 
    ; fold_v2i64 - llvm.bswap.v2i64 applied to a constant <2 x i64>.
    ; All configurations expect a single constant load (movaps / vmovaps) of
    ; the already-swapped lanes and no shuffle instruction, e.g.
    ; i64 255 -> 18374686479671623680 (0xFF00000000000000); the all-ones lane
    ; is unchanged by the swap and prints as 18446744073709551615.
    408 define <2 x i64> @fold_v2i64() {
    409 ; CHECK-SSE-LABEL: fold_v2i64:
    410 ; CHECK-SSE:       # BB#0: # %entry
    411 ; CHECK-SSE-NEXT:    movaps {{.*#+}} xmm0 = [18374686479671623680,18446744073709551615]
    412 ; CHECK-SSE-NEXT:    retq
    413 ;
    414 ; CHECK-AVX-LABEL: fold_v2i64:
    415 ; CHECK-AVX:       # BB#0: # %entry
    416 ; CHECK-AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [18374686479671623680,18446744073709551615]
    417 ; CHECK-AVX-NEXT:    retq
    418 ;
    419 ; CHECK-WIDE-AVX-LABEL: fold_v2i64:
    420 ; CHECK-WIDE-AVX:       # BB#0: # %entry
    421 ; CHECK-WIDE-AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [18374686479671623680,18446744073709551615]
    422 ; CHECK-WIDE-AVX-NEXT:    retq
    423 entry:
    424   %r = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> <i64 255, i64 -1>)
    425   ret <2 x i64> %r
    426 }
    427 
    ; fold_v16i16 - llvm.bswap.v16i16 applied to a constant 256-bit <16 x i16>.
    ; No shuffle is expected in any configuration, only constant loads of the
    ; already-swapped lanes
    ;   SSE runs - two 128-bit loads, low half into xmm0 and high half into xmm1.
    ;   AVX and WIDE-AVX - one 256-bit load into ymm0.
    ; Example lanes from the high half, i16 -7 (0xFFF9) -> 63999 (0xF9FF) and
    ; i16 8 -> 2048 (0x0800).
    428 define <16 x i16> @fold_v16i16() {
    429 ; CHECK-SSE-LABEL: fold_v16i16:
    430 ; CHECK-SSE:       # BB#0: # %entry
    431 ; CHECK-SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,256,65535,512,65023,1024,64511,1536]
    432 ; CHECK-SSE-NEXT:    movaps {{.*#+}} xmm1 = [63999,2048,63487,2560,62975,3072,62463,3584]
    433 ; CHECK-SSE-NEXT:    retq
    434 ;
    435 ; CHECK-AVX-LABEL: fold_v16i16:
    436 ; CHECK-AVX:       # BB#0: # %entry
    437 ; CHECK-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,256,65535,512,65023,1024,64511,1536,63999,2048,63487,2560,62975,3072,62463,3584]
    438 ; CHECK-AVX-NEXT:    retq
    439 ;
    440 ; CHECK-WIDE-AVX-LABEL: fold_v16i16:
    441 ; CHECK-WIDE-AVX:       # BB#0: # %entry
    442 ; CHECK-WIDE-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,256,65535,512,65023,1024,64511,1536,63999,2048,63487,2560,62975,3072,62463,3584]
    443 ; CHECK-WIDE-AVX-NEXT:    retq
    444 entry:
    445   %r = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> <i16 0, i16 1, i16 -1, i16 2, i16 -3, i16 4, i16 -5, i16 6, i16 -7, i16 8, i16 -9, i16 10, i16 -11, i16 12, i16 -13, i16 14>)
    446   ret <16 x i16> %r
    447 }
    448 
    ; fold_v8i32 - llvm.bswap.v8i32 applied to a constant 256-bit <8 x i32>.
    ; No shuffle is expected in any configuration, only constant loads of the
    ; already-swapped lanes
    ;   SSE runs - two 128-bit loads, low half into xmm0 and high half into xmm1.
    ;   AVX and WIDE-AVX - one 256-bit load into ymm0.
    ; Example lanes, i32 1 -> 16777216 (0x01000000) and
    ; i32 -5 (0xFFFFFFFB) -> 4227858431 (0xFBFFFFFF).
    449 define <8 x i32> @fold_v8i32() {
    450 ; CHECK-SSE-LABEL: fold_v8i32:
    451 ; CHECK-SSE:       # BB#0: # %entry
    452 ; CHECK-SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,16777216,4294967295,33554432]
    453 ; CHECK-SSE-NEXT:    movaps {{.*#+}} xmm1 = [4261412863,67108864,4227858431,100663296]
    454 ; CHECK-SSE-NEXT:    retq
    455 ;
    456 ; CHECK-AVX-LABEL: fold_v8i32:
    457 ; CHECK-AVX:       # BB#0: # %entry
    458 ; CHECK-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,16777216,4294967295,33554432,4261412863,67108864,4227858431,100663296]
    459 ; CHECK-AVX-NEXT:    retq
    460 ;
    461 ; CHECK-WIDE-AVX-LABEL: fold_v8i32:
    462 ; CHECK-WIDE-AVX:       # BB#0: # %entry
    463 ; CHECK-WIDE-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,16777216,4294967295,33554432,4261412863,67108864,4227858431,100663296]
    464 ; CHECK-WIDE-AVX-NEXT:    retq
    465 entry:
    466   %r = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> <i32 0, i32 1, i32 -1, i32 2, i32 -3, i32 4, i32 -5, i32 6>)
    467   ret <8 x i32> %r
    468 }
    469 
    ; fold_v4i64 - llvm.bswap.v4i64 applied to a constant 256-bit <4 x i64>.
    ; No shuffle is expected in any configuration, only constant loads of the
    ; already-swapped lanes
    ;   SSE runs - two 128-bit loads, low half into xmm0 and high half into xmm1.
    ;   AVX and WIDE-AVX - one 256-bit load into ymm0.
    ; Example lanes, i64 65535 -> 18446462598732840960 (0xFFFF000000000000) and
    ; i64 16776960 (0xFFFF00) -> 72056494526300160 (0x00FFFF0000000000).
    470 define <4 x i64> @fold_v4i64() {
    471 ; CHECK-SSE-LABEL: fold_v4i64:
    472 ; CHECK-SSE:       # BB#0: # %entry
    473 ; CHECK-SSE-NEXT:    movaps {{.*#+}} xmm0 = [18374686479671623680,18446744073709551615]
    474 ; CHECK-SSE-NEXT:    movaps {{.*#+}} xmm1 = [18446462598732840960,72056494526300160]
    475 ; CHECK-SSE-NEXT:    retq
    476 ;
    477 ; CHECK-AVX-LABEL: fold_v4i64:
    478 ; CHECK-AVX:       # BB#0: # %entry
    479 ; CHECK-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [18374686479671623680,18446744073709551615,18446462598732840960,72056494526300160]
    480 ; CHECK-AVX-NEXT:    retq
    481 ;
    482 ; CHECK-WIDE-AVX-LABEL: fold_v4i64:
    483 ; CHECK-WIDE-AVX:       # BB#0: # %entry
    484 ; CHECK-WIDE-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [18374686479671623680,18446744073709551615,18446462598732840960,72056494526300160]
    485 ; CHECK-WIDE-AVX-NEXT:    retq
    486 entry:
    487   %r = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> <i64 255, i64 -1, i64 65535, i64 16776960>)
    488   ret <4 x i64> %r
    489 }
    490