; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s --check-prefix=CHECK-ALL --check-prefix=CHECK-SSE --check-prefix=CHECK-NOSSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+ssse3 | FileCheck %s --check-prefix=CHECK-ALL --check-prefix=CHECK-SSE --check-prefix=CHECK-SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck %s --check-prefix=CHECK-ALL --check-prefix=CHECK-AVX --check-prefix=CHECK-AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 -x86-experimental-vector-widening-legalization | FileCheck %s --check-prefix=CHECK-ALL --check-prefix=CHECK-WIDE-AVX --check-prefix=CHECK-WIDE-AVX2
;
; Codegen test for the vector forms of the llvm.bswap intrinsic on x86-64.
; Four llc configurations are exercised: the default feature set (no SSSE3),
; +ssse3, +avx2, and +avx2 with experimental vector widening legalization.
; The file covers plain lowering (test1..test7), bswap-of-bswap elimination
; (identity_*) and folding of constant operands (fold_*).
      6 
      7 declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
      8 declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
      9 declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
     10 
     11 define <8 x i16> @test1(<8 x i16> %v) {
     12 ; CHECK-NOSSSE3-LABEL: test1:
     13 ; CHECK-NOSSSE3:       # %bb.0: # %entry
     14 ; CHECK-NOSSSE3-NEXT:    pxor %xmm1, %xmm1
     15 ; CHECK-NOSSSE3-NEXT:    movdqa %xmm0, %xmm2
     16 ; CHECK-NOSSSE3-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
     17 ; CHECK-NOSSSE3-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[1,0,3,2,4,5,6,7]
     18 ; CHECK-NOSSSE3-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,4,7,6]
     19 ; CHECK-NOSSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
     20 ; CHECK-NOSSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7]
     21 ; CHECK-NOSSSE3-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
     22 ; CHECK-NOSSSE3-NEXT:    packuswb %xmm2, %xmm0
     23 ; CHECK-NOSSSE3-NEXT:    retq
     24 ;
     25 ; CHECK-SSSE3-LABEL: test1:
     26 ; CHECK-SSSE3:       # %bb.0: # %entry
     27 ; CHECK-SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
     28 ; CHECK-SSSE3-NEXT:    retq
     29 ;
     30 ; CHECK-AVX-LABEL: test1:
     31 ; CHECK-AVX:       # %bb.0: # %entry
     32 ; CHECK-AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
     33 ; CHECK-AVX-NEXT:    retq
     34 ;
     35 ; CHECK-WIDE-AVX-LABEL: test1:
     36 ; CHECK-WIDE-AVX:       # %bb.0: # %entry
     37 ; CHECK-WIDE-AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
     38 ; CHECK-WIDE-AVX-NEXT:    retq
     39 entry:
     40   %r = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %v)
     41   ret <8 x i16> %r
     42 }
     43 
     44 define <4 x i32> @test2(<4 x i32> %v) {
     45 ; CHECK-NOSSSE3-LABEL: test2:
     46 ; CHECK-NOSSSE3:       # %bb.0: # %entry
     47 ; CHECK-NOSSSE3-NEXT:    pxor %xmm1, %xmm1
     48 ; CHECK-NOSSSE3-NEXT:    movdqa %xmm0, %xmm2
     49 ; CHECK-NOSSSE3-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
     50 ; CHECK-NOSSSE3-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
     51 ; CHECK-NOSSSE3-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
     52 ; CHECK-NOSSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
     53 ; CHECK-NOSSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
     54 ; CHECK-NOSSSE3-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
     55 ; CHECK-NOSSSE3-NEXT:    packuswb %xmm2, %xmm0
     56 ; CHECK-NOSSSE3-NEXT:    retq
     57 ;
     58 ; CHECK-SSSE3-LABEL: test2:
     59 ; CHECK-SSSE3:       # %bb.0: # %entry
     60 ; CHECK-SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
     61 ; CHECK-SSSE3-NEXT:    retq
     62 ;
     63 ; CHECK-AVX-LABEL: test2:
     64 ; CHECK-AVX:       # %bb.0: # %entry
     65 ; CHECK-AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
     66 ; CHECK-AVX-NEXT:    retq
     67 ;
     68 ; CHECK-WIDE-AVX-LABEL: test2:
     69 ; CHECK-WIDE-AVX:       # %bb.0: # %entry
     70 ; CHECK-WIDE-AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
     71 ; CHECK-WIDE-AVX-NEXT:    retq
     72 entry:
     73   %r = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %v)
     74   ret <4 x i32> %r
     75 }
     76 
     77 define <2 x i64> @test3(<2 x i64> %v) {
     78 ; CHECK-NOSSSE3-LABEL: test3:
     79 ; CHECK-NOSSSE3:       # %bb.0: # %entry
     80 ; CHECK-NOSSSE3-NEXT:    pxor %xmm1, %xmm1
     81 ; CHECK-NOSSSE3-NEXT:    movdqa %xmm0, %xmm2
     82 ; CHECK-NOSSSE3-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
     83 ; CHECK-NOSSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
     84 ; CHECK-NOSSSE3-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
     85 ; CHECK-NOSSSE3-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
     86 ; CHECK-NOSSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
     87 ; CHECK-NOSSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
     88 ; CHECK-NOSSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
     89 ; CHECK-NOSSSE3-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
     90 ; CHECK-NOSSSE3-NEXT:    packuswb %xmm2, %xmm0
     91 ; CHECK-NOSSSE3-NEXT:    retq
     92 ;
     93 ; CHECK-SSSE3-LABEL: test3:
     94 ; CHECK-SSSE3:       # %bb.0: # %entry
     95 ; CHECK-SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
     96 ; CHECK-SSSE3-NEXT:    retq
     97 ;
     98 ; CHECK-AVX-LABEL: test3:
     99 ; CHECK-AVX:       # %bb.0: # %entry
    100 ; CHECK-AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
    101 ; CHECK-AVX-NEXT:    retq
    102 ;
    103 ; CHECK-WIDE-AVX-LABEL: test3:
    104 ; CHECK-WIDE-AVX:       # %bb.0: # %entry
    105 ; CHECK-WIDE-AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
    106 ; CHECK-WIDE-AVX-NEXT:    retq
    107 entry:
    108   %r = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %v)
    109   ret <2 x i64> %r
    110 }
    111 
    112 declare <16 x i16> @llvm.bswap.v16i16(<16 x i16>)
    113 declare <8 x i32> @llvm.bswap.v8i32(<8 x i32>)
    114 declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>)
    115 
    116 define <16 x i16> @test4(<16 x i16> %v) {
    117 ; CHECK-NOSSSE3-LABEL: test4:
    118 ; CHECK-NOSSSE3:       # %bb.0: # %entry
    119 ; CHECK-NOSSSE3-NEXT:    pxor %xmm2, %xmm2
    120 ; CHECK-NOSSSE3-NEXT:    movdqa %xmm0, %xmm3
    121 ; CHECK-NOSSSE3-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
    122 ; CHECK-NOSSSE3-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[1,0,3,2,4,5,6,7]
    123 ; CHECK-NOSSSE3-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,4,7,6]
    124 ; CHECK-NOSSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
    125 ; CHECK-NOSSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7]
    126 ; CHECK-NOSSSE3-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
    127 ; CHECK-NOSSSE3-NEXT:    packuswb %xmm3, %xmm0
    128 ; CHECK-NOSSSE3-NEXT:    movdqa %xmm1, %xmm3
    129 ; CHECK-NOSSSE3-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
    130 ; CHECK-NOSSSE3-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[1,0,3,2,4,5,6,7]
    131 ; CHECK-NOSSSE3-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,4,7,6]
    132 ; CHECK-NOSSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    133 ; CHECK-NOSSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[1,0,3,2,4,5,6,7]
    134 ; CHECK-NOSSSE3-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,4,7,6]
    135 ; CHECK-NOSSSE3-NEXT:    packuswb %xmm3, %xmm1
    136 ; CHECK-NOSSSE3-NEXT:    retq
    137 ;
    138 ; CHECK-SSSE3-LABEL: test4:
    139 ; CHECK-SSSE3:       # %bb.0: # %entry
    140 ; CHECK-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
    141 ; CHECK-SSSE3-NEXT:    pshufb %xmm2, %xmm0
    142 ; CHECK-SSSE3-NEXT:    pshufb %xmm2, %xmm1
    143 ; CHECK-SSSE3-NEXT:    retq
    144 ;
    145 ; CHECK-AVX-LABEL: test4:
    146 ; CHECK-AVX:       # %bb.0: # %entry
    147 ; CHECK-AVX-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14,17,16,19,18,21,20,23,22,25,24,27,26,29,28,31,30]
    148 ; CHECK-AVX-NEXT:    retq
    149 ;
    150 ; CHECK-WIDE-AVX-LABEL: test4:
    151 ; CHECK-WIDE-AVX:       # %bb.0: # %entry
    152 ; CHECK-WIDE-AVX-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14,17,16,19,18,21,20,23,22,25,24,27,26,29,28,31,30]
    153 ; CHECK-WIDE-AVX-NEXT:    retq
    154 entry:
    155   %r = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %v)
    156   ret <16 x i16> %r
    157 }
    158 
    159 define <8 x i32> @test5(<8 x i32> %v) {
    160 ; CHECK-NOSSSE3-LABEL: test5:
    161 ; CHECK-NOSSSE3:       # %bb.0: # %entry
    162 ; CHECK-NOSSSE3-NEXT:    pxor %xmm2, %xmm2
    163 ; CHECK-NOSSSE3-NEXT:    movdqa %xmm0, %xmm3
    164 ; CHECK-NOSSSE3-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
    165 ; CHECK-NOSSSE3-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[3,2,1,0,4,5,6,7]
    166 ; CHECK-NOSSSE3-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,6,5,4]
    167 ; CHECK-NOSSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
    168 ; CHECK-NOSSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
    169 ; CHECK-NOSSSE3-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
    170 ; CHECK-NOSSSE3-NEXT:    packuswb %xmm3, %xmm0
    171 ; CHECK-NOSSSE3-NEXT:    movdqa %xmm1, %xmm3
    172 ; CHECK-NOSSSE3-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
    173 ; CHECK-NOSSSE3-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[3,2,1,0,4,5,6,7]
    174 ; CHECK-NOSSSE3-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,6,5,4]
    175 ; CHECK-NOSSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    176 ; CHECK-NOSSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
    177 ; CHECK-NOSSSE3-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4]
    178 ; CHECK-NOSSSE3-NEXT:    packuswb %xmm3, %xmm1
    179 ; CHECK-NOSSSE3-NEXT:    retq
    180 ;
    181 ; CHECK-SSSE3-LABEL: test5:
    182 ; CHECK-SSSE3:       # %bb.0: # %entry
    183 ; CHECK-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
    184 ; CHECK-SSSE3-NEXT:    pshufb %xmm2, %xmm0
    185 ; CHECK-SSSE3-NEXT:    pshufb %xmm2, %xmm1
    186 ; CHECK-SSSE3-NEXT:    retq
    187 ;
    188 ; CHECK-AVX-LABEL: test5:
    189 ; CHECK-AVX:       # %bb.0: # %entry
    190 ; CHECK-AVX-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12,19,18,17,16,23,22,21,20,27,26,25,24,31,30,29,28]
    191 ; CHECK-AVX-NEXT:    retq
    192 ;
    193 ; CHECK-WIDE-AVX-LABEL: test5:
    194 ; CHECK-WIDE-AVX:       # %bb.0: # %entry
    195 ; CHECK-WIDE-AVX-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12,19,18,17,16,23,22,21,20,27,26,25,24,31,30,29,28]
    196 ; CHECK-WIDE-AVX-NEXT:    retq
    197 entry:
    198   %r = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %v)
    199   ret <8 x i32> %r
    200 }
    201 
    202 define <4 x i64> @test6(<4 x i64> %v) {
    203 ; CHECK-NOSSSE3-LABEL: test6:
    204 ; CHECK-NOSSSE3:       # %bb.0: # %entry
    205 ; CHECK-NOSSSE3-NEXT:    pxor %xmm2, %xmm2
    206 ; CHECK-NOSSSE3-NEXT:    movdqa %xmm0, %xmm3
    207 ; CHECK-NOSSSE3-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
    208 ; CHECK-NOSSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
    209 ; CHECK-NOSSSE3-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[3,2,1,0,4,5,6,7]
    210 ; CHECK-NOSSSE3-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,6,5,4]
    211 ; CHECK-NOSSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
    212 ; CHECK-NOSSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
    213 ; CHECK-NOSSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
    214 ; CHECK-NOSSSE3-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
    215 ; CHECK-NOSSSE3-NEXT:    packuswb %xmm3, %xmm0
    216 ; CHECK-NOSSSE3-NEXT:    movdqa %xmm1, %xmm3
    217 ; CHECK-NOSSSE3-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
    218 ; CHECK-NOSSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
    219 ; CHECK-NOSSSE3-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[3,2,1,0,4,5,6,7]
    220 ; CHECK-NOSSSE3-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,6,5,4]
    221 ; CHECK-NOSSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    222 ; CHECK-NOSSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
    223 ; CHECK-NOSSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
    224 ; CHECK-NOSSSE3-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4]
    225 ; CHECK-NOSSSE3-NEXT:    packuswb %xmm3, %xmm1
    226 ; CHECK-NOSSSE3-NEXT:    retq
    227 ;
    228 ; CHECK-SSSE3-LABEL: test6:
    229 ; CHECK-SSSE3:       # %bb.0: # %entry
    230 ; CHECK-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
    231 ; CHECK-SSSE3-NEXT:    pshufb %xmm2, %xmm0
    232 ; CHECK-SSSE3-NEXT:    pshufb %xmm2, %xmm1
    233 ; CHECK-SSSE3-NEXT:    retq
    234 ;
    235 ; CHECK-AVX-LABEL: test6:
    236 ; CHECK-AVX:       # %bb.0: # %entry
    237 ; CHECK-AVX-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,23,22,21,20,19,18,17,16,31,30,29,28,27,26,25,24]
    238 ; CHECK-AVX-NEXT:    retq
    239 ;
    240 ; CHECK-WIDE-AVX-LABEL: test6:
    241 ; CHECK-WIDE-AVX:       # %bb.0: # %entry
    242 ; CHECK-WIDE-AVX-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,23,22,21,20,19,18,17,16,31,30,29,28,27,26,25,24]
    243 ; CHECK-WIDE-AVX-NEXT:    retq
    244 entry:
    245   %r = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %v)
    246   ret <4 x i64> %r
    247 }
    248 
    249 declare <4 x i16> @llvm.bswap.v4i16(<4 x i16>)
    250 
    251 define <4 x i16> @test7(<4 x i16> %v) {
    252 ; CHECK-NOSSSE3-LABEL: test7:
    253 ; CHECK-NOSSSE3:       # %bb.0: # %entry
    254 ; CHECK-NOSSSE3-NEXT:    pxor %xmm1, %xmm1
    255 ; CHECK-NOSSSE3-NEXT:    movdqa %xmm0, %xmm2
    256 ; CHECK-NOSSSE3-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
    257 ; CHECK-NOSSSE3-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
    258 ; CHECK-NOSSSE3-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
    259 ; CHECK-NOSSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    260 ; CHECK-NOSSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
    261 ; CHECK-NOSSSE3-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
    262 ; CHECK-NOSSSE3-NEXT:    packuswb %xmm2, %xmm0
    263 ; CHECK-NOSSSE3-NEXT:    psrld $16, %xmm0
    264 ; CHECK-NOSSSE3-NEXT:    retq
    265 ;
    266 ; CHECK-SSSE3-LABEL: test7:
    267 ; CHECK-SSSE3:       # %bb.0: # %entry
    268 ; CHECK-SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[1,0],zero,zero,xmm0[5,4],zero,zero,xmm0[9,8],zero,zero,xmm0[13,12],zero,zero
    269 ; CHECK-SSSE3-NEXT:    retq
    270 ;
    271 ; CHECK-AVX-LABEL: test7:
    272 ; CHECK-AVX:       # %bb.0: # %entry
    273 ; CHECK-AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,0],zero,zero,xmm0[5,4],zero,zero,xmm0[9,8],zero,zero,xmm0[13,12],zero,zero
    274 ; CHECK-AVX-NEXT:    retq
    275 ;
    276 ; CHECK-WIDE-AVX-LABEL: test7:
    277 ; CHECK-WIDE-AVX:       # %bb.0: # %entry
    278 ; CHECK-WIDE-AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
    279 ; CHECK-WIDE-AVX-NEXT:    retq
    280 entry:
    281   %r = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %v)
    282   ret <4 x i16> %r
    283 }
    284 
;
; Double BSWAP -> Identity: bswap(bswap(x)) must fold to x, so every
; configuration is expected to emit nothing but the return.
;
    288 
    289 define <8 x i16> @identity_v8i16(<8 x i16> %v) {
    290 ; CHECK-ALL-LABEL: identity_v8i16:
    291 ; CHECK-ALL:       # %bb.0: # %entry
    292 ; CHECK-ALL-NEXT:    retq
    293 entry:
    294   %bs1 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %v)
    295   %bs2 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %bs1)
    296   ret <8 x i16> %bs2
    297 }
    298 
    299 define <4 x i32> @identity_v4i32(<4 x i32> %v) {
    300 ; CHECK-ALL-LABEL: identity_v4i32:
    301 ; CHECK-ALL:       # %bb.0: # %entry
    302 ; CHECK-ALL-NEXT:    retq
    303 entry:
    304   %bs1 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %v)
    305   %bs2 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %bs1)
    306   ret <4 x i32> %bs2
    307 }
    308 
    309 define <2 x i64> @identity_v2i64(<2 x i64> %v) {
    310 ; CHECK-ALL-LABEL: identity_v2i64:
    311 ; CHECK-ALL:       # %bb.0: # %entry
    312 ; CHECK-ALL-NEXT:    retq
    313 entry:
    314   %bs1 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %v)
    315   %bs2 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %bs1)
    316   ret <2 x i64> %bs2
    317 }
    318 
    319 define <16 x i16> @identity_v16i16(<16 x i16> %v) {
    320 ; CHECK-ALL-LABEL: identity_v16i16:
    321 ; CHECK-ALL:       # %bb.0: # %entry
    322 ; CHECK-ALL-NEXT:    retq
    323 entry:
    324   %bs1 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %v)
    325   %bs2 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %bs1)
    326   ret <16 x i16> %bs2
    327 }
    328 
    329 define <8 x i32> @identity_v8i32(<8 x i32> %v) {
    330 ; CHECK-ALL-LABEL: identity_v8i32:
    331 ; CHECK-ALL:       # %bb.0: # %entry
    332 ; CHECK-ALL-NEXT:    retq
    333 entry:
    334   %bs1 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %v)
    335   %bs2 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %bs1)
    336   ret <8 x i32> %bs2
    337 }
    338 
    339 define <4 x i64> @identity_v4i64(<4 x i64> %v) {
    340 ; CHECK-ALL-LABEL: identity_v4i64:
    341 ; CHECK-ALL:       # %bb.0: # %entry
    342 ; CHECK-ALL-NEXT:    retq
    343 entry:
    344   %bs1 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %v)
    345   %bs2 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %bs1)
    346   ret <4 x i64> %bs2
    347 }
    348 
    349 define <4 x i16> @identity_v4i16(<4 x i16> %v) {
    350 ; CHECK-ALL-LABEL: identity_v4i16:
    351 ; CHECK-ALL:       # %bb.0: # %entry
    352 ; CHECK-ALL-NEXT:    retq
    353 entry:
    354   %bs1 = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %v)
    355   %bs2 = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %bs1)
    356   ret <4 x i16> %bs2
    357 }
    358 
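;
; Constant Folding: bswap of a constant vector must be evaluated at compile
; time, leaving only a load of the already-swapped constant.
;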
    362 
    363 define <8 x i16> @fold_v8i16() {
    364 ; CHECK-SSE-LABEL: fold_v8i16:
    365 ; CHECK-SSE:       # %bb.0: # %entry
    366 ; CHECK-SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,256,65535,512,65023,1024,64511,1536]
    367 ; CHECK-SSE-NEXT:    retq
    368 ;
    369 ; CHECK-AVX-LABEL: fold_v8i16:
    370 ; CHECK-AVX:       # %bb.0: # %entry
    371 ; CHECK-AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [0,256,65535,512,65023,1024,64511,1536]
    372 ; CHECK-AVX-NEXT:    retq
    373 ;
    374 ; CHECK-WIDE-AVX-LABEL: fold_v8i16:
    375 ; CHECK-WIDE-AVX:       # %bb.0: # %entry
    376 ; CHECK-WIDE-AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [0,256,65535,512,65023,1024,64511,1536]
    377 ; CHECK-WIDE-AVX-NEXT:    retq
    378 entry:
    379   %r = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> <i16 0, i16 1, i16 -1, i16 2, i16 -3, i16 4, i16 -5, i16 6>)
    380   ret <8 x i16> %r
    381 }
    382 
    383 define <4 x i32> @fold_v4i32() {
    384 ; CHECK-SSE-LABEL: fold_v4i32:
    385 ; CHECK-SSE:       # %bb.0: # %entry
    386 ; CHECK-SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,4294967295,33554432,4261412863]
    387 ; CHECK-SSE-NEXT:    retq
    388 ;
    389 ; CHECK-AVX-LABEL: fold_v4i32:
    390 ; CHECK-AVX:       # %bb.0: # %entry
    391 ; CHECK-AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [0,4294967295,33554432,4261412863]
    392 ; CHECK-AVX-NEXT:    retq
    393 ;
    394 ; CHECK-WIDE-AVX-LABEL: fold_v4i32:
    395 ; CHECK-WIDE-AVX:       # %bb.0: # %entry
    396 ; CHECK-WIDE-AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [0,4294967295,33554432,4261412863]
    397 ; CHECK-WIDE-AVX-NEXT:    retq
    398 entry:
    399   %r = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> <i32 0, i32 -1, i32 2, i32 -3>)
    400   ret <4 x i32> %r
    401 }
    402 
    403 define <2 x i64> @fold_v2i64() {
    404 ; CHECK-SSE-LABEL: fold_v2i64:
    405 ; CHECK-SSE:       # %bb.0: # %entry
    406 ; CHECK-SSE-NEXT:    movaps {{.*#+}} xmm0 = [18374686479671623680,18446744073709551615]
    407 ; CHECK-SSE-NEXT:    retq
    408 ;
    409 ; CHECK-AVX-LABEL: fold_v2i64:
    410 ; CHECK-AVX:       # %bb.0: # %entry
    411 ; CHECK-AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [18374686479671623680,18446744073709551615]
    412 ; CHECK-AVX-NEXT:    retq
    413 ;
    414 ; CHECK-WIDE-AVX-LABEL: fold_v2i64:
    415 ; CHECK-WIDE-AVX:       # %bb.0: # %entry
    416 ; CHECK-WIDE-AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [18374686479671623680,18446744073709551615]
    417 ; CHECK-WIDE-AVX-NEXT:    retq
    418 entry:
    419   %r = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> <i64 255, i64 -1>)
    420   ret <2 x i64> %r
    421 }
    422 
    423 define <16 x i16> @fold_v16i16() {
    424 ; CHECK-SSE-LABEL: fold_v16i16:
    425 ; CHECK-SSE:       # %bb.0: # %entry
    426 ; CHECK-SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,256,65535,512,65023,1024,64511,1536]
    427 ; CHECK-SSE-NEXT:    movaps {{.*#+}} xmm1 = [63999,2048,63487,2560,62975,3072,62463,3584]
    428 ; CHECK-SSE-NEXT:    retq
    429 ;
    430 ; CHECK-AVX-LABEL: fold_v16i16:
    431 ; CHECK-AVX:       # %bb.0: # %entry
    432 ; CHECK-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,256,65535,512,65023,1024,64511,1536,63999,2048,63487,2560,62975,3072,62463,3584]
    433 ; CHECK-AVX-NEXT:    retq
    434 ;
    435 ; CHECK-WIDE-AVX-LABEL: fold_v16i16:
    436 ; CHECK-WIDE-AVX:       # %bb.0: # %entry
    437 ; CHECK-WIDE-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,256,65535,512,65023,1024,64511,1536,63999,2048,63487,2560,62975,3072,62463,3584]
    438 ; CHECK-WIDE-AVX-NEXT:    retq
    439 entry:
    440   %r = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> <i16 0, i16 1, i16 -1, i16 2, i16 -3, i16 4, i16 -5, i16 6, i16 -7, i16 8, i16 -9, i16 10, i16 -11, i16 12, i16 -13, i16 14>)
    441   ret <16 x i16> %r
    442 }
    443 
    444 define <8 x i32> @fold_v8i32() {
    445 ; CHECK-SSE-LABEL: fold_v8i32:
    446 ; CHECK-SSE:       # %bb.0: # %entry
    447 ; CHECK-SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,16777216,4294967295,33554432]
    448 ; CHECK-SSE-NEXT:    movaps {{.*#+}} xmm1 = [4261412863,67108864,4227858431,100663296]
    449 ; CHECK-SSE-NEXT:    retq
    450 ;
    451 ; CHECK-AVX-LABEL: fold_v8i32:
    452 ; CHECK-AVX:       # %bb.0: # %entry
    453 ; CHECK-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,16777216,4294967295,33554432,4261412863,67108864,4227858431,100663296]
    454 ; CHECK-AVX-NEXT:    retq
    455 ;
    456 ; CHECK-WIDE-AVX-LABEL: fold_v8i32:
    457 ; CHECK-WIDE-AVX:       # %bb.0: # %entry
    458 ; CHECK-WIDE-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,16777216,4294967295,33554432,4261412863,67108864,4227858431,100663296]
    459 ; CHECK-WIDE-AVX-NEXT:    retq
    460 entry:
    461   %r = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> <i32 0, i32 1, i32 -1, i32 2, i32 -3, i32 4, i32 -5, i32 6>)
    462   ret <8 x i32> %r
    463 }
    464 
    465 define <4 x i64> @fold_v4i64() {
    466 ; CHECK-SSE-LABEL: fold_v4i64:
    467 ; CHECK-SSE:       # %bb.0: # %entry
    468 ; CHECK-SSE-NEXT:    movaps {{.*#+}} xmm0 = [18374686479671623680,18446744073709551615]
    469 ; CHECK-SSE-NEXT:    movaps {{.*#+}} xmm1 = [18446462598732840960,72056494526300160]
    470 ; CHECK-SSE-NEXT:    retq
    471 ;
    472 ; CHECK-AVX-LABEL: fold_v4i64:
    473 ; CHECK-AVX:       # %bb.0: # %entry
    474 ; CHECK-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [18374686479671623680,18446744073709551615,18446462598732840960,72056494526300160]
    475 ; CHECK-AVX-NEXT:    retq
    476 ;
    477 ; CHECK-WIDE-AVX-LABEL: fold_v4i64:
    478 ; CHECK-WIDE-AVX:       # %bb.0: # %entry
    479 ; CHECK-WIDE-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [18374686479671623680,18446744073709551615,18446462598732840960,72056494526300160]
    480 ; CHECK-WIDE-AVX-NEXT:    retq
    481 entry:
    482   %r = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> <i64 255, i64 -1, i64 65535, i64 16776960>)
    483   ret <4 x i64> %r
    484 }
    485