; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s

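; Declarations for the target shuffle intrinsics exercised by the tests below.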
declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>)
declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>)
declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)
declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>)

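; A pshufb acting as a per-lane left byte shift, followed by a shuffle that
; keeps only the zeroed bytes, should fold away to an all-zeros vector.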
define <32 x i8> @combine_pshufb_pslldq(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_pslldq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vxorps %ymm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
  %2 = shufflevector <32 x i8> %1, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  ret <32 x i8> %2
}

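; The per-lane right-shift counterpart; it should likewise fold to zero.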
define <32 x i8> @combine_pshufb_psrldq(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_psrldq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vxorps %ymm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>)
  %2 = shufflevector <32 x i8> %1, <32 x i8> zeroinitializer, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
  ret <32 x i8> %2
}

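; A vpermd followed by a byte shuffle should combine into a single vpshufb.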
define <32 x i8> @combine_pshufb_vpermd(<8 x i32> %a) {
; CHECK-LABEL: combine_pshufb_vpermd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,16,17,18,18]
; CHECK-NEXT:    retq
  %tmp0 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4>)
  %tmp1 = bitcast <8 x i32> %tmp0 to <32 x i8>
  %tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 30>
  ret <32 x i8> %tmp2
}

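; Same as above, but going through vpermps and a float bitcast.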
define <32 x i8> @combine_pshufb_vpermps(<8 x float> %a) {
; CHECK-LABEL: combine_pshufb_vpermps:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,16,17,18,18]
; CHECK-NEXT:    retq
  %tmp0 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4>)
  %tmp1 = bitcast <8 x float> %tmp0 to <32 x i8>
  %tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 30>
  ret <32 x i8> %tmp2
}

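; A permq followed by a pshufb that zeroes the upper lane should be matched as
; a single lane-zeroing vperm2i128.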
define <4 x i64> @combine_permq_pshufb_as_vperm2i128(<4 x i64> %a0) {
; CHECK-LABEL: combine_permq_pshufb_as_vperm2i128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
; CHECK-NEXT:    vpaddq {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %2 = bitcast <4 x i64> %1 to <32 x i8>
  %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255>)
  %4 = bitcast <32 x i8> %3 to <4 x i64>
  %5 = add <4 x i64> %4, <i64 1, i64 1, i64 3, i64 3>
  ret <4 x i64> %5
}

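; Here the net effect of the permq/pshufb pair is just to zero the upper lane,
; so it should become a blend with zero.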
define <32 x i8> @combine_permq_pshufb_as_vpblendd(<4 x i64> %a0) {
; CHECK-LABEL: combine_permq_pshufb_as_vpblendd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpxor %ymm1, %ymm1, %ymm1
; CHECK-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  %2 = bitcast <4 x i64> %1 to <32 x i8>
  %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255>)
  ret <32 x i8> %3
}

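; A pshufb with an all-zero mask is a broadcast of byte 0.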
define <16 x i8> @combine_pshufb_as_vpbroadcastb128(<16 x i8> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastb128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastb %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> zeroinitializer)
  ret <16 x i8> %1
}

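; The 256-bit equivalent: a zero-mask pshufb plus a zero-index permd should
; collapse to a single ymm byte broadcast.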
define <32 x i8> @combine_pshufb_as_vpbroadcastb256(<2 x i64> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastb256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vpbroadcastb %xmm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %2 = bitcast <4 x i64> %1 to <32 x i8>
  %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> zeroinitializer)
  %4 = bitcast <32 x i8> %3 to <8 x i32>
  %5 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %4, <8 x i32> zeroinitializer)
  %6 = bitcast <8 x i32> %5 to <32 x i8>
  ret <32 x i8> %6
}

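; A pshufb repeating bytes 0,1 is a word broadcast.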
define <16 x i8> @combine_pshufb_as_vpbroadcastw128(<16 x i8> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastw128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastw %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>)
  ret <16 x i8> %1
}

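; The 256-bit word-broadcast equivalent.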
define <32 x i8> @combine_pshufb_as_vpbroadcastw256(<2 x i64> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastw256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vpbroadcastw %xmm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %2 = bitcast <4 x i64> %1 to <32 x i8>
  %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>)
  %4 = bitcast <32 x i8> %3 to <8 x i32>
  %5 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %4, <8 x i32> zeroinitializer)
  %6 = bitcast <8 x i32> %5 to <32 x i8>
  ret <32 x i8> %6
}

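; A pshufb repeating bytes 0-3 is a dword broadcast; the trailing add just
; consumes the result.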
define <16 x i8> @combine_pshufb_as_vpbroadcastd128(<16 x i8> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastd128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastd %xmm0, %xmm0
; CHECK-NEXT:    vpaddb {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>)
  %2 = add <16 x i8> %1, <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>
  ret <16 x i8> %2
}

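; permd with an all-zero index vector is a dword broadcast.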
define <8 x i32> @combine_permd_as_vpbroadcastd256(<4 x i32> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastd256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vpbroadcastd %xmm0, %ymm0
; CHECK-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %1, <8 x i32> zeroinitializer)
  %3 = add <8 x i32> %2, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i32> %3
}

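; A pshufb repeating bytes 0-7 is a qword broadcast.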
define <16 x i8> @combine_pshufb_as_vpbroadcastq128(<16 x i8> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastq128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
  ret <16 x i8> %1
}

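; permd repeating indices 0,1 is a qword broadcast.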
define <8 x i32> @combine_permd_as_vpbroadcastq256(<4 x i32> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastq256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vpbroadcastq %xmm0, %ymm0
; CHECK-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %1, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>)
  %3 = add <8 x i32> %2, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i32> %3
}

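; The float variant: a byte-level broadcast of elements 0-3 should be matched
; as vbroadcastss.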
define <4 x float> @combine_pshufb_as_vpbroadcastss128(<4 x float> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastss128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = bitcast <4 x float> %a to <16 x i8>
  %2 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>)
  %3 = bitcast <16 x i8> %2 to <4 x float>
  ret <4 x float> %3
}

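; permps with an all-zero index vector is a scalar float broadcast.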
define <8 x float> @combine_permd_as_vpbroadcastss256(<4 x float> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastss256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %1, <8 x i32> zeroinitializer)
  ret <8 x float> %2
}

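; permps repeating indices 0,1 broadcasts a double once bitcast back.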
define <4 x double> @combine_permd_as_vpbroadcastsd256(<2 x double> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastsd256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %2 = bitcast <4 x double> %1 to <8 x float>
  %3 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %2, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>)
  %4 = bitcast <8 x float> %3 to <4 x double>
  ret <4 x double> %4
}

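; When the permd indices move whole qwords, the shuffle should use the
; immediate form vpermq instead of needing an index vector.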
define <8 x i32> @combine_permd_as_permq(<8 x i32> %a) {
; CHECK-LABEL: combine_permd_as_permq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,1]
; CHECK-NEXT:    retq
  %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 4, i32 5, i32 2, i32 3>)
  ret <8 x i32> %1
}

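; Likewise, qword-aligned permps indices should use immediate vpermpd.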
define <8 x float> @combine_permps_as_permpd(<8 x float> %a) {
; CHECK-LABEL: combine_permps_as_permpd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,2,0,1]
; CHECK-NEXT:    retq
  %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 0, i32 1, i32 2, i32 3>)
  ret <8 x float> %1
}

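; A pshufb encoding a per-lane left byte shift; per the checks it is currently
; still emitted as a masked vpshufb rather than vpslldq.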
define <32 x i8> @combine_pshufb_as_pslldq(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_as_pslldq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21]
; CHECK-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5>)
  ret <32 x i8> %res0
}

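; The right byte shift counterpart, also still emitted as vpshufb.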
define <32 x i8> @combine_pshufb_as_psrldq(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_as_psrldq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>)
  ret <32 x i8> %res0
}

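; A pshufb that only permutes words in the low half of each lane should become
; vpshuflw.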
define <32 x i8> @combine_pshufb_as_pshuflw(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_as_pshuflw:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15]
; CHECK-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 2, i8 3, i8 0, i8 1, i8 6, i8 7, i8 4, i8 5, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 2, i8 3, i8 0, i8 1, i8 6, i8 7, i8 4, i8 5, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
  ret <32 x i8> %res0
}

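; A pshufb that only permutes words in the high half of each lane should
; become vpshufhw.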
define <32 x i8> @combine_pshufb_as_pshufhw(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_as_pshufhw:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14]
; CHECK-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13>)
  ret <32 x i8> %res0
}

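; Two chained pshufbs that together touch both halves of each lane compose
; into one full vpshufb; they must not be split into pshuflw+pshufhw.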
define <32 x i8> @combine_pshufb_not_as_pshufw(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_not_as_pshufw:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13,18,19,16,17,22,23,20,21,26,27,24,25,30,31,28,29]
; CHECK-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 2, i8 3, i8 0, i8 1, i8 6, i8 7, i8 4, i8 5, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 2, i8 3, i8 0, i8 1, i8 6, i8 7, i8 4, i8 5, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
  %res1 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %res0, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13>)
  ret <32 x i8> %res1
}