Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3,+sse4a | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
      3 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2,+sse4a | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE42
      4 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx,+sse4a | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
      5 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2,+sse4a | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
      6 ;
      7 ; Combine tests involving SSE4A target shuffles (EXTRQI,INSERTQI)
      8 
      9 declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)
     10 
     11 define <16 x i8> @combine_extrqi_pshufb_16i8(<16 x i8> %a0) {
     12 ; ALL-LABEL: combine_extrqi_pshufb_16i8:
     13 ; ALL:       # %bb.0:
     14 ; ALL-NEXT:    extrq {{.*#+}} xmm0 = xmm0[1,2],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
     15 ; ALL-NEXT:    retq
        ; Byte-element test: a shufflevector followed by PSHUFB is expected to fold
        ; into a single extrq for every RUN configuration (all share the ALL prefix).
        ;
        ; %1 puts bytes 1 and 2 of %a0 in lanes 0-1 and fills lanes 2-7 from the zero
        ; vector (index 16 is element 0 of the second operand); lanes 8-15 are undef.
     16   %1 = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 2, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
        ; The PSHUFB mask passes lanes 0-4 through unchanged and uses 255 for lanes
        ; 5-7 (a mask byte with its top bit set selects zero), so the combined result
        ; is a0[1], a0[2] followed by six zero bytes, as the check line above shows.
     17   %2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 255, i8 255, i8 255, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
     18   ret <16 x i8> %2
     19 }
     20 
     21 define <8 x i16> @combine_extrqi_pshufb_8i16(<8 x i16> %a0) {
     22 ; ALL-LABEL: combine_extrqi_pshufb_8i16:
     23 ; ALL:       # %bb.0:
     24 ; ALL-NEXT:    extrq {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
     25 ; ALL-NEXT:    retq
        ; 16-bit-element variant: the shuffle is done on <8 x i16> and the PSHUFB is
        ; reached through bitcasts; every RUN configuration expects a single extrq.
        ;
        ; %1 selects words 1 and 2 of %a0, then two words from the zero vector
        ; (index 8 is element 0 of the second operand); the upper four words are undef.
     26   %1 = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 2, i32 8, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
     27   %2 = bitcast <8 x i16> %1 to <16 x i8>
        ; The PSHUFB mask keeps only bytes 0-1 of %2 (word 1 of %a0, i.e. bytes 2-3 of
        ; the original register) and zeroes bytes 2-7 via index 255, matching the
        ; xmm0[2,3],zero,... pattern in the check line.
     28   %3 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %2, <16 x i8> <i8 0, i8 1, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
     29   %4 = bitcast <16 x i8> %3 to <8 x i16>
     30   ret <8 x i16> %4
     31 }
     32 
     33 define <16 x i8> @combine_insertqi_pshufb_16i8(<16 x i8> %a0, <16 x i8> %a1) {
     34 ; SSSE3-LABEL: combine_insertqi_pshufb_16i8:
     35 ; SSSE3:       # %bb.0:
     36 ; SSSE3-NEXT:    extrq {{.*#+}} xmm1 = xmm1[0,1],zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
     37 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
     38 ; SSSE3-NEXT:    retq
     39 ;
     40 ; SSE42-LABEL: combine_insertqi_pshufb_16i8:
     41 ; SSE42:       # %bb.0:
     42 ; SSE42-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
     43 ; SSE42-NEXT:    retq
     44 ;
     45 ; AVX-LABEL: combine_insertqi_pshufb_16i8:
     46 ; AVX:       # %bb.0:
     47 ; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
     48 ; AVX-NEXT:    retq
        ; Byte-element test: a two-input blend followed by a PSHUFB that discards
        ; every lane taken from %a0. Expected output differs per configuration:
        ; SSSE3 uses extrq on xmm1 plus a register move, while SSE42 and AVX use a
        ; (v)pmovzxwq zero-extension from xmm1.
        ;
        ; %1 takes bytes 0-2 from %a1 (indices 16-18) and bytes 3-7 from %a0; lanes
        ; 8-15 are undef.
     49   %1 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 16, i32 17, i32 18, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
        ; The PSHUFB mask keeps bytes 0-1 and zeroes bytes 2-7 (index 255), so only
        ; a1[0] and a1[1] survive and %a0 no longer contributes to the result.
     50   %2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
     51   ret <16 x i8> %2
     52 }
     53 
     54 define <8 x i16> @combine_insertqi_pshufb_8i16(<8 x i16> %a0, <8 x i16> %a1) {
     55 ; SSSE3-LABEL: combine_insertqi_pshufb_8i16:
     56 ; SSSE3:       # %bb.0:
     57 ; SSSE3-NEXT:    extrq {{.*#+}} xmm1 = xmm1[0,1],zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
     58 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
     59 ; SSSE3-NEXT:    retq
     60 ;
     61 ; SSE42-LABEL: combine_insertqi_pshufb_8i16:
     62 ; SSE42:       # %bb.0:
     63 ; SSE42-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
     64 ; SSE42-NEXT:    retq
     65 ;
     66 ; AVX-LABEL: combine_insertqi_pshufb_8i16:
     67 ; AVX:       # %bb.0:
     68 ; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
     69 ; AVX-NEXT:    retq
        ; 16-bit-element variant of the two-input blend + PSHUFB test. The expected
        ; output per configuration is the same as for the byte-element version:
        ; extrq on xmm1 plus a move for SSSE3, (v)pmovzxwq from xmm1 for SSE42/AVX.
        ;
        ; %1 takes word 0 from %a1 (index 8) and words 1-3 from %a0; the upper four
        ; words are undef.
     70   %1 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 8, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
     71   %2 = bitcast <8 x i16> %1 to <16 x i8>
        ; The PSHUFB mask keeps bytes 0-1 (the word that came from %a1) and zeroes
        ; bytes 2-7 via index 255, so nothing from %a0 reaches the result.
     72   %3 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %2, <16 x i8> <i8 0, i8 1, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
     73   %4 = bitcast <16 x i8> %3 to <8 x i16>
     74   ret <8 x i16> %4
     75 }
     76 
     77 define <16 x i8> @combine_pshufb_insertqi_pshufb(<16 x i8> %a0, <16 x i8> %a1) {
     78 ; ALL-LABEL: combine_pshufb_insertqi_pshufb:
     79 ; ALL:       # %bb.0:
     80 ; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0],xmm1[0,1],xmm0[3,4,5,6,7,u,u,u,u,u,u,u,u]
     81 ; ALL-NEXT:    retq
        ; Three-step chain (PSHUFB, two-input shuffle, PSHUFB) that every RUN
        ; configuration is expected to fold into a single insertq.
        ;
        ; %1 reverses the low eight bytes of %a0 (indices 7..0); lanes 8-15 are undef.
     82   %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
        ; %2 overwrites lanes 1-2 of %1 with bytes 0-1 of %a1 (indices 16, 17) and
        ; keeps lanes 0 and 3-7 from %1.
     83   %2 = shufflevector <16 x i8> %1, <16 x i8> %a1, <16 x i32> <i32 0, i32 16, i32 17, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
        ; %3 undoes the byte reversal for the lanes that still hold %a0 data
        ; (7->a0[0], 4->a0[3], 3->a0[4], 0->a0[7]) and leaves the inserted %a1 bytes
        ; in lanes 1-2. Lanes 5-6 are undef in the mask, which is what lets the
        ; result be expressed as xmm0[0],xmm1[0,1],xmm0[3..7] in the check line.
     84   %3 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %2, <16 x i8> <i8 7, i8 1, i8 2, i8 4, i8 3, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
     85   ret <16 x i8> %3
     86 }
     87