; InstCombine tests: folding of AVX2 vpermd/vpermps intrinsics into shufflevector.
      1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
      2 ; RUN: opt < %s -instcombine -S | FileCheck %s
      3 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
      4 
      5 ; Verify that instcombine is able to fold identity shuffles.
      6 
      7 define <8 x i32> @identity_test_vpermd(<8 x i32> %a0) {
      8 ; CHECK-LABEL: @identity_test_vpermd(
      9 ; CHECK-NEXT:    ret <8 x i32> %a0
     10 ;
; The index vector <0..7> selects every lane in place, so the permute is an
; identity operation and instcombine folds the whole call to just %a0.
     11   %a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>)
     12   ret <8 x i32> %a
     13 }
     14 
     15 define <8 x float> @identity_test_vpermps(<8 x float> %a0) {
     16 ; CHECK-LABEL: @identity_test_vpermps(
     17 ; CHECK-NEXT:    ret <8 x float> %a0
     18 ;
; Float variant of the identity fold: the <0..7> mask leaves every lane
; where it is, so the vpermps call is eliminated entirely.
     19   %a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>)
     20   ret <8 x float> %a
     21 }
     22 
     23 ; Instcombine should be able to fold the following shuffle to a builtin shufflevector
     24 ; with a shuffle mask of all zeroes.
     25 
     26 define <8 x i32> @zero_test_vpermd(<8 x i32> %a0) {
     27 ; CHECK-LABEL: @zero_test_vpermd(
     28 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> zeroinitializer
     29 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
     30 ;
; An all-zeroes index vector broadcasts lane 0 into every lane; instcombine
; replaces the intrinsic with a generic shufflevector splat of element 0.
     31   %a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> zeroinitializer)
     32   ret <8 x i32> %a
     33 }
     34 
     35 define <8 x float> @zero_test_vpermps(<8 x float> %a0) {
     36 ; CHECK-LABEL: @zero_test_vpermps(
     37 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> zeroinitializer
     38 ; CHECK-NEXT:    ret <8 x float> [[TMP1]]
     39 ;
; Float variant of the splat fold: the zero mask becomes a shufflevector
; that broadcasts lane 0 of %a0 to all eight lanes.
     40   %a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> zeroinitializer)
     41   ret <8 x float> %a
     42 }
     43 
     44 ; Verify that instcombine is able to fold constant shuffles.
     45 
     46 define <8 x i32> @shuffle_test_vpermd(<8 x i32> %a0) {
     47 ; CHECK-LABEL: @shuffle_test_vpermd(
     48 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
     49 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
     50 ;
; A fully-constant (here, lane-reversing) index vector lets instcombine
; lower the intrinsic to a target-independent shufflevector with the same mask.
     51   %a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
     52   ret <8 x i32> %a
     53 }
     54 
     55 define <8 x float> @shuffle_test_vpermps(<8 x float> %a0) {
     56 ; CHECK-LABEL: @shuffle_test_vpermps(
     57 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
     58 ; CHECK-NEXT:    ret <8 x float> [[TMP1]]
     59 ;
; Float variant of the constant-mask fold: the reversed <7..0> mask carries
; over unchanged into the resulting shufflevector.
     60   %a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
     61   ret <8 x float> %a
     62 }
     63 
     64 ; Verify that instcombine is able to fold constant shuffles with undef mask elements.
     65 
     66 define <8 x i32> @undef_test_vpermd(<8 x i32> %a0) {
     67 ; CHECK-LABEL: @undef_test_vpermd(
     68 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
     69 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
     70 ;
; An undef element in the index vector must survive the fold: instcombine
; still lowers to shufflevector, keeping lane 0's mask element undef.
     71   %a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
     72   ret <8 x i32> %a
     73 }
     74 
     75 define <8 x float> @undef_test_vpermps(<8 x float> %a0) {
     76 ; CHECK-LABEL: @undef_test_vpermps(
     77 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
     78 ; CHECK-NEXT:    ret <8 x float> [[TMP1]]
     79 ;
; Float variant of the undef-element fold: the partially-undef constant mask
; is propagated verbatim into the replacement shufflevector.
     80   %a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
     81   ret <8 x float> %a
     82 }
     83 
     84 declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>)
     85 declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>)
     86