; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

; Verify that instcombine is able to fold identity shuffles.

define <8 x i32> @identity_test_vpermd(<8 x i32> %a0) {
; CHECK-LABEL: @identity_test_vpermd(
; CHECK-NEXT:    ret <8 x i32> %a0
;
  %a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>)
  ret <8 x i32> %a
}

define <8 x float> @identity_test_vpermps(<8 x float> %a0) {
; CHECK-LABEL: @identity_test_vpermps(
; CHECK-NEXT:    ret <8 x float> %a0
;
  %a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>)
  ret <8 x float> %a
}

; Instcombine should be able to fold the following shuffle to a builtin shufflevector
; with a shuffle mask of all zeroes.

define <8 x i32> @zero_test_vpermd(<8 x i32> %a0) {
; CHECK-LABEL: @zero_test_vpermd(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> zeroinitializer
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> zeroinitializer)
  ret <8 x i32> %a
}

define <8 x float> @zero_test_vpermps(<8 x float> %a0) {
; CHECK-LABEL: @zero_test_vpermps(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> zeroinitializer
; CHECK-NEXT:    ret <8 x float> [[TMP1]]
;
  %a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> zeroinitializer)
  ret <8 x float> %a
}

; Verify that instcombine is able to fold constant shuffles.

define <8 x i32> @shuffle_test_vpermd(<8 x i32> %a0) {
; CHECK-LABEL: @shuffle_test_vpermd(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
  ret <8 x i32> %a
}

define <8 x float> @shuffle_test_vpermps(<8 x float> %a0) {
; CHECK-LABEL: @shuffle_test_vpermps(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT:    ret <8 x float> [[TMP1]]
;
  %a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
  ret <8 x float> %a
}

; Verify that instcombine is able to fold constant shuffles with undef mask elements.

define <8 x i32> @undef_test_vpermd(<8 x i32> %a0) {
; CHECK-LABEL: @undef_test_vpermd(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
  ret <8 x i32> %a
}

define <8 x float> @undef_test_vpermps(<8 x float> %a0) {
; CHECK-LABEL: @undef_test_vpermps(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT:    ret <8 x float> [[TMP1]]
;
  %a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
  ret <8 x float> %a
}

declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>)
declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>)