; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s

; Codegen test for 256-bit vector splats when only AVX (no AVX2) is enabled.
; Every function below produces a vector whose lanes all hold the same value;
; the expected instruction sequence is listed right before each function.
; Each group of expectations starts with the function's assembly label
; (underscore-prefixed because of the Darwin triple) so that a pattern cannot
; accidentally be satisfied by the output of a neighbouring function.

; Splat byte 5 of a <32 x i8> vector (shuffle mask is all 5s).
; CHECK: _funcA:
; CHECK: vpunpcklbw %xmm
; CHECK-NEXT: vpunpckhbw %xmm
; CHECK-NEXT: vinsertf128 $1
; CHECK-NEXT: vpermilps $85
define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp {
entry:
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <32 x i8> %shuffle
}

; Splat element 5 of a <16 x i16> vector (shuffle mask is all 5s).
; CHECK: _funcB:
; CHECK: vpunpckhwd %xmm
; CHECK-NEXT: vinsertf128 $1
; CHECK-NEXT: vpermilps $85
define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
entry:
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i16> %shuffle
}

; Build a <4 x i64> with the scalar %q inserted into all four lanes.
; CHECK: _funcC:
; CHECK: vmovd
; CHECK-NEXT: vmovlhps %xmm
; CHECK-NEXT: vinsertf128 $1
define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
entry:
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
  %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
  %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
  ret <4 x i64> %vecinit6.i
}

; Build a <4 x double> with the scalar %q inserted into all four lanes.
; CHECK: _funcD:
; CHECK: vshufpd $0
; CHECK-NEXT: vinsertf128 $1
define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
entry:
  %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
  %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
  %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
  %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
  ret <4 x double> %vecinit6.i
}

; Test this simple opt:
;   shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
; To:
;   shuffle (vload ptr), undef, <1, 1, 1, 1>
; The function loads one i32 from element [1][1] of a stack array and inserts
; it into lanes 6 and 7 of an otherwise undef <8 x i32>; the remaining lanes
; are undef, so the whole result may be lowered as a splat of the loaded value.
; CHECK: _funcE:
; CHECK: vmovdqa
; CHECK-NEXT: vinsertf128 $1
; CHECK-NEXT: vpermilps $-1
define <8 x float> @funcE() nounwind {
allocas:
  %udx495 = alloca [18 x [18 x float]], align 32
  br label %for_test505.preheader

for_test505.preheader:                            ; preds = %for_test505.preheader, %allocas
  br i1 undef, label %for_exit499, label %for_test505.preheader

for_exit499:                                      ; preds = %for_test505.preheader
  br i1 undef, label %__load_and_broadcast_32.exit1249, label %load.i1247

load.i1247:                                       ; preds = %for_exit499
  %ptr1227 = getelementptr [18 x [18 x float]]* %udx495, i64 0, i64 1, i64 1
  %ptr.i1237 = bitcast float* %ptr1227 to i32*
  %val.i1238 = load i32* %ptr.i1237, align 4
  %ret6.i1245 = insertelement <8 x i32> undef, i32 %val.i1238, i32 6
  %ret7.i1246 = insertelement <8 x i32> %ret6.i1245, i32 %val.i1238, i32 7
  %phitmp = bitcast <8 x i32> %ret7.i1246 to <8 x float>
  br label %__load_and_broadcast_32.exit1249

__load_and_broadcast_32.exit1249:                 ; preds = %load.i1247, %for_exit499
  %load_broadcast12281250 = phi <8 x float> [ %phitmp, %load.i1247 ], [ undef, %for_exit499 ]
  ret <8 x float> %load_broadcast12281250
}

; Insert the scalar %val into lanes 6 and 7 of an undef <8 x i32>; all other
; lanes stay undef.
; CHECK: _funcF:
; CHECK: vinsertf128 $1
; CHECK-NEXT: vpermilps $0
define <8 x float> @funcF(i32 %val) nounwind {
  %ret6 = insertelement <8 x i32> undef, i32 %val, i32 6
  %ret7 = insertelement <8 x i32> %ret6, i32 %val, i32 7
  %tmp = bitcast <8 x i32> %ret7 to <8 x float>
  ret <8 x float> %tmp
}

; Splat element 0 of a <8 x float> vector (shuffle mask is all 0s).
; CHECK: _funcG:
; CHECK: vinsertf128 $1
; CHECK-NEXT: vpermilps $0
define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp {
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x float> %shuffle
}

; Splat element 5 of a <8 x float> vector (shuffle mask is all 5s); element 5
; lives in the upper 128-bit half, hence the expected extract of half 1.
; CHECK: _funcH:
; CHECK: vextractf128 $1
; CHECK-NEXT: vinsertf128 $1
; CHECK-NEXT: vpermilps $85
define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x float> %shuffle
}