; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck -check-prefix=CHECK-SSE %s

; Codegen tests for 128-bit lane insertion (vinsertf128) on x86-64 with AVX.
; Both run lines compile this file with identical llc options; the first one
; verifies the default-prefix checks, the second one verifies only the
; SSE-prefixed check attached to @insert_crash.

; @A: shuffle mask indices 8 pick element 0 of the second (undef) operand, so
; the low four result lanes are undefined and the high four lanes are lanes
; 0-3 of %a. The checks expect a vinsertf128 $1 and no vunpck before it.
; CHECK-NOT: vunpck
; CHECK: vinsertf128 $1
define <8 x float> @A(<8 x float> %a) nounwind uwtable readnone ssp {
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 8, i32 8, i32 8, i32 8, i32 0, i32 1, i32 2, i32 3>
  ret <8 x float> %shuffle
}

; @B: same pattern as @A for <4 x double>. Mask index 4 selects the undef
; operand; the high two result lanes are lanes 0-1 of %a.
; CHECK-NOT: vunpck
; CHECK: vinsertf128 $1
define <4 x double> @B(<4 x double> %a) nounwind uwtable readnone ssp {
entry:
  %shuffle = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> <i32 4, i32 4, i32 0, i32 1>
  ret <4 x double> %shuffle
}

declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone

declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone

; @insert_crash: crash reproducer. The only expectation is that llc finishes
; and emits the function's symbol; no particular instruction is required.
; NOTE(review): the value names look machine-generated and the store goes
; through an undef pointer; keep the body as-is so the reproducer still
; exercises the same path.
; Just check that no crash happens
; CHECK-SSE: _insert_crash
define void @insert_crash() nounwind {
allocas:
  %v1.i.i451 = shufflevector <4 x double> zeroinitializer, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %ret_0a.i.i.i452 = shufflevector <4 x double> %v1.i.i451, <4 x double> undef, <2 x i32> <i32 0, i32 1>
  %vret_0.i.i.i454 = tail call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %ret_0a.i.i.i452, <2 x double> undef) nounwind
  %ret_val.i.i.i463 = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %vret_0.i.i.i454, <2 x double> undef) nounwind
  %ret.i1.i.i464 = extractelement <2 x double> %ret_val.i.i.i463, i32 0
  %double2float = fptrunc double %ret.i1.i.i464 to float
  %smearinsert50 = insertelement <4 x float> undef, float %double2float, i32 3
  %blendAsInt.i503 = bitcast <4 x float> %smearinsert50 to <4 x i32>
  store <4 x i32> %blendAsInt.i503, <4 x i32>* undef, align 4
  ret void
}

;; DAG Combine must remove useless vinsertf128 instructions

; @DAGCombineA: widens %v1 to eight lanes (mask indices 4-7 select the undef
; operand) and immediately narrows back to lanes 0-3, so the result equals
; %v1. No vinsertf128 $1 may appear after the function name.
; CHECK: DAGCombineA
; CHECK-NOT: vinsertf128 $1
define <4 x i32> @DAGCombineA(<4 x i32> %v1) nounwind readonly {
  %1 = shufflevector <4 x i32> %v1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %2
}

; @DAGCombineB: two chained <8 x i32> adds. The checks expect xmm-register
; vpaddd instructions with no vinsertf128 $1 between two consecutive ones.
; NOTE(review): presumably the adds are split into 128-bit halves because the
; selected feature set has no 256-bit integer add - confirm against the
; target description.
; CHECK: DAGCombineB
; CHECK: vpaddd %xmm
; CHECK-NOT: vinsertf128 $1
; CHECK: vpaddd %xmm
define <8 x i32> @DAGCombineB(<8 x i32> %v1, <8 x i32> %v2) nounwind readonly {
  %1 = add <8 x i32> %v1, %v2
  %2 = add <8 x i32> %1, %v1
  ret <8 x i32> %2
}

; The insert_* functions call the vinsertf128 intrinsics with lane index 0 and
; expect a vinsertf128. The insert_undef_* variants pass undef as the 256-bit
; operand and expect only a register move (vmovaps %ymm1, %ymm0).

; CHECK: insert_pd
define <4 x double> @insert_pd(<4 x double> %a0, <2 x double> %a1) {
; CHECK: vinsertf128
  %res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %a0, <2 x double> %a1, i8 0)
  ret <4 x double> %res
}

; CHECK: insert_undef_pd
define <4 x double> @insert_undef_pd(<4 x double> %a0, <2 x double> %a1) {
; CHECK: vmovaps %ymm1, %ymm0
  %res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> undef, <2 x double> %a1, i8 0)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone


; CHECK: insert_ps
define <8 x float> @insert_ps(<8 x float> %a0, <4 x float> %a1) {
; CHECK: vinsertf128
  %res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %a0, <4 x float> %a1, i8 0)
  ret <8 x float> %res
}

; CHECK: insert_undef_ps
define <8 x float> @insert_undef_ps(<8 x float> %a0, <4 x float> %a1) {
; CHECK: vmovaps %ymm1, %ymm0
  %res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> undef, <4 x float> %a1, i8 0)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8) nounwind readnone


; CHECK: insert_si
define <8 x i32> @insert_si(<8 x i32> %a0, <4 x i32> %a1) {
; CHECK: vinsertf128
  %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 0)
  ret <8 x i32> %res
}

; CHECK: insert_undef_si
define <8 x i32> @insert_undef_si(<8 x i32> %a0, <4 x i32> %a1) {
; CHECK: vmovaps %ymm1, %ymm0
  %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> undef, <4 x i32> %a1, i8 0)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32>, <4 x i32>, i8) nounwind readnone

; @vinsertf128_combine: loads four floats from %f + 4 elements with 16-byte
; alignment and inserts them at lane index 1 of an undef vector. No separate
; vmovaps may precede the vinsertf128.
; NOTE(review): presumably the load is expected to fold into the insert's
; memory operand - confirm.
; rdar://10643481
; CHECK: vinsertf128_combine
define <8 x float> @vinsertf128_combine(float* nocapture %f) nounwind uwtable readonly ssp {
; CHECK-NOT: vmovaps
; CHECK: vinsertf128
entry:
  %add.ptr = getelementptr inbounds float* %f, i64 4
  %0 = bitcast float* %add.ptr to <4 x float>*
  %1 = load <4 x float>* %0, align 16
  %2 = tail call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> undef, <4 x float> %1, i8 1)
  ret <8 x float> %2
}

; @vinsertf128_ucombine: same as @vinsertf128_combine, but the load is only
; 8-byte aligned; here no separate vmovups may precede the vinsertf128.
; rdar://11076953
; CHECK: vinsertf128_ucombine
define <8 x float> @vinsertf128_ucombine(float* nocapture %f) nounwind uwtable readonly ssp {
; CHECK-NOT: vmovups
; CHECK: vinsertf128
entry:
  %add.ptr = getelementptr inbounds float* %f, i64 4
  %0 = bitcast float* %add.ptr to <4 x float>*
  %1 = load <4 x float>* %0, align 8
  %2 = tail call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> undef, <4 x float> %1, i8 1)
  ret <8 x float> %2
}