; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=AVX2

; Prefer a blend instruction to a vinsert128 instruction because blends
; are simpler (no lane changes) and therefore will have equal or better
; performance.

; castA widens a <4 x float> to an <8 x float>. Mask indices 0-3 select the
; elements of %m; index 4 selects element 0 of the zeroinitializer operand, so
; the upper four result elements are zero. Both runs expect a zeroed ymm
; register followed by a single vblendps.
define <8 x float> @castA(<4 x float> %m) nounwind uwtable readnone ssp {
; AVX1-LABEL: castA:
; AVX1: vxorps %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: castA:
; AVX2: vxorps %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT: retq

entry:
  %shuffle.i = shufflevector <4 x float> %m, <4 x float> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
  ret <8 x float> %shuffle.i
}

; castB widens a <2 x double> to a <4 x double>. Mask indices 0-1 select the
; elements of %m; index 2 selects element 0 of the zeroinitializer operand, so
; the upper two result elements are zero. Both runs expect a zeroed ymm
; register followed by a single vblendpd.
define <4 x double> @castB(<2 x double> %m) nounwind uwtable readnone ssp {
; AVX1-LABEL: castB:
; AVX1: vxorpd %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: castB:
; AVX2: vxorpd %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; AVX2-NEXT: retq

entry:
  %shuffle.i = shufflevector <2 x double> %m, <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
  ret <4 x double> %shuffle.i
}

; AVX2 is needed for integer types.

; castC widens a <2 x i64> to a <4 x i64>. Mask indices 0-1 select the
; elements of %m; index 2 selects element 0 of the zeroinitializer operand, so
; the upper two result elements are zero. The expected instructions differ per
; run: the +avx run checks for vxorps/vblendps, while the +avx2 run checks for
; the integer forms vpxor/vpblendd.
define <4 x i64> @castC(<2 x i64> %m) nounwind uwtable readnone ssp {
; AVX1-LABEL: castC:
; AVX1: vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: castC:
; AVX2: vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT: retq

entry:
  %shuffle.i = shufflevector <2 x i64> %m, <2 x i64> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
  ret <4 x i64> %shuffle.i
}

; The next three tests don't need any shuffling. There may or may not be a
; vzeroupper before the return, so just check for the absence of shuffles.

; castD returns elements 0-3 of an <8 x float> as a <4 x float>. The checks
; only require that no "extract" or "blend" text appears in the output for
; this function.
define <4 x float> @castD(<8 x float> %m) nounwind uwtable readnone ssp {
; AVX1-LABEL: castD:
; AVX1-NOT: extract
; AVX1-NOT: blend
;
; AVX2-LABEL: castD:
; AVX2-NOT: extract
; AVX2-NOT: blend

entry:
  %shuffle.i = shufflevector <8 x float> %m, <8 x float> %m, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x float> %shuffle.i
}

; castE returns elements 0-1 of a <4 x i64> as a <2 x i64>. The checks only
; require that no "extract" or "blend" text appears in the output for this
; function.
define <2 x i64> @castE(<4 x i64> %m) nounwind uwtable readnone ssp {
; AVX1-LABEL: castE:
; AVX1-NOT: extract
; AVX1-NOT: blend
;
; AVX2-LABEL: castE:
; AVX2-NOT: extract
; AVX2-NOT: blend

entry:
  %shuffle.i = shufflevector <4 x i64> %m, <4 x i64> %m, <2 x i32> <i32 0, i32 1>
  ret <2 x i64> %shuffle.i
}

; castF returns elements 0-1 of a <4 x double> as a <2 x double>. The checks
; only require that no "extract" or "blend" text appears in the output for
; this function.
define <2 x double> @castF(<4 x double> %m) nounwind uwtable readnone ssp {
; AVX1-LABEL: castF:
; AVX1-NOT: extract
; AVX1-NOT: blend
;
; AVX2-LABEL: castF:
; AVX2-NOT: extract
; AVX2-NOT: blend

entry:
  %shuffle.i = shufflevector <4 x double> %m, <4 x double> %m, <2 x i32> <i32 0, i32 1>
  ret <2 x double> %shuffle.i
}