; RUN: llc < %s -mtriple=x86_64-linux -mcpu=penryn | FileCheck %s
; rdar://10050222, rdar://10134392
;
; Codegen expectations for 64-bit memory accesses that feed, or are taken from,
; the low half of a 128-bit vector, plus two shuffles whose operands come from
; null/undef loads. Each function is followed by the instructions llc is
; expected to emit for it, matched in the order listed.

; Load <1 x i64> from %p (align 1), reinterpret it as two floats, and place
; them in lanes 0-1 of the result; lanes 2-3 are taken from %a.
define <4 x float> @t1(<4 x float> %a, <1 x i64>* nocapture %p) nounwind {
entry:
; CHECK-LABEL: t1:
; CHECK: movlps (%rdi), %xmm0
; CHECK: ret
  %p.val = load <1 x i64>* %p, align 1
  %0 = bitcast <1 x i64> %p.val to <2 x float>
  ; Widen <2 x float> to <4 x float>; the upper two lanes are undef.
  %shuffle.i = shufflevector <2 x float> %0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ; Mask <4, 5, 2, 3>: lanes 0-1 from the loaded value, lanes 2-3 from %a.
  %shuffle1.i = shufflevector <4 x float> %a, <4 x float> %shuffle.i, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x float> %shuffle1.i
}

; Same result shape as @t1, but the 64 bits are loaded as a double, inserted
; into lane 0 of a <2 x double>, and bitcast to <4 x float> before the shuffle.
define <4 x float> @t1a(<4 x float> %a, <1 x i64>* nocapture %p) nounwind {
entry:
; CHECK-LABEL: t1a:
; CHECK: movlps (%rdi), %xmm0
; CHECK: ret
  %0 = bitcast <1 x i64>* %p to double*
  %1 = load double* %0
  %2 = insertelement <2 x double> undef, double %1, i32 0
  %3 = bitcast <2 x double> %2 to <4 x float>
  ; Mask <4, 5, 2, 3>: lanes 0-1 from the loaded value, lanes 2-3 from %a.
  %4 = shufflevector <4 x float> %a, <4 x float> %3, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x float> %4
}

; Store element 0 of %a, viewed as <2 x i64>, through %p as an i64.
define void @t2(<1 x i64>* nocapture %p, <4 x float> %a) nounwind {
entry:
; CHECK-LABEL: t2:
; CHECK: movlps %xmm0, (%rdi)
; CHECK: ret
  %cast.i = bitcast <4 x float> %a to <2 x i64>
  %extract.i = extractelement <2 x i64> %cast.i, i32 0
  ; Address of the single i64 element inside the <1 x i64> pointed to by %p.
  %0 = getelementptr inbounds <1 x i64>* %p, i64 0, i64 0
  store i64 %extract.i, i64* %0, align 8
  ret void
}

; Same store as @t2, but element 0 is extracted as a double and written
; through %p reinterpreted as double*.
define void @t2a(<1 x i64>* nocapture %p, <4 x float> %a) nounwind {
entry:
; CHECK-LABEL: t2a:
; CHECK: movlps %xmm0, (%rdi)
; CHECK: ret
  %0 = bitcast <1 x i64>* %p to double*
  %1 = bitcast <4 x float> %a to <2 x double>
  %2 = extractelement <2 x double> %1, i32 0
  store double %2, double* %0
  ret void
}

; rdar://10436044
; Shuffle of an i128 loaded from null (viewed as <2 x double>) with a
; <2 x i32> loaded from undef (viewed as lane 0 of a <2 x double>).
define <2 x double> @t3() nounwind readonly {
bb:
; CHECK-LABEL: t3:
; CHECK: punpcklqdq %xmm1, %xmm0
; CHECK: movq (%rax), %xmm1
; CHECK: movsd %xmm1, %xmm0
  %tmp0 = load i128* null, align 1
  %tmp1 = load <2 x i32>* undef, align 8
  %tmp2 = bitcast i128 %tmp0 to <16 x i8>
  %tmp3 = bitcast <2 x i32> %tmp1 to i64
  %tmp4 = insertelement <2 x i64> undef, i64 %tmp3, i32 0
  %tmp5 = bitcast <16 x i8> %tmp2 to <2 x double>
  %tmp6 = bitcast <2 x i64> %tmp4 to <2 x double>
  ; Mask <2, 1>: lane 0 from %tmp6 (the 64-bit load), lane 1 from %tmp5.
  %tmp7 = shufflevector <2 x double> %tmp5, <2 x double> %tmp6, <2 x i32> <i32 2, i32 1>
  ret <2 x double> %tmp7
}

; rdar://10450317
; Integer variant of @t3 with the shuffle operands swapped: the 64-bit load is
; the first operand and the i128 load (viewed as <2 x i64>) is the second.
define <2 x i64> @t4() nounwind readonly {
bb:
; CHECK-LABEL: t4:
; CHECK: punpcklqdq %xmm0, %xmm1
; CHECK: movq (%rax), %xmm0
; CHECK: movsd %xmm1, %xmm0
  %tmp0 = load i128* null, align 1
  %tmp1 = load <2 x i32>* undef, align 8
  %tmp2 = bitcast i128 %tmp0 to <16 x i8>
  %tmp3 = bitcast <2 x i32> %tmp1 to i64
  %tmp4 = insertelement <2 x i64> undef, i64 %tmp3, i32 0
  %tmp5 = bitcast <16 x i8> %tmp2 to <2 x i64>
  ; Mask <2, 1>: lane 0 from %tmp5 lane 0, lane 1 from %tmp4 lane 1 (undef).
  %tmp6 = shufflevector <2 x i64> %tmp4, <2 x i64> %tmp5, <2 x i32> <i32 2, i32 1>
  ret <2 x i64> %tmp6
}