Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -mtriple=x86_64-linux -mcpu=penryn | FileCheck %s
      2 ; rdar://10050222, rdar://10134392
      3 
      ; t1: replace the low two float lanes of %a with 64 bits loaded from %p.
      ; The FileCheck directives below expect a single movlps load from (%rdi)
      ; into %xmm0, followed by ret.
      4 define <4 x float> @t1(<4 x float> %a, <1 x i64>* nocapture %p) nounwind {
      5 entry:
      6 ; CHECK: t1:
      7 ; CHECK: movlps (%rdi), %xmm0
      8 ; CHECK: ret
        ; 64-bit load with alignment 1, reinterpreted as two floats.
      9   %p.val = load <1 x i64>* %p, align 1
     10   %0 = bitcast <1 x i64> %p.val to <2 x float>
        ; Widen to four lanes; result lanes 2 and 3 are undef.
     11   %shuffle.i = shufflevector <2 x float> %0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
        ; Mask <4,5,2,3>: lanes 0-1 come from the loaded value, lanes 2-3 from %a.
     12   %shuffle1.i = shufflevector <4 x float> %a, <4 x float> %shuffle.i, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
     13   ret <4 x float> %shuffle1.i
     14 }
     15 
      ; t1a: same lane replacement as t1, but the 64 bits are loaded through a
      ; double* and placed in lane 0 of a <2 x double>. The FileCheck directives
      ; expect the same movlps load from (%rdi) into %xmm0, followed by ret.
     16 define <4 x float> @t1a(<4 x float> %a, <1 x i64>* nocapture %p) nounwind {
     17 entry:
     18 ; CHECK: t1a:
     19 ; CHECK: movlps (%rdi), %xmm0
     20 ; CHECK: ret
        ; Reinterpret %p as a pointer to double and load one double.
     21   %0 = bitcast <1 x i64>* %p to double*
     22   %1 = load double* %0
        ; Lane 0 holds the loaded double; lane 1 stays undef.
     23   %2 = insertelement <2 x double> undef, double %1, i32 0
     24   %3 = bitcast <2 x double> %2 to <4 x float>
        ; Mask <4,5,2,3>: lanes 0-1 come from the loaded value, lanes 2-3 from %a.
     25   %4 = shufflevector <4 x float> %a, <4 x float> %3, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
     26   ret <4 x float> %4
     27 }
     28 
      ; t2: store the low 64 bits of %a to %p as an i64.
      ; The FileCheck directives expect a single movlps store of %xmm0 to
      ; (%rdi), followed by ret.
     29 define void @t2(<1 x i64>* nocapture %p, <4 x float> %a) nounwind {
     30 entry:
     31 ; CHECK: t2:
     32 ; CHECK: movlps %xmm0, (%rdi)
     33 ; CHECK: ret
        ; View %a as two i64 lanes and take lane 0.
     34   %cast.i = bitcast <4 x float> %a to <2 x i64>
     35   %extract.i = extractelement <2 x i64> %cast.i, i32 0
        ; Address of the single i64 element inside the <1 x i64> pointed to by %p.
     36   %0 = getelementptr inbounds <1 x i64>* %p, i64 0, i64 0
     37   store i64 %extract.i, i64* %0, align 8
     38   ret void
     39 }
     40 
      ; t2a: same store as t2, but lane 0 is extracted as a double and written
      ; through a double*. The FileCheck directives expect the same movlps
      ; store of %xmm0 to (%rdi), followed by ret.
     41 define void @t2a(<1 x i64>* nocapture %p, <4 x float> %a) nounwind {
     42 entry:
     43 ; CHECK: t2a:
     44 ; CHECK: movlps %xmm0, (%rdi)
     45 ; CHECK: ret
        ; Reinterpret the destination pointer as double*.
     46   %0 = bitcast <1 x i64>* %p to double*
        ; View %a as two doubles and take lane 0.
     47   %1 = bitcast <4 x float> %a to <2 x double>
     48   %2 = extractelement <2 x double> %1, i32 0
     49   store double %2, double* %0
     50   ret void
     51 }
     52 
     53 ; rdar://10436044
      ; t3: build a <2 x double> whose lane 0 comes from a 64-bit <2 x i32>
      ; load and whose lane 1 comes from the upper half of a 128-bit load.
      ; The FileCheck directives expect punpcklqdq, then a movq load from
      ; (%rax), then movsd, in that order.
      ; NOTE(review): the load addresses are null and undef, which looks like
      ; a reduced reproducer for the rdar referenced above - confirm.
     54 define <2 x double> @t3() nounwind readonly {
     55 bb:
     56 ; CHECK: t3:
     57 ; CHECK: punpcklqdq %xmm1, %xmm0
     58 ; CHECK: movq (%rax), %xmm1
     59 ; CHECK: movsd %xmm1, %xmm0
        ; 128-bit load with alignment 1 and a 64-bit vector load with alignment 8.
     60   %tmp0 = load i128* null, align 1
     61   %tmp1 = load <2 x i32>* undef, align 8
     62   %tmp2 = bitcast i128 %tmp0 to <16 x i8>
        ; The <2 x i32> value becomes one i64 placed in lane 0; lane 1 is undef.
     63   %tmp3 = bitcast <2 x i32> %tmp1 to i64
     64   %tmp4 = insertelement <2 x i64> undef, i64 %tmp3, i32 0
     65   %tmp5 = bitcast <16 x i8> %tmp2 to <2 x double>
     66   %tmp6 = bitcast <2 x i64> %tmp4 to <2 x double>
        ; Mask <2,1>: lane 0 = %tmp6[0] (64-bit load), lane 1 = %tmp5[1] (128-bit load).
     67   %tmp7 = shufflevector <2 x double> %tmp5, <2 x double> %tmp6, <2 x i32> <i32 2, i32 1>
     68   ret <2 x double> %tmp7
     69 }
     70 
     71 ; rdar://10450317
      ; t4: <2 x i64> variant of the same two-load shuffle, with the shuffle
      ; operands in the opposite order from t3. The FileCheck directives expect
      ; punpcklqdq, then a movq load from (%rax), then movsd, in that order.
      ; NOTE(review): the load addresses are null and undef, which looks like
      ; a reduced reproducer for the rdar referenced above - confirm.
     72 define <2 x i64> @t4() nounwind readonly {
     73 bb:
     74 ; CHECK: t4:
     75 ; CHECK: punpcklqdq %xmm0, %xmm1
     76 ; CHECK: movq (%rax), %xmm0
     77 ; CHECK: movsd %xmm1, %xmm0
        ; 128-bit load with alignment 1 and a 64-bit vector load with alignment 8.
     78   %tmp0 = load i128* null, align 1
     79   %tmp1 = load <2 x i32>* undef, align 8
     80   %tmp2 = bitcast i128 %tmp0 to <16 x i8>
        ; The <2 x i32> value becomes one i64 placed in lane 0; lane 1 is undef.
     81   %tmp3 = bitcast <2 x i32> %tmp1 to i64
     82   %tmp4 = insertelement <2 x i64> undef, i64 %tmp3, i32 0
     83   %tmp5 = bitcast <16 x i8> %tmp2 to <2 x i64>
        ; Mask <2,1>: lane 0 = %tmp5[0] (128-bit load), lane 1 = %tmp4[1], which is undef.
     84   %tmp6 = shufflevector <2 x i64> %tmp4, <2 x i64> %tmp5, <2 x i32> <i32 2, i32 1>
     85   ret <2 x i64> %tmp6
     86 }
     87