Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
      2 
      ; @ld: broadcast lane 0 of a <2 x double> into both result lanes.
      ; The all-zero shuffle mask selects element 0 of %p twice; the second
      ; shuffle operand is undef and never referenced by the mask.
      ; The test expects llc to emit an unpcklpd for this pattern.
      3 define <2 x double> @ld(<2 x double> %p) nounwind optsize ssp {
      4 ; CHECK: unpcklpd
      5   %shuffle = shufflevector <2 x double> %p, <2 x double> undef, <2 x i32> zeroinitializer
      6   ret <2 x double> %shuffle
      7 }
      8 
      ; @hd: broadcast lane 1 of a <2 x double> into both result lanes.
      ; The <1, 1> shuffle mask selects element 1 of %p twice; the undef
      ; second operand is never referenced by the mask.
      ; The test expects llc to emit an unpckhpd for this pattern.
      9 define <2 x double> @hd(<2 x double> %p) nounwind optsize ssp {
     10 ; CHECK: unpckhpd
     11   %shuffle = shufflevector <2 x double> %p, <2 x double> undef, <2 x i32> <i32 1, i32 1>
     12   ret <2 x double> %shuffle
     13 }
     14 
     ; @ldi: integer counterpart of @ld -- broadcast lane 0 of a <2 x i64>.
     ; The all-zero shuffle mask selects element 0 of %p for both lanes.
     ; The test expects llc to emit a punpcklqdq for this pattern.
     15 define <2 x i64> @ldi(<2 x i64> %p) nounwind optsize ssp {
     16 ; CHECK: punpcklqdq
     17   %shuffle = shufflevector <2 x i64> %p, <2 x i64> undef, <2 x i32> zeroinitializer
     18   ret <2 x i64> %shuffle
     19 }
     20 
     ; @hdi: integer counterpart of @hd -- broadcast lane 1 of a <2 x i64>.
     ; The <1, 1> shuffle mask selects element 1 of %p for both lanes.
     ; The test expects llc to emit a punpckhqdq for this pattern.
     21 define <2 x i64> @hdi(<2 x i64> %p) nounwind optsize ssp {
     22 ; CHECK: punpckhqdq
     23   %shuffle = shufflevector <2 x i64> %p, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
     24   ret <2 x i64> %shuffle
     25 }
     26 
     27 ; rdar://10050549
     ; Aggregate of two floats; pointers to it are the memory operands of
     ; @loadhpi and @loadhpi2 below.
     28 %struct.Float2 = type { float, float }
     29 
     ; @loadhpi: replace the high two lanes of %vecin1 with two floats loaded
     ; from consecutive addresses starting at %vPtr.
     ;   - %vPtr is cast to <1 x i64>* and then to float*; the getelementptr
     ;     with index 0 does not move the address.
     ;   - The two scalar loads (offsets 0 and 1, align 4) are inserted into
     ;     lanes 0 and 1 of %vecin2; its lanes 2 and 3 remain undef.
     ;   - Shuffle mask <0, 1, 4, 5> keeps lanes 0-1 of %vecin1 and takes
     ;     lanes 0-1 of %vecin2 as the result's lanes 2-3.
     ; FileCheck expectations: after the "loadhpi" match, a "movhps (" with a
     ; memory operand must appear, with no "movq" in between.
     30 define <4 x float> @loadhpi(%struct.Float2* %vPtr, <4 x float> %vecin1) nounwind readonly ssp {
     31 entry:
     32 ; CHECK: loadhpi
     33 ; CHECK-NOT: movq
     34 ; CHECK: movhps (
     35   %tmp1 = bitcast %struct.Float2* %vPtr to <1 x i64>*
     36   %addptr7 = getelementptr inbounds <1 x i64>* %tmp1, i64 0
     37   %tmp2 = bitcast <1 x i64>* %addptr7 to float*
     38   %tmp3 = load float* %tmp2, align 4
     39   %vec = insertelement <4 x float> undef, float %tmp3, i32 0
     40   %addptr.i12 = getelementptr inbounds float* %tmp2, i64 1
     41   %tmp4 = load float* %addptr.i12, align 4
     42   %vecin2 = insertelement <4 x float> %vec, float %tmp4, i32 1
     43   %shuffle = shufflevector <4 x float> %vecin1, <4 x float> %vecin2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
     44   ret <4 x float> %shuffle
     45 }
     46 
     47 ; rdar://10119696
     48 ; CHECK: f
     ; @f: replace the low two lanes of %x with the 64 bits loaded from %y.
     ;   - The double is loaded with align 1 (no alignment guarantee) and
     ;     inserted into lane 0 of a <2 x double>; lane 1 remains undef.
     ;   - That vector is reinterpreted as <4 x float>, so the loaded bits
     ;     occupy float lanes 0-1.
     ;   - Shuffle mask <4, 5, 2, 3> takes lanes 0-1 from the loaded value
     ;     and keeps lanes 2-3 of %x.
     ; FileCheck expectation: a movlps from memory addressed by %rdi or %rdx
     ; into %xmm0. NOTE(review): the rdi|rdx alternation presumably covers
     ; different calling conventions for the pointer argument -- confirm.
     49 define <4 x float> @f(<4 x float> %x, double* nocapture %y) nounwind readonly ssp {
     50 entry:
     51   ; CHECK: movlps  (%{{rdi|rdx}}), %xmm0
     52   %u110.i = load double* %y, align 1
     53   %tmp8.i = insertelement <2 x double> undef, double %u110.i, i32 0
     54   %tmp9.i = bitcast <2 x double> %tmp8.i to <4 x float>
     55   %shuffle.i = shufflevector <4 x float> %x, <4 x float> %tmp9.i, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
     56   ret <4 x float> %shuffle.i
     57 }
     58 
     ; @loadhpi2: build a <4 x float> from two 64-bit memory loads.
     ;   - %s is sign-extended to i64 and used as the element index into both
     ;     pointers, each viewed as <1 x i64>*.
     ;   - Each <1 x i64> load (align 1) is reinterpreted as <2 x float> and
     ;     widened to <4 x float>, with lanes 2-3 undef.
     ;   - The final shuffle mask <0, 1, 4, 5> puts the value loaded via
     ;     %vHiCoefPtr_0 in result lanes 0-1 and the value loaded via
     ;     %vLoCoefPtr_0 in result lanes 2-3.
     ; FileCheck expectations: after the "loadhpi2" match, a "movhps (" with a
     ; memory operand must appear, and no "movlhps" may follow it.
     59 define <4 x float> @loadhpi2(%struct.Float2* nocapture %vHiCoefPtr_0, %struct.Float2* nocapture %vLoCoefPtr_0, i32 %s) nounwind readonly ssp {
     60 entry:
     61 ; CHECK: loadhpi2
     62 ; CHECK: movhps (
     63 ; CHECK-NOT: movlhps
     64   %0 = bitcast %struct.Float2* %vHiCoefPtr_0 to <1 x i64>*
     65   %idx.ext = sext i32 %s to i64
     66   %add.ptr = getelementptr inbounds <1 x i64>* %0, i64 %idx.ext
     67   %add.ptr.val = load <1 x i64>* %add.ptr, align 1
     68   %1 = bitcast <1 x i64> %add.ptr.val to <2 x float>
     69   %shuffle.i = shufflevector <2 x float> %1, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
     70   %2 = bitcast %struct.Float2* %vLoCoefPtr_0 to <1 x i64>*
     71   %add.ptr2 = getelementptr inbounds <1 x i64>* %2, i64 %idx.ext
     72   %add.ptr2.val = load <1 x i64>* %add.ptr2, align 1
     73   %3 = bitcast <1 x i64> %add.ptr2.val to <2 x float>
     74   %shuffle.i4 = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
     75   %shuffle1.i5 = shufflevector <4 x float> %shuffle.i, <4 x float> %shuffle.i4, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
     76   ret <4 x float> %shuffle1.i5
     77 }
     78